8175368: [s390] Provide intrinsic implementation for CRC32C

Reviewed-by: mdoerr, simonis
This commit is contained in:
Lutz Schmidt 2017-03-08 14:55:32 +01:00
parent a235f9f6ef
commit 99a554c5ad
10 changed files with 904 additions and 556 deletions

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved. * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -3048,9 +3048,8 @@ void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
assert_different_registers(val, crc, res); assert_different_registers(val, crc, res);
__ load_const_optimized(res, StubRoutines::crc_table_addr()); __ load_const_optimized(res, StubRoutines::crc_table_addr());
__ not_(crc, noreg, false); // ~crc __ kernel_crc32_singleByteReg(crc, val, res, true);
__ update_byte_crc32(crc, val, res); __ z_lgfr(res, crc);
__ not_(res, crc, false); // ~crc
} }
#undef __ #undef __

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved. * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -61,20 +61,6 @@ void LIRItem::load_nonconstant(int bits) {
} }
} }
inline void load_int_as_long(LIR_List *ll, LIRItem &li, LIR_Opr dst) {
LIR_Opr r = li.value()->operand();
if (r->is_constant()) {
// Constants get loaded with sign extend on this platform.
ll->move(li.result(), dst);
} else {
if (!r->is_register()) {
li.load_item_force(dst);
}
LIR_Opr dst_l = FrameMap::as_long_opr(dst->as_register());
ll->convert(Bytecodes::_i2l, li.result(), dst_l); // Convert.
}
}
//-------------------------------------------------------------- //--------------------------------------------------------------
// LIRGenerator // LIRGenerator
//-------------------------------------------------------------- //--------------------------------------------------------------
@ -1217,10 +1203,9 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
LIR_Opr arg2 = cc->at(1); LIR_Opr arg2 = cc->at(1);
LIR_Opr arg3 = cc->at(2); LIR_Opr arg3 = cc->at(2);
// CCallingConventionRequiresIntsAsLongs
crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits. crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits.
__ leal(LIR_OprFact::address(a), arg2); __ leal(LIR_OprFact::address(a), arg2);
load_int_as_long(gen()->lir(), len, arg3); len.load_item_force(arg3); // We skip int->long conversion here, because CRC32 stub expects int.
__ call_runtime_leaf(StubRoutines::updateBytesCRC32(), LIR_OprFact::illegalOpr, result_reg, cc->args()); __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), LIR_OprFact::illegalOpr, result_reg, cc->args());
__ move(result_reg, result); __ move(result_reg, result);
@ -1233,7 +1218,60 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
} }
void LIRGenerator::do_update_CRC32C(Intrinsic* x) { void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
Unimplemented(); assert(UseCRC32CIntrinsics, "or should not be here");
LIR_Opr result = rlock_result(x);
switch (x->id()) {
case vmIntrinsics::_updateBytesCRC32C:
case vmIntrinsics::_updateDirectByteBufferCRC32C: {
bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C);
LIRItem crc(x->argument_at(0), this);
LIRItem buf(x->argument_at(1), this);
LIRItem off(x->argument_at(2), this);
LIRItem len(x->argument_at(3), this);
buf.load_item();
off.load_nonconstant();
LIR_Opr index = off.result();
int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
if (off.result()->is_constant()) {
index = LIR_OprFact::illegalOpr;
offset += off.result()->as_jint();
}
LIR_Opr base_op = buf.result();
if (index->is_valid()) {
LIR_Opr tmp = new_register(T_LONG);
__ convert(Bytecodes::_i2l, index, tmp);
index = tmp;
}
LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE);
BasicTypeList signature(3);
signature.append(T_INT);
signature.append(T_ADDRESS);
signature.append(T_INT);
CallingConvention* cc = frame_map()->c_calling_convention(&signature);
const LIR_Opr result_reg = result_register_for (x->type());
LIR_Opr arg1 = cc->at(0);
LIR_Opr arg2 = cc->at(1);
LIR_Opr arg3 = cc->at(2);
crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32C stub doesn't care about high bits.
__ leal(LIR_OprFact::address(a), arg2);
len.load_item_force(arg3); // We skip int->long conversion here, because CRC32C stub expects int.
__ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), LIR_OprFact::illegalOpr, result_reg, cc->args());
__ move(result_reg, result);
break;
}
default: {
ShouldNotReachHere();
}
}
} }
void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
@ -1264,4 +1302,3 @@ void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
fatal("vectorizedMismatch intrinsic is not implemented on this platform"); fatal("vectorizedMismatch intrinsic is not implemented on this platform");
} }

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved. * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -5910,8 +5910,7 @@ void MacroAssembler::update_byte_crc32(Register crc, Register val, Register tabl
* @param len register containing number of bytes * @param len register containing number of bytes
* @param table register pointing to CRC table * @param table register pointing to CRC table
*/ */
void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, Register data) {
Register data, bool invertCRC) {
assert_different_registers(crc, buf, len, table, data); assert_different_registers(crc, buf, len, table, data);
Label L_mainLoop, L_done; Label L_mainLoop, L_done;
@ -5921,20 +5920,12 @@ void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register
z_ltr(len, len); z_ltr(len, len);
z_brnh(L_done); z_brnh(L_done);
if (invertCRC) {
not_(crc, noreg, false); // ~c
}
bind(L_mainLoop); bind(L_mainLoop);
z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register. z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
add2reg(buf, mainLoop_stepping); // Advance buffer position. add2reg(buf, mainLoop_stepping); // Advance buffer position.
update_byte_crc32(crc, data, table); update_byte_crc32(crc, data, table);
z_brct(len, L_mainLoop); // Iterate. z_brct(len, L_mainLoop); // Iterate.
if (invertCRC) {
not_(crc, noreg, false); // ~c
}
bind(L_done); bind(L_done);
} }
@ -5951,6 +5942,7 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
// c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
// crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
// #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
// Pre-calculate (constant) column offsets, use columns 4..7 for big-endian.
const int ix0 = 4*(4*CRC32_COLUMN_SIZE); const int ix0 = 4*(4*CRC32_COLUMN_SIZE);
const int ix1 = 5*(4*CRC32_COLUMN_SIZE); const int ix1 = 5*(4*CRC32_COLUMN_SIZE);
const int ix2 = 6*(4*CRC32_COLUMN_SIZE); const int ix2 = 6*(4*CRC32_COLUMN_SIZE);
@ -5969,17 +5961,12 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2 rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2
rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2 rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2
// Load pre-calculated table values. // XOR indexed table values to calculate updated crc.
// Use columns 4..7 for big-endian.
z_ly(t3, Address(table, t3, (intptr_t)ix0));
z_ly(t2, Address(table, t2, (intptr_t)ix1)); z_ly(t2, Address(table, t2, (intptr_t)ix1));
z_ly(t1, Address(table, t1, (intptr_t)ix2));
z_ly(t0, Address(table, t0, (intptr_t)ix3)); z_ly(t0, Address(table, t0, (intptr_t)ix3));
z_xy(t2, Address(table, t3, (intptr_t)ix0));
// Calculate new crc from table values. z_xy(t0, Address(table, t1, (intptr_t)ix2));
z_xr(t2, t3); z_xr(t0, t2); // Now t0 contains the updated CRC value.
z_xr(t0, t1);
z_xr(t0, t2); // Now crc contains the final checksum value.
lgr_if_needed(crc, t0); lgr_if_needed(crc, t0);
} }
@ -5992,7 +5979,8 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
* uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
*/ */
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table, void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3) { Register t0, Register t1, Register t2, Register t3,
bool invertCRC) {
assert_different_registers(crc, buf, len, table); assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail; Label L_mainLoop, L_tail;
@ -6007,7 +5995,9 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
// The situation itself is detected and handled correctly by the conditional branches // The situation itself is detected and handled correctly by the conditional branches
// following aghi(len, -stepping) and aghi(len, +stepping). // following aghi(len, -stepping) and aghi(len, +stepping).
not_(crc, noreg, false); // 1s complement of crc if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
}
#if 0 #if 0
{ {
@ -6022,7 +6012,7 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc
z_sgfr(len, ctr); // Remaining len after alignment. z_sgfr(len, ctr); // Remaining len after alignment.
update_byteLoop_crc32(crc, buf, ctr, table, data, false); update_byteLoop_crc32(crc, buf, ctr, table, data);
} }
#endif #endif
@ -6030,21 +6020,23 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
z_srag(ctr, len, log_stepping); z_srag(ctr, len, log_stepping);
z_brnh(L_tail); z_brnh(L_tail);
z_lrvr(crc, crc); // Revert byte order because we are dealing with big-endian data. z_lrvr(crc, crc); // Revert byte order because we are dealing with big-endian data.
rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop rotate_then_insert(len, len, 64-log_stepping, 63, 0, true); // #bytes for tailLoop
BIND(L_mainLoop); BIND(L_mainLoop);
update_1word_crc32(crc, buf, table, 0, 0, crc, t1, t2, t3); update_1word_crc32(crc, buf, table, 0, 0, crc, t1, t2, t3);
update_1word_crc32(crc, buf, table, 4, mainLoop_stepping, crc, t1, t2, t3); update_1word_crc32(crc, buf, table, 4, mainLoop_stepping, crc, t1, t2, t3);
z_brct(ctr, L_mainLoop); // Iterate. z_brct(ctr, L_mainLoop); // Iterate.
z_lrvr(crc, crc); // Revert byte order back to original. z_lrvr(crc, crc); // Revert byte order back to original.
// Process last few (<8) bytes of buffer. // Process last few (<8) bytes of buffer.
BIND(L_tail); BIND(L_tail);
update_byteLoop_crc32(crc, buf, len, table, data, false); update_byteLoop_crc32(crc, buf, len, table, data);
not_(crc, noreg, false); // 1s complement of crc if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
}
} }
/** /**
@ -6056,7 +6048,8 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
* uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller!
*/ */
void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table, void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3) { Register t0, Register t1, Register t2, Register t3,
bool invertCRC) {
assert_different_registers(crc, buf, len, table); assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail; Label L_mainLoop, L_tail;
@ -6070,7 +6063,9 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
// The situation itself is detected and handled correctly by the conditional branches // The situation itself is detected and handled correctly by the conditional branches
// following aghi(len, -stepping) and aghi(len, +stepping). // following aghi(len, -stepping) and aghi(len, +stepping).
not_(crc, noreg, false); // 1s complement of crc if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
}
// Check for short (<4 bytes) buffer. // Check for short (<4 bytes) buffer.
z_srag(ctr, len, log_stepping); z_srag(ctr, len, log_stepping);
@ -6082,13 +6077,16 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
BIND(L_mainLoop); BIND(L_mainLoop);
update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3); update_1word_crc32(crc, buf, table, 0, mainLoop_stepping, crc, t1, t2, t3);
z_brct(ctr, L_mainLoop); // Iterate. z_brct(ctr, L_mainLoop); // Iterate.
z_lrvr(crc, crc); // Revert byte order back to original. z_lrvr(crc, crc); // Revert byte order back to original.
// Process last few (<8) bytes of buffer. // Process last few (<8) bytes of buffer.
BIND(L_tail); BIND(L_tail);
update_byteLoop_crc32(crc, buf, len, table, data, false); update_byteLoop_crc32(crc, buf, len, table, data);
not_(crc, noreg, false); // 1s complement of crc if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
}
} }
/** /**
@ -6098,22 +6096,51 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
* @param table register pointing to CRC table * @param table register pointing to CRC table
*/ */
void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3) { Register t0, Register t1, Register t2, Register t3,
bool invertCRC) {
assert_different_registers(crc, buf, len, table); assert_different_registers(crc, buf, len, table);
Register data = t0; Register data = t0;
update_byteLoop_crc32(crc, buf, len, table, data, true); if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
}
update_byteLoop_crc32(crc, buf, len, table, data);
if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
}
} }
void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) { void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
bool invertCRC) {
assert_different_registers(crc, buf, len, table, tmp); assert_different_registers(crc, buf, len, table, tmp);
not_(crc, noreg, false); // ~c if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
}
z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register. z_llgc(tmp, Address(buf, (intptr_t)0)); // Current byte of input buffer (zero extended). Avoids garbage in upper half of register.
update_byte_crc32(crc, tmp, table); update_byte_crc32(crc, tmp, table);
not_(crc, noreg, false); // ~c if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
}
}
void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table,
bool invertCRC) {
assert_different_registers(crc, val, table);
if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
}
update_byte_crc32(crc, val, table);
if (invertCRC) {
not_(crc, noreg, false); // 1s complement of crc
}
} }
// //

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved. * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -1009,22 +1009,35 @@ class MacroAssembler: public Assembler {
int before = 0, int after = 0) PRODUCT_RETURN; int before = 0, int after = 0) PRODUCT_RETURN;
// Emitters for CRC32 calculation. // Emitters for CRC32 calculation.
// A note on invertCRC:
// Unfortunately, internal representation of crc differs between CRC32 and CRC32C.
// CRC32 holds its current crc value in the externally visible representation.
// CRC32C holds its current crc value in internal format, ready for updating.
// Thus, the crc value must be bit-flipped before updating it in the CRC32 case.
// In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()).
// The bool invertCRC parameter indicates whether bit-flipping is required before updates.
private: private:
void fold_byte_crc32(Register crc, Register table, Register val, Register tmp); void fold_byte_crc32(Register crc, Register table, Register val, Register tmp);
void fold_8bit_crc32(Register crc, Register table, Register tmp); void fold_8bit_crc32(Register crc, Register table, Register tmp);
void update_byte_crc32( Register crc, Register val, Register table);
void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
Register data, bool invertCRC); Register data);
void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
Register t0, Register t1, Register t2, Register t3); Register t0, Register t1, Register t2, Register t3);
public: public:
void update_byte_crc32( Register crc, Register val, Register table); void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp); bool invertCRC);
void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
bool invertCRC);
void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3); Register t0, Register t1, Register t2, Register t3,
bool invertCRC);
void kernel_crc32_1word(Register crc, Register buf, Register len, Register table, void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3); Register t0, Register t1, Register t2, Register t3,
bool invertCRC);
void kernel_crc32_2word(Register crc, Register buf, Register len, Register table, void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3); Register t0, Register t1, Register t2, Register t3,
bool invertCRC);
// Emitters for BigInteger.multiplyToLen intrinsic // Emitters for BigInteger.multiplyToLen intrinsic
// note: length of result array (zlen) is passed on the stack // note: length of result array (zlen) is passed on the stack

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved. * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -2310,26 +2310,25 @@ class StubGenerator: public StubCodeGenerator {
} }
/**
// Arguments: * Arguments:
// Z_ARG1 - int crc *
// Z_ARG2 - byte* buf * Inputs:
// Z_ARG3 - int length (of buffer) * Z_ARG1 - int crc
// * Z_ARG2 - byte* buf
// Result: * Z_ARG3 - int length (of buffer)
// Z_RET - int crc result *
// * Result:
// Compute CRC32 function. * Z_RET - int crc result
address generate_CRC32_updateBytes(const char* name) { **/
__ align(CodeEntryAlignment); // Compute CRC function (generic, for all polynomials).
StubCodeMark mark(this, "StubRoutines", name); void generate_CRC_updateBytes(const char* name, Register table, bool invertCRC) {
unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
// arguments to kernel_crc32: // arguments to kernel_crc32:
Register crc = Z_ARG1; // Current checksum, preset by caller or result from previous call, int. Register crc = Z_ARG1; // Current checksum, preset by caller or result from previous call, int.
Register data = Z_ARG2; // source byte array Register data = Z_ARG2; // source byte array
Register dataLen = Z_ARG3; // #bytes to process, int Register dataLen = Z_ARG3; // #bytes to process, int
Register table = Z_ARG4; // crc table address // Register table = Z_ARG4; // crc table address. Preloaded and passed in by caller.
const Register t0 = Z_R10; // work reg for kernel* emitters const Register t0 = Z_R10; // work reg for kernel* emitters
const Register t1 = Z_R11; // work reg for kernel* emitters const Register t1 = Z_R11; // work reg for kernel* emitters
const Register t2 = Z_R12; // work reg for kernel* emitters const Register t2 = Z_R12; // work reg for kernel* emitters
@ -2341,16 +2340,50 @@ class StubGenerator: public StubCodeGenerator {
// Crc used as int. // Crc used as int.
__ z_llgfr(dataLen, dataLen); __ z_llgfr(dataLen, dataLen);
StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
__ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide add'l space to spill 5 registers. __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide add'l space to spill 5 registers.
__ z_stmg(Z_R10, Z_R13, 1*8, Z_SP); // Spill regs 10..13 to make them available as work registers. __ z_stmg(Z_R10, Z_R13, 1*8, Z_SP); // Spill regs 10..13 to make them available as work registers.
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3); __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, invertCRC);
__ z_lmg(Z_R10, Z_R13, 1*8, Z_SP); // Spill regs 10..13 back from stack. __ z_lmg(Z_R10, Z_R13, 1*8, Z_SP); // Spill regs 10..13 back from stack.
__ resize_frame(+(6*8), Z_R0, true); // Resize frame to provide add'l space to spill 5 registers. __ resize_frame(+(6*8), Z_R0, true); // Resize frame to provide add'l space to spill 5 registers.
__ z_llgfr(Z_RET, crc); // Updated crc is function result. No copying required, just zero upper 32 bits. __ z_llgfr(Z_RET, crc); // Updated crc is function result. No copying required, just zero upper 32 bits.
__ z_br(Z_R14); // Result already in Z_RET == Z_ARG1. __ z_br(Z_R14); // Result already in Z_RET == Z_ARG1.
}
// Compute CRC32 function.
address generate_CRC32_updateBytes(const char* name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
assert(UseCRC32Intrinsics, "should not generate this stub (%s) with CRC32 intrinsics disabled", name);
BLOCK_COMMENT("CRC32_updateBytes {");
Register table = Z_ARG4; // crc32 table address.
StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
generate_CRC_updateBytes(name, table, true);
BLOCK_COMMENT("} CRC32_updateBytes");
return __ addr_at(start_off);
}
// Compute CRC32C function.
address generate_CRC32C_updateBytes(const char* name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
assert(UseCRC32CIntrinsics, "should not generate this stub (%s) with CRC32C intrinsics disabled", name);
BLOCK_COMMENT("CRC32C_updateBytes {");
Register table = Z_ARG4; // crc32c table address.
StubRoutines::zarch::generate_load_crc32c_table_addr(_masm, table);
generate_CRC_updateBytes(name, table, false);
BLOCK_COMMENT("} CRC32C_updateBytes");
return __ addr_at(start_off); return __ addr_at(start_off);
} }
@ -2421,9 +2454,13 @@ class StubGenerator: public StubCodeGenerator {
// Entry points that are platform specific. // Entry points that are platform specific.
if (UseCRC32Intrinsics) { if (UseCRC32Intrinsics) {
// We have no CRC32 table on z/Architecture. StubRoutines::_crc_table_adr = (address)StubRoutines::zarch::_crc_table;
StubRoutines::_crc_table_adr = (address)StubRoutines::zarch::_crc_table; StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes");
StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes"); }
if (UseCRC32CIntrinsics) {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::zarch::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_CRC32C_updateBytes("CRC32C_updateBytes");
} }
// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction. // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved. * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -72,6 +72,7 @@ class zarch {
static address _partial_subtype_check; static address _partial_subtype_check;
static juint _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE]; static juint _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
static juint _crc32c_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction. // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
static address _trot_table_addr; static address _trot_table_addr;
@ -91,7 +92,9 @@ class zarch {
static address partial_subtype_check() { return _partial_subtype_check; } static address partial_subtype_check() { return _partial_subtype_check; }
static void generate_load_absolute_address(MacroAssembler* masm, Register table, address table_addr, uint64_t table_contents);
static void generate_load_crc_table_addr(MacroAssembler* masm, Register table); static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
static void generate_load_crc32c_table_addr(MacroAssembler* masm, Register table);
// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction. // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
static void generate_load_trot_table_addr(MacroAssembler* masm, Register table); static void generate_load_trot_table_addr(MacroAssembler* masm, Register table);

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved. * Copyright (c) 2016, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -1964,7 +1964,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
__ z_llgf(crc, 2 * wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register. __ z_llgf(crc, 2 * wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register.
StubRoutines::zarch::generate_load_crc_table_addr(_masm, table); StubRoutines::zarch::generate_load_crc_table_addr(_masm, table);
__ kernel_crc32_singleByte(crc, data, dataLen, table, Z_R1); __ kernel_crc32_singleByte(crc, data, dataLen, table, Z_R1, true);
// Restore caller sp for c2i case. // Restore caller sp for c2i case.
__ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started. __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.
@ -2020,10 +2020,10 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
// data = buf + off // data = buf + off
BLOCK_COMMENT("CRC32_updateByteBuffer {"); BLOCK_COMMENT("CRC32_updateByteBuffer {");
__ z_llgf(crc, 5*wordSize, argP); // current crc state __ z_llgf(crc, 5*wordSize, argP); // current crc state
__ z_lg(data, 3*wordSize, argP); // start of byte buffer __ z_lg(data, 3*wordSize, argP); // start of byte buffer
__ z_agf(data, 2*wordSize, argP); // Add byte buffer offset. __ z_agf(data, 2*wordSize, argP); // Add byte buffer offset.
__ z_lgf(dataLen, 1*wordSize, argP); // #bytes to process __ z_lgf(dataLen, 1*wordSize, argP); // #bytes to process
} else { // Used for "updateBytes update". } else { // Used for "updateBytes update".
// crc @ (SP + 4W) (32bit) // crc @ (SP + 4W) (32bit)
// buf @ (SP + 3W) (64bit ptr to byte array) // buf @ (SP + 3W) (64bit ptr to byte array)
// off @ (SP + 2W) (32bit) // off @ (SP + 2W) (32bit)
@ -2031,7 +2031,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
// data = buf + off + base_offset // data = buf + off + base_offset
BLOCK_COMMENT("CRC32_updateBytes {"); BLOCK_COMMENT("CRC32_updateBytes {");
__ z_llgf(crc, 4*wordSize, argP); // current crc state __ z_llgf(crc, 4*wordSize, argP); // current crc state
__ z_lg(data, 3*wordSize, argP); // start of byte buffer __ z_lg(data, 3*wordSize, argP); // start of byte buffer
__ z_agf(data, 2*wordSize, argP); // Add byte buffer offset. __ z_agf(data, 2*wordSize, argP); // Add byte buffer offset.
__ z_lgf(dataLen, 1*wordSize, argP); // #bytes to process __ z_lgf(dataLen, 1*wordSize, argP); // #bytes to process
__ z_aghi(data, arrayOopDesc::base_offset_in_bytes(T_BYTE)); __ z_aghi(data, arrayOopDesc::base_offset_in_bytes(T_BYTE));
@ -2041,7 +2041,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
__ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide add'l space to spill 5 registers. __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide add'l space to spill 5 registers.
__ z_stmg(t0, t3, 1*8, Z_SP); // Spill regs 10..13 to make them available as work registers. __ z_stmg(t0, t3, 1*8, Z_SP); // Spill regs 10..13 to make them available as work registers.
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3); __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, true);
__ z_lmg(t0, t3, 1*8, Z_SP); // Spill regs 10..13 back from stack. __ z_lmg(t0, t3, 1*8, Z_SP); // Spill regs 10..13 back from stack.
// Restore caller sp for c2i case. // Restore caller sp for c2i case.
@ -2060,8 +2060,73 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
return NULL; return NULL;
} }
// Method entry for static native methods:
//   int java.util.zip.CRC32C.updateBytes(           int crc, byte[] b,  int off, int end)
//   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long  buf, int off, int end)
//
// Note: unlike the CRC32 entry points, the last java argument is the END INDEX of
// the range to process, not its length. The #bytes to process is (end - off).
//
// Returns the address of the generated entry, or NULL if UseCRC32CIntrinsics is off.
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {

  if (UseCRC32CIntrinsics) {
    uint64_t entry_off = __ offset();

    // We don't generate local frame and don't align stack because
    // we call stub code and there is no safepoint on this path.

    // Load parameters.
    // Z_esp is callers operand stack pointer, i.e. it points to the parameters.
    const Register argP    = Z_esp;
    const Register crc     = Z_ARG1;  // crc value
    const Register data    = Z_ARG2;  // address of first byte to process
    const Register dataLen = Z_ARG3;  // #bytes to process
    const Register table   = Z_ARG4;  // address of crc32c table
    const Register t0      = Z_R10;   // work reg for kernel* emitters
    const Register t1      = Z_R11;   // work reg for kernel* emitters
    const Register t2      = Z_R12;   // work reg for kernel* emitters
    const Register t3      = Z_R13;   // work reg for kernel* emitters

    // Arguments are reversed on java expression stack.
    // Calculate address of start element.
    if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { // Used for "updateDirectByteBuffer".
      // crc @ (SP + 5W) (32bit)
      // buf @ (SP + 3W) (64bit buffer address, passed as java long)
      // off @ (SP + 2W) (32bit)
      // end @ (SP + 1W) (32bit)
      // data    = buf + off
      // dataLen = end - off
      BLOCK_COMMENT("CRC32C_updateDirectByteBuffer {");
      __ z_llgf(crc,    5*wordSize, argP);           // current crc state
      __ z_lg(data,     3*wordSize, argP);           // start of byte buffer
      __ z_agf(data,    2*wordSize, argP);           // Add byte buffer offset.
      __ z_lgf(dataLen, 1*wordSize, argP);           // #bytes to process, calculated as
      __ z_sgf(dataLen, Address(argP, 2*wordSize));  // (end_index - offset)
    } else {                                         // Used for "updateBytes".
      // crc @ (SP + 4W) (32bit)
      // buf @ (SP + 3W) (64bit ptr to byte array)
      // off @ (SP + 2W) (32bit)
      // end @ (SP + 1W) (32bit)
      // data    = buf + off + base_offset
      // dataLen = end - off
      BLOCK_COMMENT("CRC32C_updateBytes {");
      __ z_llgf(crc,    4*wordSize, argP);           // current crc state
      __ z_lg(data,     3*wordSize, argP);           // start of byte buffer
      __ z_agf(data,    2*wordSize, argP);           // Add byte buffer offset.
      __ z_lgf(dataLen, 1*wordSize, argP);           // #bytes to process, calculated as
      __ z_sgf(dataLen, Address(argP, 2*wordSize));  // (end_index - offset)
      __ z_aghi(data, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // Skip the array header.
    }

    StubRoutines::zarch::generate_load_crc32c_table_addr(_masm, table);

    __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide add'l space to spill the 4 work registers.
    __ z_stmg(t0, t3, 1*8, Z_SP);        // Spill regs 10..13 to make them available as work registers.
    // NOTE(review): the trailing 'false' differs from the CRC32 entry (which passes 'true');
    // presumably it controls bit inversion of the crc value - confirm against kernel_crc32_1word.
    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, false);
    __ z_lmg(t0, t3, 1*8, Z_SP);         // Restore regs 10..13 from stack.

    // Restore caller sp for c2i case.
    // Z_R10 (== t0) has just been reloaded, so it holds its value from method entry again.
    __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.

    __ z_br(Z_R14);

    BLOCK_COMMENT("} CRC32C_update{Bytes|DirectByteBuffer}");
    return __ addr_at(entry_off);
  }

  return NULL;
}

View File

@ -111,13 +111,23 @@ void VM_Version::initialize() {
ContendedPaddingWidth = cache_line_size; ContendedPaddingWidth = cache_line_size;
} }
// On z/Architecture, the CRC32 intrinsics had to be implemented "by hand". // On z/Architecture, the CRC32/CRC32C intrinsics are implemented "by hand".
// They cannot be based on the CHECKSUM instruction which has been there // TODO: Provide implementation based on the vector instructions available from z13.
// since the very beginning (of z/Architecture). It computes "some kind of" a checksum // Note: The CHECKSUM instruction, which has been there since the very beginning
// which has nothing to do with the CRC32 algorithm. // (of z/Architecture), computes "some kind of" a checksum.
// It has nothing to do with the CRC32 algorithm.
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, true); FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
} }
if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true);
}
// TODO: Provide implementation.
if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
}
// On z/Architecture, we take UseAES as the general switch to enable/disable the AES intrinsics. // On z/Architecture, we take UseAES as the general switch to enable/disable the AES intrinsics.
// The specific, and yet to be defined, switches UseAESxxxIntrinsics will then be set // The specific, and yet to be defined, switches UseAESxxxIntrinsics will then be set
@ -195,11 +205,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
} }
if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
}
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
} }

View File

@ -212,7 +212,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
case vmIntrinsics::_updateCRC32: case vmIntrinsics::_updateCRC32:
case vmIntrinsics::_updateBytesCRC32: case vmIntrinsics::_updateBytesCRC32:
case vmIntrinsics::_updateByteBufferCRC32: case vmIntrinsics::_updateByteBufferCRC32:
#ifdef SPARC #if defined(SPARC) || defined(S390)
case vmIntrinsics::_updateBytesCRC32C: case vmIntrinsics::_updateBytesCRC32C:
case vmIntrinsics::_updateDirectByteBufferCRC32C: case vmIntrinsics::_updateDirectByteBufferCRC32C:
#endif #endif