8222271: [s390] optimize register usage in C2 instruction forms for clearing arrays

Reviewed-by: mdoerr, lucy
This commit is contained in:
Richard Reingruber 2019-04-16 08:51:01 +02:00
parent c6497e3f25
commit ced9f6cb77
3 changed files with 49 additions and 32 deletions

View File

@ -4355,12 +4355,9 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
// Emitter does not KILL cnt and base arguments, since they need to be copied to
// work registers anyway.
// Actually, only r0, r1 (which are work registers), and odd_tmp_reg are killed.
unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len) {
// Src_addr is evenReg.
// Src_len is odd_Reg.
unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) {
int block_start = offset();
Register tmp_reg = src_len; // Holds target instr addr for EX.
Register dst_len = Z_R1; // Holds dst len for MVCLE.
Register dst_addr = Z_R0; // Holds dst addr for MVCLE.
@ -4369,7 +4366,7 @@ unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer
BLOCK_COMMENT("Clear_Array {");
// Check for zero len and convert to long.
z_ltgfr(src_len, cnt_arg); // Remember casted value for doSTG case.
z_ltgfr(odd_tmp_reg, cnt_arg);
z_bre(done); // Nothing to do if len == 0.
// Prefetch data to be cleared.
@ -4378,16 +4375,17 @@ unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer
z_pfd(0x02, 256, Z_R0, base_pointer_arg);
}
z_sllg(dst_len, src_len, 3); // #bytes to clear.
z_cghi(src_len, 32); // Check for len <= 256 bytes (<=32 DW).
z_brnh(doXC); // If so, use executed XC to clear.
z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear.
z_cghi(odd_tmp_reg, 32); // Check for len <= 256 bytes (<=32 DW).
z_brnh(doXC); // If so, use executed XC to clear.
// MVCLE: initialize long arrays (general case).
bind(doMVCLE);
z_lgr(dst_addr, base_pointer_arg);
clear_reg(src_len, true, false); // Src len of MVCLE is zero.
MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
// Pass 0 as source length to MVCLE: destination will be filled with padding byte 0.
// The even register of the register pair is not killed.
clear_reg(odd_tmp_reg, true, false);
MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0);
z_bru(done);
// XC: initialize short arrays.
@ -4396,12 +4394,12 @@ unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer
z_xc(0,0,base_pointer_arg,0,base_pointer_arg);
bind(doXC);
add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE.
add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE.
if (VM_Version::has_ExecuteExtensions()) {
z_exrl(dst_len, XC_template); // Execute XC with var. len.
z_exrl(dst_len, XC_template); // Execute XC with var. len.
} else {
z_larl(tmp_reg, XC_template);
z_ex(dst_len,0,Z_R0,tmp_reg); // Execute XC with var. len.
z_larl(odd_tmp_reg, XC_template);
z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len.
}
// z_bru(done); // fallthru
@ -4463,7 +4461,7 @@ unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) {
// Compiler ensures base is doubleword aligned and cnt is #doublewords.
// Emitter does not KILL cnt and base arguments, since they need to be copied to
// work registers anyway.
// Actually, only r0, r1, r4, and r5 (which are work registers) are killed.
// Actually, only r0 and r1 (which are work registers) and odd_tmp_reg are killed.
//
// For very large arrays, exploit MVCLE H/W support.
// MVCLE instruction automatically exploits H/W-optimized page mover.
@ -4471,9 +4469,7 @@ unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) {
// - All full pages are cleared with the page mover H/W assist.
// - Remaining bytes are again cleared by a series of XC to self.
//
unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len) {
// Src_addr is evenReg.
// Src_len is odd_Reg.
unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) {
int block_start = offset();
Register dst_len = Z_R1; // Holds dst len for MVCLE.
@ -4486,11 +4482,10 @@ unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_point
// Prepare other args to MVCLE.
z_lgr(dst_addr, base_pointer_arg);
// Indicate unused result.
(void) clear_reg(src_len, true, false); // Src len of MVCLE is zero.
// Clear.
MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
// Pass 0 as source length to MVCLE: destination will be filled with padding byte 0.
// The even register of the register pair is not killed.
(void) clear_reg(odd_tmp_reg, true, false); // Src len of MVCLE is zero.
MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0);
BLOCK_COMMENT("} Clear_Array_Const_Big");
int block_end = offset();

View File

@ -828,9 +828,9 @@ class MacroAssembler: public Assembler {
//--------------------------
//--- Operations on arrays.
//--------------------------
unsigned int Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len);
unsigned int Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg);
unsigned int Clear_Array_Const(long cnt, Register base);
unsigned int Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len);
unsigned int Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg);
unsigned int CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
Register cnt_reg,
Register tmp1_reg, Register tmp2_reg);

View File

@ -474,6 +474,19 @@ reg_class z_long_reg(
/*Z_R15_H,Z_R15*/ // SP
);
// z_long_reg without even registers.
// Register class holding only odd-numbered long registers: R3, R5, R7, R9,
// R11 and R13, each listed as its (Z_Rn_H, Z_Rn) pair.
// Operand allRoddRegL allocates from this class.
// Entries left in comments (R0, R1, R14, R15) are not part of the class.
// NOTE(review): R1 is odd but excluded - presumably because it is used as a
// work register (e.g. dst_len in MacroAssembler::Clear_Array); confirm.
reg_class z_long_odd_reg(
/*Z_R0_H,Z_R0*/ // R0
/*Z_R1_H,Z_R1*/
Z_R3_H,Z_R3,
Z_R5_H,Z_R5,
Z_R7_H,Z_R7,
Z_R9_H,Z_R9,
Z_R11_H,Z_R11,
Z_R13_H,Z_R13
/*Z_R14_H,Z_R14,*/ // return_pc
/*Z_R15_H,Z_R15*/ // SP
);
// Special Class for Condition Code Flags Register
@ -3378,6 +3391,7 @@ operand iRegL() %{
match(RegL);
match(revenRegL);
match(roddRegL);
match(allRoddRegL);
match(rarg1RegL);
match(rarg5RegL);
format %{ %}
@ -3400,6 +3414,14 @@ operand roddRegL() %{
interface(REG_INTER);
%}
// Available odd registers for iRegL.
// Long register operand whose allocation is constrained to register class
// z_long_odd_reg, i.e. any of the odd registers listed there rather than one
// fixed register.
// The inlineCallClearArray and inlineCallClearArrayConstBig instructs use it
// as their TEMP: the emitter takes this odd register as the MVCLE source
// length and derives the even register of the pair from its encoding - 1.
operand allRoddRegL() %{
constraint(ALLOC_IN_RC(z_long_odd_reg));
match(iRegL);
format %{ %}
interface(REG_INTER);
%}
operand rarg1RegL() %{
constraint(ALLOC_IN_RC(z_rarg1_long_reg));
match(iRegL);
@ -9899,23 +9921,23 @@ instruct inlineCallClearArrayConst(SSlenDW cnt, iRegP_N2P base, Universe dummy,
ins_pipe(pipe_class_dummy);
%}
instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, allRoddRegL tmpL, flagsReg cr) %{
match(Set dummy (ClearArray cnt base));
effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
effect(TEMP tmpL, KILL cr); // R0, R1 are killed, too.
ins_cost(200);
// TODO: s390 port size(VARIABLE_SIZE); // Variable in size due to optimized constant loader.
format %{ "ClearArrayConstBig $cnt,$base" %}
ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $srcA$$Register, $srcL$$Register); %}
ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $tmpL$$Register); %}
ins_pipe(pipe_class_dummy);
%}
instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, allRoddRegL tmpL, flagsReg cr) %{
match(Set dummy (ClearArray cnt base));
effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
effect(TEMP tmpL, KILL cr); // R0, R1 are killed, too.
ins_cost(300);
// TODO: s390 port size(FIXED_SIZE); // z/Architecture: emitted code depends on PreferLAoverADD being on/off.
format %{ "ClearArrayVar $cnt,$base" %}
ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $srcA$$Register, $srcL$$Register); %}
ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $tmpL$$Register); %}
ins_pipe(pipe_class_dummy);
%}