8222271: [s390] optimize register usage in C2 instruction forms for clearing arrays
Reviewed-by: mdoerr, lucy
This commit is contained in:
parent
c6497e3f25
commit
ced9f6cb77
@ -4355,12 +4355,9 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
|
||||
// Emitter does not KILL cnt and base arguments, since they need to be copied to
|
||||
// work registers anyway.
|
||||
// Actually, only r0, r1, and odd_tmp_reg are killed.
|
||||
unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len) {
|
||||
// Src_addr is evenReg.
|
||||
// Src_len is odd_Reg.
|
||||
unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg) {
|
||||
|
||||
int block_start = offset();
|
||||
Register tmp_reg = src_len; // Holds target instr addr for EX.
|
||||
Register dst_len = Z_R1; // Holds dst len for MVCLE.
|
||||
Register dst_addr = Z_R0; // Holds dst addr for MVCLE.
|
||||
|
||||
@ -4369,7 +4366,7 @@ unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer
|
||||
BLOCK_COMMENT("Clear_Array {");
|
||||
|
||||
// Check for zero len and convert to long.
|
||||
z_ltgfr(src_len, cnt_arg); // Remember casted value for doSTG case.
|
||||
z_ltgfr(odd_tmp_reg, cnt_arg);
|
||||
z_bre(done); // Nothing to do if len == 0.
|
||||
|
||||
// Prefetch data to be cleared.
|
||||
@ -4378,16 +4375,17 @@ unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer
|
||||
z_pfd(0x02, 256, Z_R0, base_pointer_arg);
|
||||
}
|
||||
|
||||
z_sllg(dst_len, src_len, 3); // #bytes to clear.
|
||||
z_cghi(src_len, 32); // Check for len <= 256 bytes (<=32 DW).
|
||||
z_brnh(doXC); // If so, use executed XC to clear.
|
||||
z_sllg(dst_len, odd_tmp_reg, 3); // #bytes to clear.
|
||||
z_cghi(odd_tmp_reg, 32); // Check for len <= 256 bytes (<=32 DW).
|
||||
z_brnh(doXC); // If so, use executed XC to clear.
|
||||
|
||||
// MVCLE: initialize long arrays (general case).
|
||||
bind(doMVCLE);
|
||||
z_lgr(dst_addr, base_pointer_arg);
|
||||
clear_reg(src_len, true, false); // Src len of MVCLE is zero.
|
||||
|
||||
MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
|
||||
// Pass 0 as source length to MVCLE: destination will be filled with padding byte 0.
|
||||
// The even register of the register pair is not killed.
|
||||
clear_reg(odd_tmp_reg, true, false);
|
||||
MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding()-1), 0);
|
||||
z_bru(done);
|
||||
|
||||
// XC: initialize short arrays.
|
||||
@ -4396,12 +4394,12 @@ unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer
|
||||
z_xc(0,0,base_pointer_arg,0,base_pointer_arg);
|
||||
|
||||
bind(doXC);
|
||||
add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE.
|
||||
add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE.
|
||||
if (VM_Version::has_ExecuteExtensions()) {
|
||||
z_exrl(dst_len, XC_template); // Execute XC with var. len.
|
||||
z_exrl(dst_len, XC_template); // Execute XC with var. len.
|
||||
} else {
|
||||
z_larl(tmp_reg, XC_template);
|
||||
z_ex(dst_len,0,Z_R0,tmp_reg); // Execute XC with var. len.
|
||||
z_larl(odd_tmp_reg, XC_template);
|
||||
z_ex(dst_len,0,Z_R0,odd_tmp_reg); // Execute XC with var. len.
|
||||
}
|
||||
// z_bru(done); // fallthru
|
||||
|
||||
@ -4463,7 +4461,7 @@ unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) {
|
||||
// Compiler ensures base is doubleword aligned and cnt is #doublewords.
|
||||
// Emitter does not KILL cnt and base arguments, since they need to be copied to
|
||||
// work registers anyway.
|
||||
// Actually, only r0, r1, r4, and r5 (which are work registers) are killed.
|
||||
// Actually, only r0 and r1 (which are work registers) and odd_tmp_reg are killed.
|
||||
//
|
||||
// For very large arrays, exploit MVCLE H/W support.
|
||||
// MVCLE instruction automatically exploits H/W-optimized page mover.
|
||||
@ -4471,9 +4469,7 @@ unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) {
|
||||
// - All full pages are cleared with the page mover H/W assist.
|
||||
// - Remaining bytes are again cleared by a series of XC to self.
|
||||
//
|
||||
unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len) {
|
||||
// Src_addr is evenReg.
|
||||
// Src_len is odd_Reg.
|
||||
unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg) {
|
||||
|
||||
int block_start = offset();
|
||||
Register dst_len = Z_R1; // Holds dst len for MVCLE.
|
||||
@ -4486,11 +4482,10 @@ unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_point
|
||||
|
||||
// Prepare other args to MVCLE.
|
||||
z_lgr(dst_addr, base_pointer_arg);
|
||||
// Indicate unused result.
|
||||
(void) clear_reg(src_len, true, false); // Src len of MVCLE is zero.
|
||||
|
||||
// Clear.
|
||||
MacroAssembler::move_long_ext(dst_addr, src_addr, 0);
|
||||
// Pass 0 as source length to MVCLE: destination will be filled with padding byte 0.
|
||||
// The even register of the register pair is not killed.
|
||||
(void) clear_reg(odd_tmp_reg, true, false); // Src len of MVCLE is zero.
|
||||
MacroAssembler::move_long_ext(dst_addr, as_Register(odd_tmp_reg->encoding() - 1), 0);
|
||||
BLOCK_COMMENT("} Clear_Array_Const_Big");
|
||||
|
||||
int block_end = offset();
|
||||
|
@ -828,9 +828,9 @@ class MacroAssembler: public Assembler {
|
||||
//--------------------------
|
||||
//--- Operations on arrays.
|
||||
//--------------------------
|
||||
unsigned int Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len);
|
||||
unsigned int Clear_Array(Register cnt_arg, Register base_pointer_arg, Register odd_tmp_reg);
|
||||
unsigned int Clear_Array_Const(long cnt, Register base);
|
||||
unsigned int Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len);
|
||||
unsigned int Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register odd_tmp_reg);
|
||||
unsigned int CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
|
||||
Register cnt_reg,
|
||||
Register tmp1_reg, Register tmp2_reg);
|
||||
|
@ -474,6 +474,19 @@ reg_class z_long_reg(
|
||||
/*Z_R15_H,Z_R15*/ // SP
|
||||
);
|
||||
|
||||
// Subset of z_long_reg that contains only the odd-numbered registers (R3, R5, ..., R13).
|
||||
reg_class z_long_odd_reg(
|
||||
/*Z_R0_H,Z_R0*/ // R0
|
||||
/*Z_R1_H,Z_R1*/
|
||||
Z_R3_H,Z_R3,
|
||||
Z_R5_H,Z_R5,
|
||||
Z_R7_H,Z_R7,
|
||||
Z_R9_H,Z_R9,
|
||||
Z_R11_H,Z_R11,
|
||||
Z_R13_H,Z_R13
|
||||
/*Z_R14_H,Z_R14,*/ // return_pc
|
||||
/*Z_R15_H,Z_R15*/ // SP
|
||||
);
|
||||
|
||||
// Special Class for Condition Code Flags Register
|
||||
|
||||
@ -3378,6 +3391,7 @@ operand iRegL() %{
|
||||
match(RegL);
|
||||
match(revenRegL);
|
||||
match(roddRegL);
|
||||
match(allRoddRegL);
|
||||
match(rarg1RegL);
|
||||
match(rarg5RegL);
|
||||
format %{ %}
|
||||
@ -3400,6 +3414,14 @@ operand roddRegL() %{
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Long operand that may be allocated in any available odd-numbered register (register class z_long_odd_reg).
|
||||
operand allRoddRegL() %{
|
||||
constraint(ALLOC_IN_RC(z_long_odd_reg));
|
||||
match(iRegL);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
operand rarg1RegL() %{
|
||||
constraint(ALLOC_IN_RC(z_rarg1_long_reg));
|
||||
match(iRegL);
|
||||
@ -9899,23 +9921,23 @@ instruct inlineCallClearArrayConst(SSlenDW cnt, iRegP_N2P base, Universe dummy,
|
||||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
||||
instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
|
||||
instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, allRoddRegL tmpL, flagsReg cr) %{
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
|
||||
effect(TEMP tmpL, KILL cr); // R0, R1 are killed, too.
|
||||
ins_cost(200);
|
||||
// TODO: s390 port size(VARIABLE_SIZE); // Variable in size due to optimized constant loader.
|
||||
format %{ "ClearArrayConstBig $cnt,$base" %}
|
||||
ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $srcA$$Register, $srcL$$Register); %}
|
||||
ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $tmpL$$Register); %}
|
||||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
||||
instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
|
||||
instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, allRoddRegL tmpL, flagsReg cr) %{
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
|
||||
effect(TEMP tmpL, KILL cr); // R0, R1 are killed, too.
|
||||
ins_cost(300);
|
||||
// TODO: s390 port size(FIXED_SIZE); // z/Architecture: emitted code depends on PreferLAoverADD being on/off.
|
||||
format %{ "ClearArrayVar $cnt,$base" %}
|
||||
ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $srcA$$Register, $srcL$$Register); %}
|
||||
ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $tmpL$$Register); %}
|
||||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user