8154867: PPC64: Better byte behavior

Reviewed-by: goetz
This commit is contained in:
Martin Doerr 2016-04-22 10:46:08 +02:00
parent 4849577827
commit 2ee49b78fe
5 changed files with 111 additions and 8 deletions

@ -360,7 +360,7 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
length.set_instruction(x->length());
length.load_item();
}
if (needs_store_check) {
if (needs_store_check || x->check_boolean()) {
value.load_item();
} else {
value.load_for_store(x->elt_type());
@ -407,7 +407,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
true /* do_load */, false /* patch */, NULL);
}
__ move(value.result(), array_addr, null_check_info);
LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info);
__ move(result, array_addr, null_check_info);
if (obj_store) {
// Precise card mark.
post_barrier(LIR_OprFact::address(array_addr), value.result());

@ -169,6 +169,7 @@ void InterpreterMacroAssembler::load_earlyret_value(TosState state, Register Rsc
case ltos: ld(R17_tos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
break;
case btos: // fall through
case ztos: // fall through
case ctos: // fall through
case stos: // fall through
case itos: lwz(R17_tos, in_bytes(JvmtiThreadState::earlyret_value_offset()), RjvmtiState);
@ -294,6 +295,7 @@ void InterpreterMacroAssembler::push(TosState state) {
switch (state) {
case atos: push_ptr(); break;
case btos:
case ztos:
case ctos:
case stos:
case itos: push_i(); break;
@ -309,6 +311,7 @@ void InterpreterMacroAssembler::pop(TosState state) {
switch (state) {
case atos: pop_ptr(); break;
case btos:
case ztos:
case ctos:
case stos:
case itos: pop_i(); break;
@ -749,6 +752,43 @@ void InterpreterMacroAssembler::merge_frames(Register Rsender_sp, Register retur
stdux(Rscratch2, R1_SP, Rscratch1); // atomically set *(SP = top_frame_sp) = **SP
}
void InterpreterMacroAssembler::narrow(Register result) {
Register ret_type = R11_scratch1;
ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
lbz(ret_type, in_bytes(ConstMethod::result_type_offset()), R11_scratch1);
Label notBool, notByte, notChar, done;
// common case first
cmpwi(CCR0, ret_type, T_INT);
beq(CCR0, done);
cmpwi(CCR0, ret_type, T_BOOLEAN);
bne(CCR0, notBool);
andi(result, result, 0x1);
b(done);
bind(notBool);
cmpwi(CCR0, ret_type, T_BYTE);
bne(CCR0, notByte);
extsb(result, result);
b(done);
bind(notByte);
cmpwi(CCR0, ret_type, T_CHAR);
bne(CCR0, notChar);
andi(result, result, 0xffff);
b(done);
bind(notChar);
// cmpwi(CCR0, ret_type, T_SHORT); // all that's left
// bne(CCR0, done);
extsh(result, result);
// Nothing to do for T_INT
bind(done);
}
// Remove activation.
//
// Unlock the receiver if this is a synchronized method.

@ -140,6 +140,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
void get_cpool_and_tags(Register Rcpool, Register Rtags);
void is_a(Label& L);
void narrow(Register result);
// Java Call Helpers
void call_from_interpreter(Register Rtarget_method, Register Rret_addr, Register Rscratch1, Register Rscratch2);

@ -655,6 +655,7 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
switch (state) {
case ltos:
case btos:
case ztos:
case ctos:
case stos:
case atos:
@ -701,6 +702,7 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, i
switch (state) {
case ltos:
case btos:
case ztos:
case ctos:
case stos:
case atos:
@ -2092,12 +2094,14 @@ address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state
// Copied from TemplateTable::_return.
// Restoration of lr done by remove_activation.
switch (state) {
// Narrow result if state is itos but result type is smaller.
case itos: __ narrow(R17_tos); /* fall through */
case ltos:
case btos:
case ztos:
case ctos:
case stos:
case atos:
case itos: __ mr(R3_RET, R17_tos); break;
case atos: __ mr(R3_RET, R17_tos); break;
case ftos:
case dtos: __ fmr(F1_RET, F15_ftos); break;
case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need
@ -2157,6 +2161,10 @@ address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
bname = "trace_code_btos {";
tsize = 2;
break;
case ztos:
bname = "trace_code_ztos {";
tsize = 2;
break;
case ctos:
bname = "trace_code_ctos {";
tsize = 2;

@ -170,6 +170,7 @@ void TemplateTable::patch_bytecode(Bytecodes::Code new_bc, Register Rnew_bc, Reg
switch (new_bc) {
case Bytecodes::_fast_aputfield:
case Bytecodes::_fast_bputfield:
case Bytecodes::_fast_zputfield:
case Bytecodes::_fast_cputfield:
case Bytecodes::_fast_dputfield:
case Bytecodes::_fast_fputfield:
@ -981,9 +982,21 @@ void TemplateTable::bastore() {
Rarray = R12_scratch2,
Rscratch = R3_ARG1;
__ pop_i(Rindex);
__ pop_ptr(Rarray);
// tos: val
// Rarray: array ptr (popped by index_check)
__ index_check(Rarray, Rindex, 0, Rscratch, Rarray);
// Need to check whether array is boolean or byte
// since both types share the bastore bytecode.
__ load_klass(Rscratch, Rarray);
__ lwz(Rscratch, in_bytes(Klass::layout_helper_offset()), Rscratch);
int diffbit = exact_log2(Klass::layout_helper_boolean_diffbit());
__ testbitdi(CCR0, R0, Rscratch, diffbit);
Label L_skip;
__ bfalse(CCR0, L_skip);
__ andi(R17_tos, R17_tos, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1
__ bind(L_skip);
__ index_check_without_pop(Rarray, Rindex, 0, Rscratch, Rarray);
__ stb(R17_tos, arrayOopDesc::base_offset_in_bytes(T_BYTE), Rarray);
}
@ -2108,12 +2121,16 @@ void TemplateTable::_return(TosState state) {
__ remove_activation(state, /* throw_monitor_exception */ true);
// Restoration of lr done by remove_activation.
switch (state) {
// Narrow result if state is itos but result type is smaller.
// Need to narrow in the return bytecode rather than in generate_return_entry
// since compiled code callers expect the result to already be narrowed.
case itos: __ narrow(R17_tos); /* fall through */
case ltos:
case btos:
case ztos:
case ctos:
case stos:
case atos:
case itos: __ mr(R3_RET, R17_tos); break;
case atos: __ mr(R3_RET, R17_tos); break;
case ftos:
case dtos: __ fmr(F1_RET, F15_ftos); break;
case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need
@ -2518,6 +2535,21 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
__ beq(CCR6, Lacquire); // Volatile?
__ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
__ align(32, 28, 28); // Align load.
// __ bind(Lztos); (same code as btos)
__ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
assert(branch_table[ztos] == 0, "can't compute twice");
branch_table[ztos] = __ pc(); // non-volatile_entry point
__ lbzx(R17_tos, Rclass_or_obj, Roffset);
__ extsb(R17_tos, R17_tos);
__ push(ztos);
if (!is_static) {
// use btos rewriting, no truncating to t/f bit is needed for getfield.
patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch);
}
__ beq(CCR6, Lacquire); // Volatile?
__ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
__ align(32, 28, 28); // Align load.
// __ bind(Lctos);
__ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
@ -2618,6 +2650,7 @@ void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, boo
case Bytecodes::_fast_aputfield: __ push_ptr(); offs+= Interpreter::stackElementSize; break;
case Bytecodes::_fast_iputfield: // Fall through
case Bytecodes::_fast_bputfield: // Fall through
case Bytecodes::_fast_zputfield: // Fall through
case Bytecodes::_fast_cputfield: // Fall through
case Bytecodes::_fast_sputfield: __ push_i(); offs+= Interpreter::stackElementSize; break;
case Bytecodes::_fast_lputfield: __ push_l(); offs+=2*Interpreter::stackElementSize; break;
@ -2658,6 +2691,7 @@ void TemplateTable::jvmti_post_field_mod(Register Rcache, Register Rscratch, boo
case Bytecodes::_fast_aputfield: __ pop_ptr(); break;
case Bytecodes::_fast_iputfield: // Fall through
case Bytecodes::_fast_bputfield: // Fall through
case Bytecodes::_fast_zputfield: // Fall through
case Bytecodes::_fast_cputfield: // Fall through
case Bytecodes::_fast_sputfield: __ pop_i(); break;
case Bytecodes::_fast_lputfield: __ pop_l(); break;
@ -2824,6 +2858,21 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
}
__ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
__ align(32, 28, 28); // Align pop.
// __ bind(Lztos);
__ release(); // Volatile entry point (one instruction before non-volatile_entry point).
assert(branch_table[ztos] == 0, "can't compute twice");
branch_table[ztos] = __ pc(); // non-volatile_entry point
__ pop(ztos);
if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
__ andi(R17_tos, R17_tos, 0x1);
__ stbx(R17_tos, Rclass_or_obj, Roffset);
if (!is_static) { patch_bytecode(Bytecodes::_fast_zputfield, Rbc, Rscratch, true, byte_no); }
if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
__ beq(CR_is_vol, Lvolatile); // Volatile?
}
__ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
__ align(32, 28, 28); // Align pop.
// __ bind(Lctos);
__ release(); // Volatile entry point (one instruction before non-volatile_entry point).
@ -2949,6 +2998,9 @@ void TemplateTable::fast_storefield(TosState state) {
__ stdx(R17_tos, Rclass_or_obj, Roffset);
break;
case Bytecodes::_fast_zputfield:
__ andi(R17_tos, R17_tos, 0x1); // boolean is true if LSB is 1
// fall through to bputfield
case Bytecodes::_fast_bputfield:
__ stbx(R17_tos, Rclass_or_obj, Roffset);
break;