8315801: [PPC64] JNI code should be more similar to the Panama implementation

Reviewed-by: rrich, lucy
This commit is contained in:
Martin Doerr 2023-11-17 11:49:21 +00:00
parent 8ec6b8de3b
commit 368e4f60a9
14 changed files with 53 additions and 140 deletions

View File

@ -801,9 +801,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
static int c_calling_convention_priv(const BasicType *sig_bt,
VMRegPair *regs,
VMRegPair *regs2,
int total_args_passed) {
assert(regs2 == nullptr, "not needed on AArch64");
// We return the amount of VMRegImpl stack slots we need to reserve for all
// the arguments NOT counting out_preserve_stack_slots.
@ -897,10 +895,9 @@ int SharedRuntime::vector_calling_convention(VMRegPair *regs,
// Public entry point for the C calling convention: forwards the signature to
// c_calling_convention_priv and returns its result, but treats a negative
// result as fatal instead of handing it back to the caller.
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
VMRegPair *regs2,
int total_args_passed)
{
int result = c_calling_convention_priv(sig_bt, regs, regs2, total_args_passed);
int result = c_calling_convention_priv(sig_bt, regs, total_args_passed);
// A negative value from the helper means the argument configuration is not
// supported; callers of this wrapper never see it because guarantee() stops here.
guarantee(result >= 0, "Unsupported arguments configuration");
return result;
}
@ -1457,7 +1454,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Now figure out where the args must be stored and how much stack space
// they require.
int out_arg_slots;
out_arg_slots = c_calling_convention_priv(out_sig_bt, out_regs, nullptr, total_c_args);
out_arg_slots = c_calling_convention_priv(out_sig_bt, out_regs, total_c_args);
if (out_arg_slots < 0) {
return nullptr;

View File

@ -254,10 +254,7 @@ bool SharedRuntime::is_wide_vector(int size) {
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
VMRegPair *regs2,
int total_args_passed) {
assert(regs2 == nullptr, "not needed on arm");
int slot = 0;
int ireg = 0;
#ifdef __ABI_HARD__
@ -795,7 +792,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
out_sig_bt[argc++] = in_sig_bt[i];
}
int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, nullptr, total_c_args);
int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
// Since object arguments need to be wrapped, we must preserve space
// for those object arguments which come in registers (GPR_PARAMS maximum)

View File

@ -126,20 +126,20 @@ class Argument {
int _number; // The number of the argument.
public:
enum {
// Only 8 registers may contain integer parameters.
n_register_parameters = 8,
// Can have up to 8 floating registers.
n_float_register_parameters = 8,
// PPC C calling conventions.
// The first eight arguments are passed in int regs if they are int.
n_int_register_parameters_c = 8,
// The first thirteen float arguments are passed in float regs.
n_float_register_parameters_c = 13,
// Only the first 8 parameters are not placed on the stack. Aix disassembly
// shows that xlC places all float args after argument 8 on the stack AND
// in a register. This is not documented, but we follow this convention, too.
n_regs_not_on_stack_c = 8,
#ifdef VM_LITTLE_ENDIAN
// Floats are in the least significant word of an argument slot.
float_on_stack_offset_in_bytes_c = 0,
#else
// Big endian: AIX keeps a float in the most significant word of an
// argument slot (offset 0); other platforms use the least significant word (offset 4).
float_on_stack_offset_in_bytes_c = AIX_ONLY(0) NOT_AIX(4),
#endif
n_int_register_parameters_j = 8, // duplicates num_java_iarg_registers
n_float_register_parameters_j = 13, // num_java_farg_registers
@ -150,7 +150,7 @@ class Argument {
int number() const { return _number; }
// Locating register-based arguments:
bool is_register() const { return _number < n_register_parameters; }
bool is_register() const { return _number < n_int_register_parameters_c; }
Register as_register() const {
assert(is_register(), "must be a register argument");

View File

@ -179,7 +179,7 @@ static void move_float(MacroAssembler* masm, int out_stk_bias,
case StorageType::STACK:
if (from_reg.segment_mask() == REG32_MASK) {
assert(to_reg.stack_size() == 4, "size should match");
// TODO: Check if AIX needs 4 Byte offset
// Note: Argument::float_on_stack_offset_in_bytes_c is handled by CallArranger
__ stfs(as_FloatRegister(from_reg), reg2offset(to_reg, out_stk_bias), R1_SP);
} else {
assert(to_reg.stack_size() == 8, "size should match");
@ -204,6 +204,7 @@ static void move_stack(MacroAssembler* masm, Register callerSP, int in_stk_bias,
case StorageType::FLOAT:
switch (from_reg.stack_size()) {
case 8: __ lfd(as_FloatRegister(to_reg), reg2offset(from_reg, in_stk_bias), callerSP); break;
// Note: Argument::float_on_stack_offset_in_bytes_c is handled by CallArranger
case 4: __ lfs(as_FloatRegister(to_reg), reg2offset(from_reg, in_stk_bias), callerSP); break;
default: ShouldNotReachHere();
}

View File

@ -741,7 +741,6 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
// Calling convention for calling C code.
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
VMRegPair *regs2,
int total_args_passed) {
// Calling conventions for C runtime calls and calls to JNI native methods.
//
@ -782,35 +781,20 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
"consistency");
// `Stk' counts stack slots. Due to alignment, 32 bit values occupy
// 2 such slots, like 64 bit values do.
const int inc_stk_for_intfloat = 2; // 2 slots for ints and floats
const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
const int additional_frame_header_slots = ((frame::native_abi_minframe_size - frame::jit_out_preserve_size)
/ VMRegImpl::stack_slot_size);
const int float_offset_in_slots = Argument::float_on_stack_offset_in_bytes_c / VMRegImpl::stack_slot_size;
int i;
VMReg reg;
// Leave room for C-compatible ABI_REG_ARGS.
int stk = (frame::native_abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size;
int arg = 0;
int freg = 0;
bool stack_used = false;
// Avoid passing C arguments in the wrong stack slots.
#if defined(ABI_ELFv2)
assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 96,
"passing C arguments in wrong stack slots");
#else
assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 112,
"passing C arguments in wrong stack slots");
#endif
// We fill-out regs AND regs2 if an argument must be passed in a
// register AND in a stack slot. If regs2 is null in such a
// situation, we bail-out with a fatal error.
for (int i = 0; i < total_args_passed; ++i, ++arg) {
// Initialize regs2 to BAD.
if (regs2 != nullptr) regs2[i].set_bad();
// Each argument corresponds to a slot in the Parameter Save Area (if not omitted)
int stk = (arg * 2) + additional_frame_header_slots;
switch(sig_bt[i]) {
//
// If arguments 0-7 are integers, they are passed in integer registers.
// Argument i is placed in iarg_reg[i].
@ -832,7 +816,7 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
reg = iarg_reg[arg];
} else {
reg = VMRegImpl::stack2reg(stk);
stk += inc_stk_for_longdouble;
stack_used = true;
}
regs[i].set2(reg);
break;
@ -844,43 +828,14 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
// in farg_reg[j] if argument i is the j-th float argument of this call.
//
case T_FLOAT:
#if defined(LINUX)
// Linux uses ELF ABI. Both original ELF and ELFv2 ABIs have float
// in the least significant word of an argument slot.
#if defined(VM_LITTLE_ENDIAN)
#define FLOAT_WORD_OFFSET_IN_SLOT 0
#else
#define FLOAT_WORD_OFFSET_IN_SLOT 1
#endif
#elif defined(AIX)
// Although AIX runs on big endian CPU, float is in the most
// significant word of an argument slot.
#define FLOAT_WORD_OFFSET_IN_SLOT 0
#else
#error "unknown OS"
#endif
if (freg < Argument::n_float_register_parameters_c) {
// Put float in register ...
reg = farg_reg[freg];
++freg;
// Argument i for i > 8 is placed on the stack even if it's
// placed in a register (if it's a float arg). Aix disassembly
// shows that xlC places these float args on the stack AND in
// a register. This is not documented, but we follow this
// convention, too.
if (arg >= Argument::n_regs_not_on_stack_c) {
// ... and on the stack.
guarantee(regs2 != nullptr, "must pass float in register and stack slot");
VMReg reg2 = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT);
regs2[i].set1(reg2);
stk += inc_stk_for_intfloat;
}
} else {
// Put float on stack.
reg = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT);
stk += inc_stk_for_intfloat;
reg = VMRegImpl::stack2reg(stk + float_offset_in_slots);
stack_used = true;
}
regs[i].set1(reg);
break;
@ -890,23 +845,10 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
// Put double in register ...
reg = farg_reg[freg];
++freg;
// Argument i for i > 8 is placed on the stack even if it's
// placed in a register (if it's a double arg). Aix disassembly
// shows that xlC places these float args on the stack AND in
// a register. This is not documented, but we follow this
// convention, too.
if (arg >= Argument::n_regs_not_on_stack_c) {
// ... and on the stack.
guarantee(regs2 != nullptr, "must pass float in register and stack slot");
VMReg reg2 = VMRegImpl::stack2reg(stk);
regs2[i].set2(reg2);
stk += inc_stk_for_longdouble;
}
} else {
// Put double on stack.
reg = VMRegImpl::stack2reg(stk);
stk += inc_stk_for_longdouble;
stack_used = true;
}
regs[i].set2(reg);
break;
@ -921,7 +863,17 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
}
}
return align_up(stk, 2);
// Return size of the stack frame excluding the jit_out_preserve part in single-word slots.
#if defined(ABI_ELFv2)
assert(additional_frame_header_slots == 0, "ABIv2 shouldn't use extra slots");
// ABIv2 allows omitting the Parameter Save Area if the callee's prototype
// indicates that all parameters can be passed in registers.
return stack_used ? (arg * 2) : 0;
#else
// The Parameter Save Area needs to be at least 8 double-word slots for ABIv1.
// We have to add extra slots because ABIv1 uses a larger header.
return MAX2(arg, 8) * 2 + additional_frame_header_slots;
#endif
}
#endif // COMPILER2
@ -2140,7 +2092,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
VMRegPair *out_regs2 = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
BasicType* in_elem_bt = nullptr;
// Create the signature for the C call:
@ -2193,7 +2144,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// - *_slot_offset Indicates offset from SP in number of stack slots.
// - *_offset Indicates offset from SP in bytes.
int stack_slots = c_calling_convention(out_sig_bt, out_regs, out_regs2, total_c_args) + // 1+2)
int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2)
SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention.
// Now the space for the inbound oop handle area.
@ -2358,11 +2309,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
} else if (out_regs[out].first()->is_FloatRegister()) {
freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true;
}
if (out_regs2[out].first()->is_Register()) {
reg_destroyed[out_regs2[out].first()->as_Register()->encoding()] = true;
} else if (out_regs2[out].first()->is_FloatRegister()) {
freg_destroyed[out_regs2[out].first()->as_FloatRegister()->encoding()] = true;
}
#endif // ASSERT
switch (in_sig_bt[in]) {
@ -2389,15 +2335,9 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
break;
case T_FLOAT:
float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
if (out_regs2[out].first()->is_valid()) {
float_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1);
}
break;
case T_DOUBLE:
double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
if (out_regs2[out].first()->is_valid()) {
double_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1);
}
break;
case T_ADDRESS:
fatal("found type (T_ADDRESS) in java args");
@ -2474,7 +2414,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Save argument registers and leave room for C-compatible ABI_REG_ARGS.
int frame_size = frame::native_abi_reg_args_size + align_up(total_c_args * wordSize, frame::alignment_in_bytes);
__ mr(R11_scratch1, R1_SP);
RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs, out_regs2);
RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs);
// Do the call.
__ set_last_Java_frame(R11_scratch1, r_return_pc);
@ -2482,7 +2422,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread);
__ reset_last_Java_frame();
RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs, out_regs2);
RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs);
__ asm_assert_mem8_is_zero(thread_(pending_exception),
"no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C");

View File

@ -291,21 +291,7 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() {
__ bind(do_float);
__ lfs(floatSlot, 0, arg_java);
#if defined(LINUX)
// Linux uses ELF ABI. Both original ELF and ELFv2 ABIs have float
// in the least significant word of an argument slot.
#if defined(VM_LITTLE_ENDIAN)
__ stfs(floatSlot, 0, arg_c);
#else
__ stfs(floatSlot, 4, arg_c);
#endif
#elif defined(AIX)
// Although AIX runs on big endian CPU, float is in most significant
// word of an argument slot.
__ stfs(floatSlot, 0, arg_c);
#else
#error "unknown OS"
#endif
__ stfs(floatSlot, Argument::float_on_stack_offset_in_bytes_c, arg_c);
__ addi(arg_java, arg_java, -BytesPerWord);
__ addi(arg_c, arg_c, BytesPerWord);
__ cmplwi(CCR0, fpcnt, max_fp_register_arguments);
@ -951,14 +937,14 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist
in_bytes(ConstMethod::size_of_parameters_offset()), Rconst_method);
if (native_call) {
// If we're calling a native method, we reserve space for the worst-case signature
// handler varargs vector, which is max(Argument::n_register_parameters, parameter_count+2).
// handler varargs vector, which is max(Argument::n_int_register_parameters_c, parameter_count+2).
// We add two slots to the parameter_count, one for the jni
// environment and one for a possible native mirror.
Label skip_native_calculate_max_stack;
__ addi(Rtop_frame_size, Rsize_of_parameters, 2);
__ cmpwi(CCR0, Rtop_frame_size, Argument::n_register_parameters);
__ cmpwi(CCR0, Rtop_frame_size, Argument::n_int_register_parameters_c);
__ bge(CCR0, skip_native_calculate_max_stack);
__ li(Rtop_frame_size, Argument::n_register_parameters);
__ li(Rtop_frame_size, Argument::n_int_register_parameters_c);
__ bind(skip_native_calculate_max_stack);
__ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize);
__ sldi(Rtop_frame_size, Rtop_frame_size, Interpreter::logStackElementSize);
@ -1355,7 +1341,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// outgoing argument area.
//
// Not needed on PPC64.
//__ add(SP, SP, Argument::n_register_parameters*BytesPerWord);
//__ add(SP, SP, Argument::n_int_register_parameters_c*BytesPerWord);
assert(result_handler_addr->is_nonvolatile(), "result_handler_addr must be in a non-volatile register");
// Save across call to native method.

View File

@ -694,9 +694,7 @@ int SharedRuntime::vector_calling_convention(VMRegPair *regs,
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
VMRegPair *regs2,
int total_args_passed) {
assert(regs2 == nullptr, "not needed on riscv");
// We return the amount of VMRegImpl stack slots we need to reserve for all
// the arguments NOT counting out_preserve_stack_slots.
@ -1345,7 +1343,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Now figure out where the args must be stored and how much stack space
// they require.
int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, nullptr, total_c_args);
int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
// Compute framesize for the wrapper. We need to handlize all oops in
// incoming registers

View File

@ -760,9 +760,7 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
VMRegPair *regs2,
int total_args_passed) {
assert(regs2 == nullptr, "second VMRegPair array not used on this platform");
// Calling conventions for C runtime calls and calls to JNI native methods.
const VMReg z_iarg_reg[5] = {
@ -1457,7 +1455,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// *_slot_offset indicates offset from SP in #stack slots
// *_offset indicates offset from SP in #bytes
int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/nullptr, total_c_args) + // 1+2
int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2
SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
// Now the space for the inbound oop handle area.

View File

@ -978,9 +978,8 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
VMRegPair *regs2,
int total_args_passed) {
assert(regs2 == nullptr, "not needed on x86");
// We return the amount of VMRegImpl stack slots we need to reserve for all
// the arguments NOT counting out_preserve_stack_slots.
@ -1366,7 +1365,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Now figure out where the args must be stored and how much stack space
// they require.
int out_arg_slots;
out_arg_slots = c_calling_convention(out_sig_bt, out_regs, nullptr, total_c_args);
out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
// Compute framesize for the wrapper. We need to handlize all oops in
// registers a max of 2 on x86.

View File

@ -1053,9 +1053,8 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
VMRegPair *regs2,
int total_args_passed) {
assert(regs2 == nullptr, "not needed on x86");
// We return the amount of VMRegImpl stack slots we need to reserve for all
// the arguments NOT counting out_preserve_stack_slots.
@ -1803,7 +1802,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Now figure out where the args must be stored and how much stack space
// they require.
int out_arg_slots;
out_arg_slots = c_calling_convention(out_sig_bt, out_regs, nullptr, total_c_args);
out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
// Compute framesize for the wrapper. We need to handlize all oops in
// incoming registers

View File

@ -118,7 +118,6 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
VMRegPair *regs2,
int total_args_passed) {
ShouldNotCallThis();
return 0;

View File

@ -118,7 +118,7 @@ CallingConvention* FrameMap::c_calling_convention(const BasicTypeArray* signatur
}
}
intptr_t out_preserve = SharedRuntime::c_calling_convention(sig_bt, regs, nullptr, sizeargs);
intptr_t out_preserve = SharedRuntime::c_calling_convention(sig_bt, regs, sizeargs);
LIR_OprList* args = new LIR_OprList(signature->length());
for (i = 0; i < sizeargs;) {
BasicType t = sig_bt[i];

View File

@ -1232,7 +1232,7 @@ bool CallLeafVectorNode::cmp( const Node &n ) const {
//------------------------------calling_convention-----------------------------
// Determines the argument locations for a runtime call by deferring to
// SharedRuntime::c_calling_convention with this node's signature (sig_bt),
// register-pair array (parm_regs) and argument count (argcnt).
void CallRuntimeNode::calling_convention(BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt) const {
// The int returned by c_calling_convention is intentionally not used here.
SharedRuntime::c_calling_convention(sig_bt, parm_regs, /*regs2=*/nullptr, argcnt);
SharedRuntime::c_calling_convention(sig_bt, parm_regs, argcnt);
}
void CallLeafVectorNode::calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const {

View File

@ -387,8 +387,7 @@ class SharedRuntime: AllStatic {
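// to be filled by the c_calling_convention method. On other architectures,
// null is being passed as the second VMRegPair array, so arguments are either
// passed in a register OR in a stack slot.
// NOTE(review): the three-parameter declaration below no longer has a second
// VMRegPair array (regs2), so the description above appears stale - confirm and update.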
static int c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, VMRegPair *regs2,
int total_args_passed);
static int c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed);
static int vector_calling_convention(VMRegPair *regs,
uint num_bits,