8343430: RISC-V: C2: Remove old trampoline call

Reviewed-by: mli, rehn
This commit is contained in:
Fei Yang 2024-11-13 00:07:44 +00:00
parent b26e4952e9
commit eb40a88f40
12 changed files with 86 additions and 623 deletions

View File

@ -316,7 +316,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
relocInfo::static_call_type);
address call = __ reloc_call(resolve);
if (call == nullptr) {
ce->bailout("trampoline stub overflow");
ce->bailout("reloc call address stub overflow");
return;
}
ce->add_call_info_here(info());

View File

@ -1346,7 +1346,7 @@ void LIR_Assembler::align_call(LIR_Code code) {
void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
address call = __ reloc_call(Address(op->addr(), rtype));
if (call == nullptr) {
bailout("trampoline stub overflow");
bailout("reloc call address stub overflow");
return;
}
add_call_info(code_offset(), op->info());
@ -1356,7 +1356,7 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
address call = __ ic_call(op->addr());
if (call == nullptr) {
bailout("trampoline stub overflow");
bailout("reloc call address stub overflow");
return;
}
add_call_info(code_offset(), op->info());

View File

@ -65,10 +65,10 @@ private:
void deoptimize_trap(CodeEmitInfo *info);
enum {
// See emit_static_call_stub for detail
// CompiledDirectCall::to_interp_stub_size() (14) + CompiledDirectCall::to_trampoline_stub_size() (1 + 3 + address)
_call_stub_size = 14 * MacroAssembler::instruction_size +
(MacroAssembler::instruction_size + MacroAssembler::NativeShortCall::trampoline_size),
// call stub: CompiledDirectCall::to_interp_stub_size() +
// CompiledDirectCall::to_trampoline_stub_size()
_call_stub_size = 11 * MacroAssembler::instruction_size +
1 * MacroAssembler::instruction_size + wordSize,
// See emit_exception_handler for detail
// verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY)
_exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller

View File

@ -28,61 +28,6 @@
#include "asm/codeBuffer.inline.hpp"
#include "asm/macroAssembler.hpp"
// Record that the call site at `caller_offset` wants a trampoline to `dest`.
// Requests are keyed by destination, so several call sites with the same
// `dest` accumulate in one entry's offset list.
void CodeBuffer::share_trampoline_for(address dest, int caller_offset) {
// The request table is created lazily on the first request.
if (_shared_trampoline_requests == nullptr) {
constexpr unsigned init_size = 8;
constexpr unsigned max_size = 256;
_shared_trampoline_requests = new (mtCompiler)SharedTrampolineRequests(init_size, max_size);
}
bool created;
Offsets* offsets = _shared_trampoline_requests->put_if_absent(dest, &created);
// Only a newly inserted destination can require the table to grow.
if (created) {
_shared_trampoline_requests->maybe_grow();
}
offsets->add(caller_offset);
// NOTE(review): presumably this flag makes stub finalization run later
// (see pd_finalize_stubs) -- confirm against the shared CodeBuffer code.
_finalize_stubs = true;
}
#define __ masm.
// Emit one trampoline stub per requested destination and attach every
// requesting call site to it.
//
// cb:       code buffer whose stub section receives the trampolines.
// requests: destination -> caller-offset lists; may be nullptr.
//
// Returns true when there is nothing to do or all stubs were emitted, and
// false when the stub section could not be prepared (see the check below).
// Within this function `__` expands to `masm.` (see the #define above).
static bool emit_shared_trampolines(CodeBuffer* cb, CodeBuffer::SharedTrampolineRequests* requests) {
if (requests == nullptr) {
return true;
}
assert(UseTrampolines, "We are not using trampolines");
MacroAssembler masm(cb);
// Per-destination callback: the first caller offset creates the stub, the
// remaining offsets only get an additional relocation on that same stub.
auto emit = [&](address dest, const CodeBuffer::Offsets &offsets) {
assert(cb->stubs()->remaining() >= MacroAssembler::max_reloc_call_stub_size(), "pre-allocated trampolines");
LinkedListIterator<int> it(offsets.head());
int offset = *it.next();
address stub = __ emit_trampoline_stub(offset, dest);
assert(stub, "pre-allocated trampolines");
// trampoline_size bytes back from the end of the stub section, i.e. the
// start of the trampoline that was just emitted.
address reloc_pc = cb->stubs()->end() - MacroAssembler::NativeShortCall::trampoline_size;
while (!it.is_empty()) {
offset = *it.next();
address caller_pc = cb->insts()->start() + offset;
cb->stubs()->relocate(reloc_pc, trampoline_stub_Relocation::spec(caller_pc));
}
// NOTE(review): presumably `true` tells iterate() to keep going -- confirm.
return true;
};
assert(requests->number_of_entries() >= 1, "at least one");
// Reserve worst-case space for all stubs up front so that the per-entry
// emission above can assert instead of handling expansion.
const int total_requested_size = MacroAssembler::max_reloc_call_stub_size() * requests->number_of_entries();
// NOTE(review): reads as "an expansion was attempted and left the buffer
// without a blob" -- confirm the return contract of
// maybe_expand_to_ensure_remaining().
if (cb->stubs()->maybe_expand_to_ensure_remaining(total_requested_size) && cb->blob() == nullptr) {
return false;
}
requests->iterate(emit);
return true;
}
#undef __
bool CodeBuffer::pd_finalize_stubs() {
return emit_shared_stubs_to_interp<MacroAssembler>(this, _shared_stub_to_interp_requests)
&& emit_shared_trampolines(this, _shared_trampoline_requests);
return emit_shared_stubs_to_interp<MacroAssembler>(this, _shared_stub_to_interp_requests);
}

View File

@ -33,8 +33,6 @@ private:
public:
void flush_bundle(bool start_new_bundle) {}
static bool supports_shared_stubs() { return UseTrampolines; }
void share_trampoline_for(address dest, int caller_offset);
static bool supports_shared_stubs() { return false; }
#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP

View File

@ -69,9 +69,9 @@ int CompiledDirectCall::to_interp_stub_size() {
}
int CompiledDirectCall::to_trampoline_stub_size() {
// We count instructions and an additional alignment nop.
// Trampoline stubs are always word aligned.
return MacroAssembler::max_reloc_call_stub_size();
// We count size of target address and an additional alignment nop.
// Reloc call address stubs are always word aligned.
return MacroAssembler::max_reloc_call_address_stub_size();
}
// Relocation entries for call stub, compiled java to interpreter.

View File

@ -119,8 +119,6 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Use Zvkn group extension, Zvkned, Zvknhb, Zvkb, Zvkt") \
product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
"Use RVV instructions for left/right shift of BigInteger") \
product(bool, UseTrampolines, false, EXPERIMENTAL, \
"Far calls uses jal to trampoline.") \
product(bool, UseCtxFencei, false, EXPERIMENTAL, \
"Use PR_RISCV_CTX_SW_FENCEI_ON to avoid explicit icache flush")

View File

@ -967,26 +967,6 @@ void MacroAssembler::li(Register Rd, int64_t imm) {
}
}
// Emit the three-instruction sequence auipc + ld + jalr: load a 64-bit value
// from the pc-relative location `source` into `temp` and jalr through it.
//
// source: address of the memory word to load; must be within a signed
//         32-bit distance of the current pc.
// temp:   scratch register; must not be noreg, x0 or x5.
void MacroAssembler::load_link_jump(const address source, Register temp) {
assert(temp != noreg && temp != x0, "expecting a register");
assert(temp != x5, "temp register must not be x5.");
assert_cond(source != nullptr);
int64_t distance = source - pc();
assert(is_simm32(distance), "Must be");
// The low 12 bits are applied by ld as a sign-extended immediate, so 0x800
// is added before auipc consumes the upper part to compensate for a
// negative low part.
auipc(temp, (int32_t)distance + 0x800);
// (x << 20) >> 20 sign-extends the low 12 bits of the distance.
ld(temp, Address(temp, ((int32_t)distance << 20) >> 20));
jalr(temp);
}
// Emit a single `jal x1, distance` to `dest`.
//
// dest: call target; its distance from the current pc must be even and fit
//       a signed 21-bit immediate.
// temp: not used by this body.
// Only valid when UseTrampolines is set.
void MacroAssembler::jump_link(const address dest, Register temp) {
assert(UseTrampolines, "Must be");
assert_cond(dest != nullptr);
int64_t distance = dest - pc();
assert(is_simm21(distance), "Must be");
assert((distance % 2) == 0, "Must be");
jal(x1, distance);
}
void MacroAssembler::j(const address dest, Register temp) {
assert(CodeCache::contains(dest), "Must be");
assert_cond(dest != nullptr);
@ -4282,46 +4262,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
zero_extend(dst, dst, 32);
}
// Maybe emit a call via a trampoline. If the code cache is small
// trampolines won't be emitted.
// entry: call target plus relocation spec; the relocation type must be one
//        of runtime_call, opt_virtual_call, static_call or virtual_call.
// Returns the pc of the emitted call instruction, or nullptr when the
// trampoline stub could not be emitted.
address MacroAssembler::trampoline_call(Address entry) {
assert(entry.rspec().type() == relocInfo::runtime_call_type ||
entry.rspec().type() == relocInfo::opt_virtual_call_type ||
entry.rspec().type() == relocInfo::static_call_type ||
entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
address target = entry.target();
// We need a trampoline if branches are far.
// NOTE(review): stubs are skipped while in_scratch_emit_size() is true,
// presumably a size-measuring pass -- confirm.
if (!in_scratch_emit_size()) {
if (entry.rspec().type() == relocInfo::runtime_call_type) {
// Runtime calls only register a request; the shared trampoline itself is
// emitted elsewhere from the CodeBuffer's request table.
assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs");
code()->share_trampoline_for(entry.target(), offset());
} else {
// All other call types get their own trampoline stub right away.
address stub = emit_trampoline_stub(offset(), target);
if (stub == nullptr) {
postcond(pc() == badAddress);
return nullptr; // CodeCache is full
}
}
}
// The jal below is emitted as a branch to its own pc.
// NOTE(review): presumably a placeholder that relocation patches later --
// confirm.
target = pc();
address call_pc = pc();
#ifdef ASSERT
// Alignment of the call site is only checked for non-runtime calls.
if (entry.rspec().type() != relocInfo::runtime_call_type) {
assert_alignment(call_pc);
}
#endif
relocate(entry.rspec(), [&] {
jump_link(target, t0);
});
postcond(pc() != badAddress);
return call_pc;
}
address MacroAssembler::load_and_call(Address entry) {
address MacroAssembler::reloc_call(Address entry, Register tmp) {
assert(entry.rspec().type() == relocInfo::runtime_call_type ||
entry.rspec().type() == relocInfo::opt_virtual_call_type ||
entry.rspec().type() == relocInfo::static_call_type ||
@ -4330,7 +4271,7 @@ address MacroAssembler::load_and_call(Address entry) {
address target = entry.target();
if (!in_scratch_emit_size()) {
address stub = emit_address_stub(offset(), target);
address stub = emit_reloc_call_address_stub(offset(), target);
if (stub == nullptr) {
postcond(pc() == badAddress);
return nullptr; // CodeCache is full
@ -4343,8 +4284,13 @@ address MacroAssembler::load_and_call(Address entry) {
assert_alignment(call_pc);
}
#endif
// The relocation created while emitting the stub will ensure this
// call instruction is subsequently patched to call the stub.
relocate(entry.rspec(), [&] {
load_link_jump(target, t1);
auipc(tmp, 0);
ld(tmp, Address(tmp, 0));
jalr(tmp);
});
postcond(pc() != badAddress);
@ -4404,8 +4350,21 @@ int MacroAssembler::ic_check(int end_alignment) {
return uep_offset;
}
address MacroAssembler::emit_address_stub(int insts_call_instruction_offset, address dest) {
address stub = start_a_stub(max_reloc_call_stub_size());
// Emit an address stub for a call to a target which is too far away.
// Note that we only put the target address of the call in the stub.
//
// code sequences:
//
// call-site:
// load target address from stub
// jump-and-link target address
//
// Related address stub for this call site in the stub section:
// alignment nop
// target address
address MacroAssembler::emit_reloc_call_address_stub(int insts_call_instruction_offset, address dest) {
address stub = start_a_stub(max_reloc_call_address_stub_size());
if (stub == nullptr) {
return nullptr; // CodeBuffer::expand failed
}
@ -4432,67 +4391,9 @@ address MacroAssembler::emit_address_stub(int insts_call_instruction_offset, add
return stub_start_addr;
}
// Emit a trampoline stub for a call to a target which is too far away.
//
// code sequences:
//
// call-site:
// branch-and-link to <destination> or <trampoline stub>
//
// Related trampoline stub for this call site in the stub section:
// load the call target from the constant pool
// branch (RA still points to the call site above)
// insts_call_instruction_offset: offset of the call instruction from the
//                                start of the instructions section.
// dest:                          address stored in the stub's data word.
// Returns the address of the first stub instruction, or nullptr when
// start_a_stub() fails.
address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
address dest) {
// Max stub size: alignment nop, TrampolineStub.
address stub = start_a_stub(max_reloc_call_stub_size());
if (stub == nullptr) {
return nullptr; // CodeBuffer::expand failed
}
assert(UseTrampolines, "Must be using trampos.");
// We are always 4-byte aligned here.
assert_alignment(pc());
// Create a trampoline stub relocation which relates this trampoline stub
// with the call instruction at insts_call_instruction_offset in the
// instructions code-section.
// Make sure the address of destination 8-byte aligned after 3 instructions.
align(wordSize, MacroAssembler::NativeShortCall::trampoline_data_offset);
RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() +
insts_call_instruction_offset);
// Taken after align(), so any alignment padding is not part of the stub
// address returned to the caller.
const int stub_start_offset = offset();
relocate(rh, [&] {
// Now, create the trampoline stub's code:
// - load the call
// - call
Label target;
ld(t1, target); // auipc + ld
jr(t1); // jalr
bind(target);
assert(offset() - stub_start_offset == MacroAssembler::NativeShortCall::trampoline_data_offset,
"should be");
assert(offset() % wordSize == 0, "bad alignment");
// The data word holding the call destination.
emit_int64((int64_t)dest);
});
const address stub_start_addr = addr_at(stub_start_offset);
end_a_stub();
return stub_start_addr;
}
int MacroAssembler::max_reloc_call_stub_size() {
// Max stub size: alignment nop, TrampolineStub.
if (UseTrampolines) {
return instruction_size + MacroAssembler::NativeShortCall::trampoline_size;
}
return instruction_size + wordSize;
int MacroAssembler::max_reloc_call_address_stub_size() {
// Max stub size: alignment nop, target address.
return 1 * instruction_size + wordSize;
}
int MacroAssembler::static_call_stub_size() {

View File

@ -469,9 +469,8 @@ class MacroAssembler: public Assembler {
return false;
}
address emit_address_stub(int insts_call_instruction_offset, address target);
address emit_trampoline_stub(int insts_call_instruction_offset, address target);
static int max_reloc_call_stub_size();
address emit_reloc_call_address_stub(int insts_call_instruction_offset, address target);
static int max_reloc_call_address_stub_size();
void emit_static_call_stub();
static int static_call_stub_size();
@ -627,9 +626,6 @@ class MacroAssembler: public Assembler {
void bltz(Register Rs, const address dest);
void bgtz(Register Rs, const address dest);
private:
void load_link_jump(const address source, Register temp);
void jump_link(const address dest, Register temp);
public:
// We try to follow risc-v asm mnemonics.
// But as we don't layout a reachable GOT,
@ -1231,92 +1227,50 @@ public:
void get_polling_page(Register dest, relocInfo::relocType rtype);
void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
// RISCV64 OpenJDK uses four different types of calls:
// - direct call: jal pc_relative_offset
// This is the shortest and the fastest, but the offset has the range: +/-1MB.
// RISCV64 OpenJDK uses three different types of calls:
//
// - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
// This is longer than a direct call. The offset has
// the range [-(2G + 2K), 2G - 2K). Addresses out of the range in the code cache
// requires indirect call.
// If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset' can
// be used instead.
// The offset has the range [-(2G + 2K), 2G - 2K). Addresses out of the
// range in the code cache require an indirect call.
// If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset'
// can be used instead.
// All instructions are embedded at a call site.
//
// - indirect call: movptr + jalr
// This too can reach anywhere in the address space, but it cannot be
// patched while code is running, so it must only be modified at a safepoint.
// This form of call is most suitable for targets at fixed addresses, which
// will never be patched.
// This can reach anywhere in the address space, but it cannot be patched
// while code is running, so it must only be modified at a safepoint.
// This form of call is most suitable for targets at fixed addresses,
// which will never be patched.
//
// - reloc call:
// This is only available in C1/C2-generated code (nmethod).
// This too can reach anywhere in the address space but is only available
// in C1/C2-generated code (nmethod).
//
// [Main code section]
// auipc
// ld <address_from_stub_section>
// jalr
//
// [Stub section]
// trampoline:
// address stub:
// <64-bit destination address>
//
// To change the destination we simply atomically store the new
// address in the stub section.
//
// - trampoline call (old reloc call / -XX:+UseTrampolines):
// This is only available in C1/C2-generated code (nmethod). It is a combination
// of a direct call, which is used if the destination of a call is in range,
// and a register-indirect call. It has the advantages of reaching anywhere in
// the RISCV address space and being patchable at runtime when the generated
// code is being executed by other threads.
//
// [Main code section]
// jal trampoline
// [Stub code section]
// trampoline:
// ld reg, pc + 8 (auipc + ld)
// jr reg
// <64-bit destination address>
//
// If the destination is in range when the generated code is moved to the code
// cache, 'jal trampoline' is replaced with 'jal destination' and the trampoline
// is not used.
// The optimization does not remove the trampoline from the stub section.
//
// This is necessary because the trampoline may well be redirected later when
// code is patched, and the new destination may not be reachable by a simple JAL
// instruction.
//
// To patch a trampoline call when the JAL can't reach, we first modify
// the 64-bit destination address in the trampoline, then modify the
// JAL to point to the trampoline, then flush the instruction cache to
// broadcast the change to all executing threads. See
// NativeCall::set_destination_mt_safe for the details.
//
// There is a benign race in that the other thread might observe the
// modified JAL before it observes the modified 64-bit destination
// address. That does not matter because the destination method has been
// invalidated, so there will be a trap at its start.
// For this to work, the destination address in the trampoline is
// always updated, even if we're not using the trampoline.
// --
// There is a benign race in that the other thread might observe the old
// 64-bit destination address before it observes the new address. That does
// not matter because the destination method has been invalidated, so there
// will be a trap at its start.
// Emit a direct call if the entry address will always be in range,
// otherwise a reloc call.
// Emit a reloc call and create a stub to hold the entry point address.
// Supported entry.rspec():
// - relocInfo::runtime_call_type
// - relocInfo::opt_virtual_call_type
// - relocInfo::static_call_type
// - relocInfo::virtual_call_type
//
// Return: the call PC or null if CodeCache is full.
address reloc_call(Address entry) {
return UseTrampolines ? trampoline_call(entry) : load_and_call(entry);
}
private:
address trampoline_call(Address entry);
address load_and_call(Address entry);
public:
// Return: the call PC or nullptr if CodeCache is full.
address reloc_call(Address entry, Register tmp = t1);
address ic_call(address entry, jint method_index = 0);
static int ic_check_size();
@ -1640,11 +1594,6 @@ public:
load_pc_relative_instruction_size = 2 * instruction_size // auipc, ld
};
enum NativeShortCall {
trampoline_size = 3 * instruction_size + wordSize,
trampoline_data_offset = 3 * instruction_size
};
static bool is_load_pc_relative_at(address branch);
static bool is_li16u_at(address instr);

View File

@ -46,264 +46,10 @@ bool NativeInstruction::is_call_at(address addr) {
return NativeCall::is_at(addr);
}
//-----------------------------------------------------------------------------
// NativeShortCallTrampoline
//
// Implements the trampoline part of reloc call - trampoline call.
class NativeShortCall;
// Accessor for a trampoline stub: three instructions followed by the
// destination pointer at trampoline_data_offset. Every member is private, so
// only the befriended NativeShortCall can use this class.
class NativeShortCallTrampolineStub : public NativeInstruction {
private:
friend NativeShortCall;
enum RISCV_specific_constants {
trampoline_data_offset = 3 * NativeInstruction::instruction_size // auipc + ld + jr
};
// Read / overwrite the destination pointer stored in the stub.
address destination() const;
void set_destination(address new_destination);
// is_at() pattern-matches the stub instructions; at() asserts is_at() and casts.
static bool is_at(address addr);
static NativeShortCallTrampolineStub* at(address addr);
};
// Returns the pointer stored trampoline_data_offset bytes into the stub.
address NativeShortCallTrampolineStub::destination() const {
return ptr_at(trampoline_data_offset);
}
// Overwrites the pointer stored trampoline_data_offset bytes into the stub,
// then calls OrderAccess::release().
// NOTE(review): presumably the barrier orders this store before later writes
// such as patching the call instruction -- confirm.
void NativeShortCallTrampolineStub::set_destination(address new_destination) {
set_ptr_at(trampoline_data_offset, new_destination);
OrderAccess::release();
}
// Returns true when the three instructions at `addr` match the trampoline
// stub pattern. `addr` must not be null.
bool NativeShortCallTrampolineStub::is_at(address addr) {
// Ensure that the stub is exactly
// ld t1, L--->auipc + ld
// jr t1
// L:
// judge inst + register + imm
// 1). check the instructions: auipc + ld + jalr
// 2). check if auipc[11:7] == t1 and ld[11:7] == t1 and ld[19:15] == t1 && jr[19:15] == t1
//     (the code below compares against x6, the register emit_trampoline_stub
//     uses under the name t1)
// 3). check if the offset in ld[31:20] equals the data_offset
assert_cond(addr != nullptr);
const int instr_size = NativeInstruction::instruction_size;
// NOTE(review): the last operand uses the literal `addr + 4` where the other
// operands use `addr + instr_size`; presumably the same address -- confirm.
if (MacroAssembler::is_auipc_at(addr) &&
MacroAssembler::is_ld_at(addr + instr_size) &&
MacroAssembler::is_jalr_at(addr + 2 * instr_size) &&
(MacroAssembler::extract_rd(addr) == x6) &&
(MacroAssembler::extract_rd(addr + instr_size) == x6) &&
(MacroAssembler::extract_rs1(addr + instr_size) == x6) &&
(MacroAssembler::extract_rs1(addr + 2 * instr_size) == x6) &&
(Assembler::extract(Assembler::ld_instr(addr + 4), 31, 20) == trampoline_data_offset)) {
return true;
}
return false;
}
// Reinterprets `addr` as a trampoline stub. In assert-enabled builds it
// checks that `addr` is non-null and that is_at(addr) holds.
NativeShortCallTrampolineStub* NativeShortCallTrampolineStub::at(address addr) {
assert_cond(addr != nullptr);
assert(NativeShortCallTrampolineStub::is_at(addr), "no call trampoline found");
return (NativeShortCallTrampolineStub*)addr;
}
//-----------------------------------------------------------------------------
// NativeShortCall
//
// Implements the trampoline call, a short call with a trampoline, version of reloc call.
// Enabled by setting the experimental UseTrampolines to true.
// Accessor for a one-instruction `jal` call site, optionally backed by a
// trampoline stub in the stub section of the same code blob.
class NativeShortCall: private NativeInstruction {
public:
enum RISCV_specific_constants {
return_address_offset = 1 * NativeInstruction::instruction_size // jal
};
address instruction_address() const { return addr_at(0); }
// The call is a single instruction, so the next instruction and the return
// address are the same location.
address next_instruction_address() const { return addr_at(return_address_offset); }
address return_address() const { return addr_at(return_address_offset); }
address destination() const;
address reloc_destination(address orig_address);
void set_destination(address dest);
void verify();
void print();
bool set_destination_mt_safe(address dest, bool assert_lock = true);
bool reloc_set_destination(address dest);
private:
// Helpers for locating and reading the trampoline stub of this call site.
address get_trampoline();
bool has_trampoline();
address trampoline_destination();
public:
static NativeShortCall* at(address addr);
static bool is_at(address addr);
static bool is_call_before(address return_address);
};
// Returns the effective call target: the jal target itself, unless that
// target is a trampoline stub inside the stub section of the containing
// nmethod, in which case the address stored in the trampoline is returned.
address NativeShortCall::destination() const {
address addr = instruction_address();
assert(MacroAssembler::is_jal_at(instruction_address()), "inst must be jal.");
address destination = MacroAssembler::target_addr_for_insn(instruction_address());
// Do we use a trampoline stub for this call?
CodeBlob* cb = CodeCache::find_blob(addr);
assert(cb && cb->is_nmethod(), "sanity");
nmethod *nm = (nmethod *)cb;
// The null check is still needed where the assert above is compiled out.
if (nm != nullptr && nm->stub_contains(destination) && NativeShortCallTrampolineStub::is_at(destination)) {
// Yes we do, so get the destination from the trampoline stub.
const address trampoline_stub_addr = destination;
destination = NativeShortCallTrampolineStub::at(trampoline_stub_addr)->destination();
}
return destination;
}
// Destination lookup used by relocation.
//
// orig_address: the call's address before the code was moved, or nullptr.
// Resolution order:
//   1. the address stored in this call's trampoline, when one exists;
//   2. the target decoded from the instruction at orig_address, with a
//      branch-to-self mapped to this call's own address;
//   3. the target decoded from the instruction at the current address.
address NativeShortCall::reloc_destination(address orig_address) {
address addr = instruction_address();
if (NativeShortCall::is_at(addr)) {
NativeShortCall* call = NativeShortCall::at(addr);
if (call->has_trampoline()) {
return call->trampoline_destination();
}
}
if (orig_address != nullptr) {
// the extracted address from the instructions in address orig_addr
address new_addr = MacroAssembler::pd_call_destination(orig_address);
// If call is branch to self, don't try to relocate it, just leave it
// as branch to self. This happens during code generation if the code
// buffer expands. It will be relocated to the trampoline above once
// code generation is complete.
new_addr = (new_addr == orig_address) ? addr : new_addr;
return new_addr;
}
return MacroAssembler::pd_call_destination(addr);
}
// Rewrites the call site as `jal ra, dest`.
//
// dest: new target; its offset from the call must be even and fit a signed
//       21-bit immediate (both asserted).
// The instruction is assembled in a local word and then written with one
// set_int_at() call.
void NativeShortCall::set_destination(address dest) {
assert(NativeShortCall::is_at(instruction_address()), "unexpected code at call site");
assert(is_jal(), "Should be jal instruction!");
intptr_t offset = (intptr_t)(dest - instruction_address());
assert((offset & 0x1) == 0, "bad alignment");
assert(Assembler::is_simm21(offset), "encoding constraint");
unsigned int insn = 0b1101111; // jal
address pInsn = (address)(&insn);
// Scatter the offset bits into the instruction word:
// offset[20] -> bit 31, offset[10:1] -> bits 30:21,
// offset[11] -> bit 20, offset[19:12] -> bits 19:12.
Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1);
Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff);
Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1);
Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff);
Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra
set_int_at(0, insn);
}
// Asserts that the code at this address matches NativeShortCall::is_at().
void NativeShortCall::verify() {
assert(NativeShortCall::is_at(instruction_address()),
"unexpected code at call site: %p", instruction_address());
}
// Prints the call's address and a fixed description string to tty.
void NativeShortCall::print() {
assert(NativeShortCall::is_at(instruction_address()), "unexpected code at call site");
tty->print_cr(PTR_FORMAT ": jal/auipc,ld,jalr x1, offset/reg", p2i(instruction_address()));
}
// The important thing is that threads are able to execute this
// call instruction at all times. (cmodx)
//
// Used in the runtime linkage of calls; see class CompiledIC.
//
// Add parameter assert_lock to switch off assertion
// during code generation, where no lock is needed.
// dest:        new call target.
// assert_lock: when true, assert that CodeCache_lock is held, or the VM is
//              at a safepoint, or CompiledICLocker reports the site as safe.
// Delegates the patching to reloc_set_destination() and then invalidates the
// instruction cache for the call instruction. Always returns true.
bool NativeShortCall::set_destination_mt_safe(address dest, bool assert_lock) {
assert(!assert_lock ||
(CodeCache_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) ||
CompiledICLocker::is_safe(instruction_address()),
"concurrent code patching");
address call_addr = instruction_address();
assert(NativeCall::is_at(call_addr), "unexpected code at call site");
reloc_set_destination(dest);
ICache::invalidate_range(call_addr, instruction_size);
return true;
}
// Points this call at `dest`. The trampoline's stored address, when a
// trampoline exists, is updated first and unconditionally; the jal is then
// aimed straight at `dest` if reachable, otherwise at the trampoline.
// Always returns true.
bool NativeShortCall::reloc_set_destination(address dest) {
address call_addr = instruction_address();
assert(NativeCall::is_at(call_addr), "unexpected code at call site");
// Patch the constant in the call's trampoline stub.
address trampoline_stub_addr = get_trampoline();
if (trampoline_stub_addr != nullptr) {
assert(!NativeShortCallTrampolineStub::is_at(dest), "chained trampolines");
NativeShortCallTrampolineStub::at(trampoline_stub_addr)->set_destination(dest);
}
// Patch the call.
if (Assembler::reachable_from_branch_at(call_addr, dest)) {
set_destination(dest);
} else {
// Out of jal range: a trampoline is mandatory.
assert (trampoline_stub_addr != nullptr, "we need a trampoline");
set_destination(trampoline_stub_addr);
}
return true;
}
// Finds the trampoline stub that belongs to this call site.
// Returns its address, or nullptr when none is found.
address NativeShortCall::get_trampoline() {
address call_addr = instruction_address();
CodeBlob *code = CodeCache::find_blob(call_addr);
assert(code != nullptr, "Could not find the containing code blob");
address jal_destination = MacroAssembler::pd_call_destination(call_addr);
// Case 1: the jal already targets a trampoline inside the same blob.
if (code != nullptr && code->contains(jal_destination) && NativeShortCallTrampolineStub::is_at(jal_destination)) {
return jal_destination;
}
// Case 2: the jal targets something else; for an nmethod, look the
// trampoline up via trampoline_stub_Relocation.
if (code != nullptr && code->is_nmethod()) {
return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code);
}
return nullptr;
}
// True when get_trampoline() finds a trampoline stub for this call site.
bool NativeShortCall::has_trampoline() {
return NativeShortCall::get_trampoline() != nullptr;
}
// Returns the address stored in this call's trampoline stub. The result of
// get_trampoline() is passed to at() unchecked, so callers should test
// has_trampoline() first, as reloc_destination() does.
address NativeShortCall::trampoline_destination() {
return NativeShortCallTrampolineStub::at(get_trampoline())->destination();
}
// Reinterprets `addr` as a NativeShortCall. In assert-enabled builds it
// checks that `addr` is non-null and that is_at(addr) holds.
NativeShortCall* NativeShortCall::at(address addr) {
assert_cond(addr != nullptr);
assert(NativeShortCall::is_at(addr), "unexpected code at call site: %p", addr);
NativeShortCall* call = (NativeShortCall*)(addr);
return call;
}
// True when the instruction at `addr` is a jal whose destination register
// is x1.
bool NativeShortCall::is_at(address addr) {
if (MacroAssembler::is_jal_at(addr)) {
if (MacroAssembler::extract_rd(addr) == x1) {
return true;
}
}
return false;
}
// True when the single instruction immediately preceding `return_address`
// is a short call.
bool NativeShortCall::is_call_before(address return_address) {
return NativeShortCall::is_at(return_address - instruction_size);
}
//-----------------------------------------------------------------------------
// NativeFarCall
//
// Implements direct far calling loading an address from the stub section version of reloc call.
// This is the default (experimental flag UseTrampolines, default false).
class NativeFarCall: public NativeInstruction {
public:
@ -478,99 +224,51 @@ bool NativeFarCall::is_call_before(address return_address) {
// NativeCall
address NativeCall::instruction_address() const {
if (UseTrampolines) {
return NativeShortCall::at(addr_at(0))->instruction_address();
} else {
return NativeFarCall::at(addr_at(0))->instruction_address();
}
}
address NativeCall::next_instruction_address() const {
if (UseTrampolines) {
return NativeShortCall::at(addr_at(0))->next_instruction_address();
} else {
return NativeFarCall::at(addr_at(0))->next_instruction_address();
}
}
address NativeCall::return_address() const {
if (UseTrampolines) {
return NativeShortCall::at(addr_at(0))->return_address();
} else {
return NativeFarCall::at(addr_at(0))->return_address();
}
}
address NativeCall::destination() const {
if (UseTrampolines) {
return NativeShortCall::at(addr_at(0))->destination();
} else {
return NativeFarCall::at(addr_at(0))->destination();
}
}
address NativeCall::reloc_destination(address orig_address) {
if (UseTrampolines) {
return NativeShortCall::at(addr_at(0))->reloc_destination(orig_address);
} else {
return NativeFarCall::at(addr_at(0))->reloc_destination(orig_address);
}
}
void NativeCall::set_destination(address dest) {
if (UseTrampolines) {
NativeShortCall::at(addr_at(0))->set_destination(dest);
} else {
NativeFarCall::at(addr_at(0))->set_destination(dest);
}
}
void NativeCall::verify() {
if (UseTrampolines) {
NativeShortCall::at(addr_at(0))->verify();
} else {
NativeFarCall::at(addr_at(0))->verify();;
}
}
void NativeCall::print() {
if (UseTrampolines) {
NativeShortCall::at(addr_at(0))->print();
} else {
NativeFarCall::at(addr_at(0))->print();;
}
}
bool NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
if (UseTrampolines) {
return NativeShortCall::at(addr_at(0))->set_destination_mt_safe(dest, assert_lock);
} else {
return NativeFarCall::at(addr_at(0))->set_destination_mt_safe(dest, assert_lock);
}
}
bool NativeCall::reloc_set_destination(address dest) {
if (UseTrampolines) {
return NativeShortCall::at(addr_at(0))->reloc_set_destination(dest);
} else {
return NativeFarCall::at(addr_at(0))->reloc_set_destination(dest);
}
}
bool NativeCall::is_at(address addr) {
if (UseTrampolines) {
return NativeShortCall::is_at(addr);
} else {
return NativeFarCall::is_at(addr);
}
}
bool NativeCall::is_call_before(address return_address) {
if (UseTrampolines) {
return NativeShortCall::is_call_before(return_address);
} else {
return NativeFarCall::is_call_before(return_address);
}
}
NativeCall* nativeCall_at(address addr) {
@ -583,11 +281,7 @@ NativeCall* nativeCall_at(address addr) {
NativeCall* nativeCall_before(address return_address) {
assert_cond(return_address != nullptr);
NativeCall* call = nullptr;
if (UseTrampolines) {
call = (NativeCall*)(return_address - NativeShortCall::return_address_offset);
} else {
call = (NativeCall*)(return_address - NativeFarCall::return_address_offset);
}
DEBUG_ONLY(call->verify());
return call;
}

View File

@ -124,12 +124,9 @@ class NativeCall: private NativeInstruction {
public:
static int byte_size() {
if (UseTrampolines) {
return NativeInstruction::instruction_size; // jal
} else {
return 3 * NativeInstruction::instruction_size; // auipc + ld + jalr
}
}
// Creation
friend NativeCall* nativeCall_at(address addr);
friend NativeCall* nativeCall_before(address return_address);

View File

@ -1238,38 +1238,25 @@ bool needs_acquiring_load_reserved(const Node *n)
int MachCallStaticJavaNode::ret_addr_offset()
{
if (UseTrampolines) {
return 1 * NativeInstruction::instruction_size; // jal
}
return 3 * NativeInstruction::instruction_size; // auipc + ld + jalr
}
int MachCallDynamicJavaNode::ret_addr_offset()
{
if (UseTrampolines) {
return NativeMovConstReg::movptr2_instruction_size + NativeInstruction::instruction_size; // movptr2, jal
}
return NativeMovConstReg::movptr2_instruction_size + (3 * NativeInstruction::instruction_size); // movptr2, auipc + ld + jal
}
int MachCallRuntimeNode::ret_addr_offset() {
// For generated stubs the call will be:
// auipc + ld + jalr
// Using trampos:
// jal(addr)
// or with far branches
// jal(trampoline_stub)
// for real runtime callouts it will be 8 instructions
// For address inside the code cache the call will be:
// auipc + jalr
// For real runtime callouts it will be 8 instructions
// see riscv_enc_java_to_runtime
// la(t0, retaddr) -> auipc + addi
// sd(t0, Address(xthread, JavaThread::last_Java_pc_offset())) -> sd
// movptr(t1, addr, offset, t0) -> lui + lui + slli + add
// jalr(t1, offset) -> jalr
if (CodeCache::contains(_entry_point)) {
if (UseTrampolines) {
return 1 * NativeInstruction::instruction_size;
}
return 3 * NativeInstruction::instruction_size;
return 2 * NativeInstruction::instruction_size;
} else {
return 8 * NativeInstruction::instruction_size;
}
@ -2433,10 +2420,8 @@ encode %{
// The NOP here is purely to ensure that eliding a call to
// JVM_EnsureMaterializedForStackWalk doesn't change the code size.
__ nop();
if (!UseTrampolines) {
__ nop();
__ nop();
}
__ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
} else {
int method_index = resolved_method_index(masm);
@ -2487,17 +2472,13 @@ encode %{
enc_class riscv_enc_java_to_runtime(method meth) %{
Assembler::IncompressibleRegion ir(masm); // Fixed length: see ret_addr_offset
// some calls to generated routines (arraycopy code) are scheduled
// by C2 as runtime calls. if so we can call them using a jr (they
// will be in a reachable segment) otherwise we have to use a jalr
// which loads the absolute address into a register.
// Some calls to generated routines (arraycopy code) are scheduled by C2
// as runtime calls. If so, we can call them using a far call (they will be
// in the code cache, thus in a reachable segment); otherwise we have to use
// a movptr+jalr pair which loads the absolute address into a register.
address entry = (address)$meth$$method;
if (CodeCache::contains(entry)) {
address call = __ reloc_call(Address(entry, relocInfo::runtime_call_type));
if (call == nullptr) {
ciEnv::current()->record_failure("CodeCache is full");
return;
}
__ far_call(Address(entry, relocInfo::runtime_call_type));
__ post_call_nop();
} else {
Label retaddr;