8343430: RISC-V: C2: Remove old trampoline call
Reviewed-by: mli, rehn
This commit is contained in:
parent
b26e4952e9
commit
eb40a88f40
@ -316,7 +316,7 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
|
||||
relocInfo::static_call_type);
|
||||
address call = __ reloc_call(resolve);
|
||||
if (call == nullptr) {
|
||||
ce->bailout("trampoline stub overflow");
|
||||
ce->bailout("reloc call address stub overflow");
|
||||
return;
|
||||
}
|
||||
ce->add_call_info_here(info());
|
||||
|
@ -1346,7 +1346,7 @@ void LIR_Assembler::align_call(LIR_Code code) {
|
||||
void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
|
||||
address call = __ reloc_call(Address(op->addr(), rtype));
|
||||
if (call == nullptr) {
|
||||
bailout("trampoline stub overflow");
|
||||
bailout("reloc call address stub overflow");
|
||||
return;
|
||||
}
|
||||
add_call_info(code_offset(), op->info());
|
||||
@ -1356,7 +1356,7 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
|
||||
void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
|
||||
address call = __ ic_call(op->addr());
|
||||
if (call == nullptr) {
|
||||
bailout("trampoline stub overflow");
|
||||
bailout("reloc call address stub overflow");
|
||||
return;
|
||||
}
|
||||
add_call_info(code_offset(), op->info());
|
||||
|
@ -65,10 +65,10 @@ private:
|
||||
void deoptimize_trap(CodeEmitInfo *info);
|
||||
|
||||
enum {
|
||||
// See emit_static_call_stub for detail
|
||||
// CompiledDirectCall::to_interp_stub_size() (14) + CompiledDirectCall::to_trampoline_stub_size() (1 + 3 + address)
|
||||
_call_stub_size = 14 * MacroAssembler::instruction_size +
|
||||
(MacroAssembler::instruction_size + MacroAssembler::NativeShortCall::trampoline_size),
|
||||
// call stub: CompiledDirectCall::to_interp_stub_size() +
|
||||
// CompiledDirectCall::to_trampoline_stub_size()
|
||||
_call_stub_size = 11 * MacroAssembler::instruction_size +
|
||||
1 * MacroAssembler::instruction_size + wordSize,
|
||||
// See emit_exception_handler for detail
|
||||
// verify_not_null_oop + far_call + should_not_reach_here + invalidate_registers(DEBUG_ONLY)
|
||||
_exception_handler_size = DEBUG_ONLY(584) NOT_DEBUG(548), // or smaller
|
||||
|
@ -28,61 +28,6 @@
|
||||
#include "asm/codeBuffer.inline.hpp"
|
||||
#include "asm/macroAssembler.hpp"
|
||||
|
||||
void CodeBuffer::share_trampoline_for(address dest, int caller_offset) {
|
||||
if (_shared_trampoline_requests == nullptr) {
|
||||
constexpr unsigned init_size = 8;
|
||||
constexpr unsigned max_size = 256;
|
||||
_shared_trampoline_requests = new (mtCompiler)SharedTrampolineRequests(init_size, max_size);
|
||||
}
|
||||
|
||||
bool created;
|
||||
Offsets* offsets = _shared_trampoline_requests->put_if_absent(dest, &created);
|
||||
if (created) {
|
||||
_shared_trampoline_requests->maybe_grow();
|
||||
}
|
||||
offsets->add(caller_offset);
|
||||
_finalize_stubs = true;
|
||||
}
|
||||
|
||||
#define __ masm.
|
||||
|
||||
static bool emit_shared_trampolines(CodeBuffer* cb, CodeBuffer::SharedTrampolineRequests* requests) {
|
||||
if (requests == nullptr) {
|
||||
return true;
|
||||
}
|
||||
assert(UseTrampolines, "We are not using trampolines");
|
||||
|
||||
MacroAssembler masm(cb);
|
||||
|
||||
auto emit = [&](address dest, const CodeBuffer::Offsets &offsets) {
|
||||
assert(cb->stubs()->remaining() >= MacroAssembler::max_reloc_call_stub_size(), "pre-allocated trampolines");
|
||||
LinkedListIterator<int> it(offsets.head());
|
||||
int offset = *it.next();
|
||||
address stub = __ emit_trampoline_stub(offset, dest);
|
||||
assert(stub, "pre-allocated trampolines");
|
||||
|
||||
address reloc_pc = cb->stubs()->end() - MacroAssembler::NativeShortCall::trampoline_size;
|
||||
while (!it.is_empty()) {
|
||||
offset = *it.next();
|
||||
address caller_pc = cb->insts()->start() + offset;
|
||||
cb->stubs()->relocate(reloc_pc, trampoline_stub_Relocation::spec(caller_pc));
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
assert(requests->number_of_entries() >= 1, "at least one");
|
||||
const int total_requested_size = MacroAssembler::max_reloc_call_stub_size() * requests->number_of_entries();
|
||||
if (cb->stubs()->maybe_expand_to_ensure_remaining(total_requested_size) && cb->blob() == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
requests->iterate(emit);
|
||||
return true;
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
||||
bool CodeBuffer::pd_finalize_stubs() {
|
||||
return emit_shared_stubs_to_interp<MacroAssembler>(this, _shared_stub_to_interp_requests)
|
||||
&& emit_shared_trampolines(this, _shared_trampoline_requests);
|
||||
return emit_shared_stubs_to_interp<MacroAssembler>(this, _shared_stub_to_interp_requests);
|
||||
}
|
||||
|
@ -33,8 +33,6 @@ private:
|
||||
|
||||
public:
|
||||
void flush_bundle(bool start_new_bundle) {}
|
||||
static bool supports_shared_stubs() { return UseTrampolines; }
|
||||
|
||||
void share_trampoline_for(address dest, int caller_offset);
|
||||
static bool supports_shared_stubs() { return false; }
|
||||
|
||||
#endif // CPU_RISCV_CODEBUFFER_RISCV_HPP
|
||||
|
@ -69,9 +69,9 @@ int CompiledDirectCall::to_interp_stub_size() {
|
||||
}
|
||||
|
||||
int CompiledDirectCall::to_trampoline_stub_size() {
|
||||
// We count instructions and an additional alignment nop.
|
||||
// Trampoline stubs are always word aligned.
|
||||
return MacroAssembler::max_reloc_call_stub_size();
|
||||
// We count the size of the target address and an additional alignment nop.
|
||||
// Reloc call address stubs are always word aligned.
|
||||
return MacroAssembler::max_reloc_call_address_stub_size();
|
||||
}
|
||||
|
||||
// Relocation entries for call stub, compiled java to interpreter.
|
||||
|
@ -119,8 +119,6 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
||||
"Use Zvkn group extension, Zvkned, Zvknhb, Zvkb, Zvkt") \
|
||||
product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
|
||||
"Use RVV instructions for left/right shift of BigInteger") \
|
||||
product(bool, UseTrampolines, false, EXPERIMENTAL, \
|
||||
"Far calls uses jal to trampoline.") \
|
||||
product(bool, UseCtxFencei, false, EXPERIMENTAL, \
|
||||
"Use PR_RISCV_CTX_SW_FENCEI_ON to avoid explicit icache flush")
|
||||
|
||||
|
@ -967,26 +967,6 @@ void MacroAssembler::li(Register Rd, int64_t imm) {
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a three-instruction indirect call sequence (auipc + ld + jalr):
// the 64-bit word located at `source` is loaded pc-relatively into `temp`,
// and control is then transferred through `temp` with jalr.
//
// source - address of the memory word to load; must be non-null and within
//          a signed 32-bit distance of the current pc.
// temp   - scratch register that receives the loaded value; must be a real
//          register other than x0 and x5.
void MacroAssembler::load_link_jump(const address source, Register temp) {
|
||||
assert(temp != noreg && temp != x0, "expecting a register");
|
||||
assert(temp != x5, "temp register must not be x5.");
|
||||
assert_cond(source != nullptr);
|
||||
// Byte distance from the instruction about to be emitted to the word to load.
int64_t distance = source - pc();
|
||||
assert(is_simm32(distance), "Must be");
|
||||
// Upper part of the distance. Adding 0x800 pre-compensates for the
// sign extension of the low 12 bits that the ld below adds back.
auipc(temp, (int32_t)distance + 0x800);
|
||||
// (x << 20) >> 20 keeps the low 12 bits of the distance, sign-extended,
// which is the immediate form a load offset takes.
ld(temp, Address(temp, ((int32_t)distance << 20) >> 20));
|
||||
// Transfer control to the loaded address.
jalr(temp);
|
||||
}
|
||||
|
||||
// Emits a single `jal x1, <distance>` that calls `dest` directly.
//
// dest - call target; must be non-null, 2-byte aligned relative to the
//        current pc, and within the signed 21-bit range of jal.
// temp - not used by this function.
//
// Only valid when UseTrampolines is enabled (asserted).
void MacroAssembler::jump_link(const address dest, Register temp) {
|
||||
assert(UseTrampolines, "Must be");
|
||||
assert_cond(dest != nullptr);
|
||||
// pc-relative byte offset that will be encoded into the jal immediate.
int64_t distance = dest - pc();
|
||||
assert(is_simm21(distance), "Must be");
|
||||
assert((distance % 2) == 0, "Must be");
|
||||
// x1 is the link register, so the return address is recorded there.
jal(x1, distance);
|
||||
}
|
||||
|
||||
void MacroAssembler::j(const address dest, Register temp) {
|
||||
assert(CodeCache::contains(dest), "Must be");
|
||||
assert_cond(dest != nullptr);
|
||||
@ -4282,46 +4262,7 @@ void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
|
||||
zero_extend(dst, dst, 32);
|
||||
}
|
||||
|
||||
// Maybe emit a call via a trampoline. If the code cache is small
|
||||
// trampolines won't be emitted.
|
||||
address MacroAssembler::trampoline_call(Address entry) {
|
||||
assert(entry.rspec().type() == relocInfo::runtime_call_type ||
|
||||
entry.rspec().type() == relocInfo::opt_virtual_call_type ||
|
||||
entry.rspec().type() == relocInfo::static_call_type ||
|
||||
entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
|
||||
|
||||
address target = entry.target();
|
||||
|
||||
// We need a trampoline if branches are far.
|
||||
if (!in_scratch_emit_size()) {
|
||||
if (entry.rspec().type() == relocInfo::runtime_call_type) {
|
||||
assert(CodeBuffer::supports_shared_stubs(), "must support shared stubs");
|
||||
code()->share_trampoline_for(entry.target(), offset());
|
||||
} else {
|
||||
address stub = emit_trampoline_stub(offset(), target);
|
||||
if (stub == nullptr) {
|
||||
postcond(pc() == badAddress);
|
||||
return nullptr; // CodeCache is full
|
||||
}
|
||||
}
|
||||
}
|
||||
target = pc();
|
||||
|
||||
address call_pc = pc();
|
||||
#ifdef ASSERT
|
||||
if (entry.rspec().type() != relocInfo::runtime_call_type) {
|
||||
assert_alignment(call_pc);
|
||||
}
|
||||
#endif
|
||||
relocate(entry.rspec(), [&] {
|
||||
jump_link(target, t0);
|
||||
});
|
||||
|
||||
postcond(pc() != badAddress);
|
||||
return call_pc;
|
||||
}
|
||||
|
||||
address MacroAssembler::load_and_call(Address entry) {
|
||||
address MacroAssembler::reloc_call(Address entry, Register tmp) {
|
||||
assert(entry.rspec().type() == relocInfo::runtime_call_type ||
|
||||
entry.rspec().type() == relocInfo::opt_virtual_call_type ||
|
||||
entry.rspec().type() == relocInfo::static_call_type ||
|
||||
@ -4330,7 +4271,7 @@ address MacroAssembler::load_and_call(Address entry) {
|
||||
address target = entry.target();
|
||||
|
||||
if (!in_scratch_emit_size()) {
|
||||
address stub = emit_address_stub(offset(), target);
|
||||
address stub = emit_reloc_call_address_stub(offset(), target);
|
||||
if (stub == nullptr) {
|
||||
postcond(pc() == badAddress);
|
||||
return nullptr; // CodeCache is full
|
||||
@ -4343,8 +4284,13 @@ address MacroAssembler::load_and_call(Address entry) {
|
||||
assert_alignment(call_pc);
|
||||
}
|
||||
#endif
|
||||
|
||||
// The relocation created while emitting the stub will ensure this
|
||||
// call instruction is subsequently patched to call the stub.
|
||||
relocate(entry.rspec(), [&] {
|
||||
load_link_jump(target, t1);
|
||||
auipc(tmp, 0);
|
||||
ld(tmp, Address(tmp, 0));
|
||||
jalr(tmp);
|
||||
});
|
||||
|
||||
postcond(pc() != badAddress);
|
||||
@ -4404,8 +4350,21 @@ int MacroAssembler::ic_check(int end_alignment) {
|
||||
return uep_offset;
|
||||
}
|
||||
|
||||
address MacroAssembler::emit_address_stub(int insts_call_instruction_offset, address dest) {
|
||||
address stub = start_a_stub(max_reloc_call_stub_size());
|
||||
// Emit an address stub for a call to a target which is too far away.
|
||||
// Note that we only put the target address of the call in the stub.
|
||||
//
|
||||
// code sequences:
|
||||
//
|
||||
// call-site:
|
||||
// load target address from stub
|
||||
// jump-and-link target address
|
||||
//
|
||||
// Related address stub for this call site in the stub section:
|
||||
// alignment nop
|
||||
// target address
|
||||
|
||||
address MacroAssembler::emit_reloc_call_address_stub(int insts_call_instruction_offset, address dest) {
|
||||
address stub = start_a_stub(max_reloc_call_address_stub_size());
|
||||
if (stub == nullptr) {
|
||||
return nullptr; // CodeBuffer::expand failed
|
||||
}
|
||||
@ -4432,67 +4391,9 @@ address MacroAssembler::emit_address_stub(int insts_call_instruction_offset, add
|
||||
return stub_start_addr;
|
||||
}
|
||||
|
||||
// Emit a trampoline stub for a call to a target which is too far away.
|
||||
//
|
||||
// code sequences:
|
||||
//
|
||||
// call-site:
|
||||
// branch-and-link to <destination> or <trampoline stub>
|
||||
//
|
||||
// Related trampoline stub for this call site in the stub section:
|
||||
// load the call target from the constant pool
|
||||
// branch (RA still points to the call site above)
|
||||
|
||||
address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
|
||||
address dest) {
|
||||
// Max stub size: alignment nop, TrampolineStub.
|
||||
address stub = start_a_stub(max_reloc_call_stub_size());
|
||||
if (stub == nullptr) {
|
||||
return nullptr; // CodeBuffer::expand failed
|
||||
}
|
||||
|
||||
assert(UseTrampolines, "Must be using trampos.");
|
||||
|
||||
// We are always 4-byte aligned here.
|
||||
assert_alignment(pc());
|
||||
|
||||
// Create a trampoline stub relocation which relates this trampoline stub
|
||||
// with the call instruction at insts_call_instruction_offset in the
|
||||
// instructions code-section.
|
||||
|
||||
// Make sure the address of destination 8-byte aligned after 3 instructions.
|
||||
align(wordSize, MacroAssembler::NativeShortCall::trampoline_data_offset);
|
||||
|
||||
RelocationHolder rh = trampoline_stub_Relocation::spec(code()->insts()->start() +
|
||||
insts_call_instruction_offset);
|
||||
const int stub_start_offset = offset();
|
||||
relocate(rh, [&] {
|
||||
// Now, create the trampoline stub's code:
|
||||
// - load the call
|
||||
// - call
|
||||
Label target;
|
||||
ld(t1, target); // auipc + ld
|
||||
jr(t1); // jalr
|
||||
bind(target);
|
||||
assert(offset() - stub_start_offset == MacroAssembler::NativeShortCall::trampoline_data_offset,
|
||||
"should be");
|
||||
assert(offset() % wordSize == 0, "bad alignment");
|
||||
emit_int64((int64_t)dest);
|
||||
});
|
||||
|
||||
const address stub_start_addr = addr_at(stub_start_offset);
|
||||
|
||||
end_a_stub();
|
||||
|
||||
return stub_start_addr;
|
||||
}
|
||||
|
||||
int MacroAssembler::max_reloc_call_stub_size() {
|
||||
// Max stub size: alignment nop, TrampolineStub.
|
||||
if (UseTrampolines) {
|
||||
return instruction_size + MacroAssembler::NativeShortCall::trampoline_size;
|
||||
}
|
||||
return instruction_size + wordSize;
|
||||
int MacroAssembler::max_reloc_call_address_stub_size() {
|
||||
// Max stub size: alignment nop, target address.
|
||||
return 1 * instruction_size + wordSize;
|
||||
}
|
||||
|
||||
int MacroAssembler::static_call_stub_size() {
|
||||
|
@ -469,9 +469,8 @@ class MacroAssembler: public Assembler {
|
||||
return false;
|
||||
}
|
||||
|
||||
address emit_address_stub(int insts_call_instruction_offset, address target);
|
||||
address emit_trampoline_stub(int insts_call_instruction_offset, address target);
|
||||
static int max_reloc_call_stub_size();
|
||||
address emit_reloc_call_address_stub(int insts_call_instruction_offset, address target);
|
||||
static int max_reloc_call_address_stub_size();
|
||||
|
||||
void emit_static_call_stub();
|
||||
static int static_call_stub_size();
|
||||
@ -627,9 +626,6 @@ class MacroAssembler: public Assembler {
|
||||
void bltz(Register Rs, const address dest);
|
||||
void bgtz(Register Rs, const address dest);
|
||||
|
||||
private:
|
||||
void load_link_jump(const address source, Register temp);
|
||||
void jump_link(const address dest, Register temp);
|
||||
public:
|
||||
// We try to follow risc-v asm mnemonics.
|
||||
// But as we don't layout a reachable GOT,
|
||||
@ -1231,92 +1227,50 @@ public:
|
||||
void get_polling_page(Register dest, relocInfo::relocType rtype);
|
||||
void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
|
||||
|
||||
// RISCV64 OpenJDK uses four different types of calls:
|
||||
// - direct call: jal pc_relative_offset
|
||||
// This is the shortest and the fastest, but the offset has the range: +/-1MB.
|
||||
// RISCV64 OpenJDK uses three different types of calls:
|
||||
//
|
||||
// - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
|
||||
// This is longer than a direct call. The offset has
|
||||
// the range [-(2G + 2K), 2G - 2K). Addresses out of the range in the code cache
|
||||
// require an indirect call.
|
||||
// If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset' can
|
||||
// be used instead.
|
||||
// The offset has the range [-(2G + 2K), 2G - 2K). Addresses out of the
|
||||
// range in the code cache require an indirect call.
|
||||
// If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset'
|
||||
// can be used instead.
|
||||
// All instructions are embedded at a call site.
|
||||
//
|
||||
// - indirect call: movptr + jalr
|
||||
// This too can reach anywhere in the address space, but it cannot be
|
||||
// patched while code is running, so it must only be modified at a safepoint.
|
||||
// This form of call is most suitable for targets at fixed addresses, which
|
||||
// will never be patched.
|
||||
// This can reach anywhere in the address space, but it cannot be patched
|
||||
// while code is running, so it must only be modified at a safepoint.
|
||||
// This form of call is most suitable for targets at fixed addresses,
|
||||
// which will never be patched.
|
||||
//
|
||||
// - reloc call:
|
||||
// This is only available in C1/C2-generated code (nmethod).
|
||||
// This too can reach anywhere in the address space but is only available
|
||||
// in C1/C2-generated code (nmethod).
|
||||
//
|
||||
// [Main code section]
|
||||
// auipc
|
||||
// ld <address_from_stub_section>
|
||||
// jalr
|
||||
//
|
||||
// [Stub section]
|
||||
// trampoline:
|
||||
// address stub:
|
||||
// <64-bit destination address>
|
||||
//
|
||||
// To change the destination we simply atomically store the new
|
||||
// address in the stub section.
|
||||
//
|
||||
// - trampoline call (old reloc call / -XX:+UseTrampolines):
|
||||
// This is only available in C1/C2-generated code (nmethod). It is a combination
|
||||
// of a direct call, which is used if the destination of a call is in range,
|
||||
// and a register-indirect call. It has the advantages of reaching anywhere in
|
||||
// the RISCV address space and being patchable at runtime when the generated
|
||||
// code is being executed by other threads.
|
||||
//
|
||||
// [Main code section]
|
||||
// jal trampoline
|
||||
// [Stub code section]
|
||||
// trampoline:
|
||||
// ld reg, pc + 8 (auipc + ld)
|
||||
// jr reg
|
||||
// <64-bit destination address>
|
||||
//
|
||||
// If the destination is in range when the generated code is moved to the code
|
||||
// cache, 'jal trampoline' is replaced with 'jal destination' and the trampoline
|
||||
// is not used.
|
||||
// The optimization does not remove the trampoline from the stub section.
|
||||
//
|
||||
// This is necessary because the trampoline may well be redirected later when
|
||||
// code is patched, and the new destination may not be reachable by a simple JAL
|
||||
// instruction.
|
||||
//
|
||||
// To patch a trampoline call when the JAL can't reach, we first modify
|
||||
// the 64-bit destination address in the trampoline, then modify the
|
||||
// JAL to point to the trampoline, then flush the instruction cache to
|
||||
// broadcast the change to all executing threads. See
|
||||
// NativeCall::set_destination_mt_safe for the details.
|
||||
//
|
||||
// There is a benign race in that the other thread might observe the
|
||||
// modified JAL before it observes the modified 64-bit destination
|
||||
// address. That does not matter because the destination method has been
|
||||
// invalidated, so there will be a trap at its start.
|
||||
// For this to work, the destination address in the trampoline is
|
||||
// always updated, even if we're not using the trampoline.
|
||||
// --
|
||||
// There is a benign race in that the other thread might observe the old
|
||||
// 64-bit destination address before it observes the new address. That does
|
||||
// not matter because the destination method has been invalidated, so there
|
||||
// will be a trap at its start.
|
||||
|
||||
// Emit a direct call if the entry address will always be in range,
|
||||
// otherwise a reloc call.
|
||||
// Emit a reloc call and create a stub to hold the entry point address.
|
||||
// Supported entry.rspec():
|
||||
// - relocInfo::runtime_call_type
|
||||
// - relocInfo::opt_virtual_call_type
|
||||
// - relocInfo::static_call_type
|
||||
// - relocInfo::virtual_call_type
|
||||
//
|
||||
// Return: the call PC or null if CodeCache is full.
|
||||
address reloc_call(Address entry) {
|
||||
return UseTrampolines ? trampoline_call(entry) : load_and_call(entry);
|
||||
}
|
||||
private:
|
||||
address trampoline_call(Address entry);
|
||||
address load_and_call(Address entry);
|
||||
public:
|
||||
// Return: the call PC or nullptr if CodeCache is full.
|
||||
address reloc_call(Address entry, Register tmp = t1);
|
||||
|
||||
address ic_call(address entry, jint method_index = 0);
|
||||
static int ic_check_size();
|
||||
@ -1640,11 +1594,6 @@ public:
|
||||
load_pc_relative_instruction_size = 2 * instruction_size // auipc, ld
|
||||
};
|
||||
|
||||
enum NativeShortCall {
|
||||
trampoline_size = 3 * instruction_size + wordSize,
|
||||
trampoline_data_offset = 3 * instruction_size
|
||||
};
|
||||
|
||||
static bool is_load_pc_relative_at(address branch);
|
||||
static bool is_li16u_at(address instr);
|
||||
|
||||
|
@ -46,264 +46,10 @@ bool NativeInstruction::is_call_at(address addr) {
|
||||
return NativeCall::is_at(addr);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// NativeShortCallTrampoline
|
||||
//
|
||||
// Implements the trampoline part of reloc call - trampoline call.
|
||||
|
||||
class NativeShortCall;
|
||||
|
||||
class NativeShortCallTrampolineStub : public NativeInstruction {
|
||||
private:
|
||||
friend NativeShortCall;
|
||||
enum RISCV_specific_constants {
|
||||
trampoline_data_offset = 3 * NativeInstruction::instruction_size // auipc + ld + jr
|
||||
};
|
||||
|
||||
address destination() const;
|
||||
void set_destination(address new_destination);
|
||||
|
||||
static bool is_at(address addr);
|
||||
static NativeShortCallTrampolineStub* at(address addr);
|
||||
};
|
||||
|
||||
address NativeShortCallTrampolineStub::destination() const {
|
||||
return ptr_at(trampoline_data_offset);
|
||||
}
|
||||
|
||||
void NativeShortCallTrampolineStub::set_destination(address new_destination) {
|
||||
set_ptr_at(trampoline_data_offset, new_destination);
|
||||
OrderAccess::release();
|
||||
}
|
||||
|
||||
// Returns true when the three instructions starting at `addr` match the
// trampoline stub pattern exactly; returns false otherwise.
// `addr` must be non-null.
bool NativeShortCallTrampolineStub::is_at(address addr) {
|
||||
// Ensure that the stub is exactly
|
||||
// ld t1, L   (expands to auipc + ld)
|
||||
// jr t1
|
||||
// L:
|
||||
|
||||
// The match checks instruction kind, registers and immediate:
|
||||
// 1). check the instructions: auipc + ld + jalr
|
||||
// 2). check if auipc[11:7] == t1 and ld[11:7] == t1 and ld[19:15] == t1 && jr[19:15] == t1 (t1 is x6)
|
||||
// 3). check if the offset in ld[31:20] equals trampoline_data_offset
|
||||
assert_cond(addr != nullptr);
|
||||
const int instr_size = NativeInstruction::instruction_size;
|
||||
if (MacroAssembler::is_auipc_at(addr) &&
|
||||
MacroAssembler::is_ld_at(addr + instr_size) &&
|
||||
MacroAssembler::is_jalr_at(addr + 2 * instr_size) &&
|
||||
(MacroAssembler::extract_rd(addr) == x6) &&
|
||||
(MacroAssembler::extract_rd(addr + instr_size) == x6) &&
|
||||
(MacroAssembler::extract_rs1(addr + instr_size) == x6) &&
|
||||
(MacroAssembler::extract_rs1(addr + 2 * instr_size) == x6) &&
|
||||
// NOTE(review): `addr + 4` is the same ld instruction as `addr + instr_size`
// above, presuming instruction_size is 4 - confirm.
(Assembler::extract(Assembler::ld_instr(addr + 4), 31, 20) == trampoline_data_offset)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
NativeShortCallTrampolineStub* NativeShortCallTrampolineStub::at(address addr) {
|
||||
assert_cond(addr != nullptr);
|
||||
assert(NativeShortCallTrampolineStub::is_at(addr), "no call trampoline found");
|
||||
return (NativeShortCallTrampolineStub*)addr;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// NativeShortCall
|
||||
//
|
||||
// Implements the trampoline call, a short call with a trampoline, version of reloc call.
|
||||
// Enabled by setting the experimental UseTrampolines to true.
|
||||
|
||||
class NativeShortCall: private NativeInstruction {
|
||||
public:
|
||||
enum RISCV_specific_constants {
|
||||
return_address_offset = 1 * NativeInstruction::instruction_size // jal
|
||||
};
|
||||
|
||||
address instruction_address() const { return addr_at(0); }
|
||||
address next_instruction_address() const { return addr_at(return_address_offset); }
|
||||
address return_address() const { return addr_at(return_address_offset); }
|
||||
address destination() const;
|
||||
address reloc_destination(address orig_address);
|
||||
|
||||
void set_destination(address dest);
|
||||
void verify();
|
||||
void print();
|
||||
|
||||
bool set_destination_mt_safe(address dest, bool assert_lock = true);
|
||||
bool reloc_set_destination(address dest);
|
||||
|
||||
private:
|
||||
address get_trampoline();
|
||||
bool has_trampoline();
|
||||
address trampoline_destination();
|
||||
public:
|
||||
|
||||
static NativeShortCall* at(address addr);
|
||||
static bool is_at(address addr);
|
||||
static bool is_call_before(address return_address);
|
||||
};
|
||||
|
||||
address NativeShortCall::destination() const {
|
||||
address addr = instruction_address();
|
||||
assert(MacroAssembler::is_jal_at(instruction_address()), "inst must be jal.");
|
||||
|
||||
address destination = MacroAssembler::target_addr_for_insn(instruction_address());
|
||||
|
||||
// Do we use a trampoline stub for this call?
|
||||
CodeBlob* cb = CodeCache::find_blob(addr);
|
||||
assert(cb && cb->is_nmethod(), "sanity");
|
||||
nmethod *nm = (nmethod *)cb;
|
||||
if (nm != nullptr && nm->stub_contains(destination) && NativeShortCallTrampolineStub::is_at(destination)) {
|
||||
// Yes we do, so get the destination from the trampoline stub.
|
||||
const address trampoline_stub_addr = destination;
|
||||
destination = NativeShortCallTrampolineStub::at(trampoline_stub_addr)->destination();
|
||||
}
|
||||
|
||||
return destination;
|
||||
}
|
||||
|
||||
address NativeShortCall::reloc_destination(address orig_address) {
|
||||
address addr = instruction_address();
|
||||
if (NativeShortCall::is_at(addr)) {
|
||||
NativeShortCall* call = NativeShortCall::at(addr);
|
||||
if (call->has_trampoline()) {
|
||||
return call->trampoline_destination();
|
||||
}
|
||||
}
|
||||
if (orig_address != nullptr) {
|
||||
// the extracted address from the instructions in address orig_addr
|
||||
address new_addr = MacroAssembler::pd_call_destination(orig_address);
|
||||
// If call is branch to self, don't try to relocate it, just leave it
|
||||
// as branch to self. This happens during code generation if the code
|
||||
// buffer expands. It will be relocated to the trampoline above once
|
||||
// code generation is complete.
|
||||
new_addr = (new_addr == orig_address) ? addr : new_addr;
|
||||
return new_addr;
|
||||
}
|
||||
return MacroAssembler::pd_call_destination(addr);
|
||||
}
|
||||
|
||||
// Rewrites the jal at this call site so that it targets `dest`.
//
// A complete `jal ra, offset` instruction word is assembled in a local
// variable and then written over the call site with one set_int_at(0, ...).
//
// dest - new branch target; its offset from the call site must be even
//        and fit in the signed 21-bit jal immediate (both asserted).
void NativeShortCall::set_destination(address dest) {
|
||||
assert(NativeShortCall::is_at(instruction_address()), "unexpected code at call site");
|
||||
assert(is_jal(), "Should be jal instruction!");
|
||||
// pc-relative byte offset from the jal itself to the new target.
intptr_t offset = (intptr_t)(dest - instruction_address());
|
||||
assert((offset & 0x1) == 0, "bad alignment");
|
||||
assert(Assembler::is_simm21(offset), "encoding constraint");
|
||||
unsigned int insn = 0b1101111; // jal
|
||||
address pInsn = (address)(&insn);
|
||||
// Scatter the offset into the J-type immediate fields:
// imm[20] -> bit 31
Assembler::patch(pInsn, 31, 31, (offset >> 20) & 0x1);
|
||||
// imm[10:1] -> bits 30:21
Assembler::patch(pInsn, 30, 21, (offset >> 1) & 0x3ff);
|
||||
// imm[11] -> bit 20
Assembler::patch(pInsn, 20, 20, (offset >> 11) & 0x1);
|
||||
// imm[19:12] -> bits 19:12
Assembler::patch(pInsn, 19, 12, (offset >> 12) & 0xff);
|
||||
Assembler::patch(pInsn, 11, 7, ra->encoding()); // Rd must be x1, need ra
|
||||
// Write the finished instruction word over the call site.
set_int_at(0, insn);
|
||||
}
|
||||
|
||||
void NativeShortCall::verify() {
|
||||
assert(NativeShortCall::is_at(instruction_address()),
|
||||
"unexpected code at call site: %p", instruction_address());
|
||||
}
|
||||
|
||||
void NativeShortCall::print() {
|
||||
assert(NativeShortCall::is_at(instruction_address()), "unexpected code at call site");
|
||||
tty->print_cr(PTR_FORMAT ": jal/auipc,ld,jalr x1, offset/reg", p2i(instruction_address()));
|
||||
}
|
||||
|
||||
// The important thing is that threads are able to execute this
|
||||
// call instruction at all times. (cmodx)
|
||||
//
|
||||
// Used in the runtime linkage of calls; see class CompiledIC.
|
||||
//
|
||||
// Add parameter assert_lock to switch off assertion
|
||||
// during code generation, where no lock is needed.
|
||||
bool NativeShortCall::set_destination_mt_safe(address dest, bool assert_lock) {
|
||||
assert(!assert_lock ||
|
||||
(CodeCache_lock->is_locked() || SafepointSynchronize::is_at_safepoint()) ||
|
||||
CompiledICLocker::is_safe(instruction_address()),
|
||||
"concurrent code patching");
|
||||
|
||||
address call_addr = instruction_address();
|
||||
assert(NativeCall::is_at(call_addr), "unexpected code at call site");
|
||||
|
||||
reloc_set_destination(dest);
|
||||
|
||||
ICache::invalidate_range(call_addr, instruction_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Redirects this call site to `dest`.
//
// If a trampoline stub is found for the call, its stored destination is
// updated first. The jal is then pointed straight at `dest` when `dest`
// is reachable from the call site, and at the trampoline stub otherwise.
//
// Returns true on every path.
bool NativeShortCall::reloc_set_destination(address dest) {
|
||||
address call_addr = instruction_address();
|
||||
assert(NativeCall::is_at(call_addr), "unexpected code at call site");
|
||||
|
||||
// Patch the constant in the call's trampoline stub.
|
||||
address trampoline_stub_addr = get_trampoline();
|
||||
if (trampoline_stub_addr != nullptr) {
|
||||
// The new destination must not itself be a trampoline stub.
assert(!NativeShortCallTrampolineStub::is_at(dest), "chained trampolines");
|
||||
NativeShortCallTrampolineStub::at(trampoline_stub_addr)->set_destination(dest);
|
||||
}
|
||||
|
||||
// Patch the call.
|
||||
if (Assembler::reachable_from_branch_at(call_addr, dest)) {
|
||||
set_destination(dest);
|
||||
} else {
|
||||
// Out of direct reach: the call has to go through the trampoline stub.
assert (trampoline_stub_addr != nullptr, "we need a trampoline");
|
||||
set_destination(trampoline_stub_addr);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
address NativeShortCall::get_trampoline() {
|
||||
address call_addr = instruction_address();
|
||||
|
||||
CodeBlob *code = CodeCache::find_blob(call_addr);
|
||||
assert(code != nullptr, "Could not find the containing code blob");
|
||||
|
||||
address jal_destination = MacroAssembler::pd_call_destination(call_addr);
|
||||
if (code != nullptr && code->contains(jal_destination) && NativeShortCallTrampolineStub::is_at(jal_destination)) {
|
||||
return jal_destination;
|
||||
}
|
||||
|
||||
if (code != nullptr && code->is_nmethod()) {
|
||||
return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code);
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool NativeShortCall::has_trampoline() {
|
||||
return NativeShortCall::get_trampoline() != nullptr;
|
||||
}
|
||||
|
||||
address NativeShortCall::trampoline_destination() {
|
||||
return NativeShortCallTrampolineStub::at(get_trampoline())->destination();
|
||||
}
|
||||
|
||||
NativeShortCall* NativeShortCall::at(address addr) {
|
||||
assert_cond(addr != nullptr);
|
||||
assert(NativeShortCall::is_at(addr), "unexpected code at call site: %p", addr);
|
||||
NativeShortCall* call = (NativeShortCall*)(addr);
|
||||
return call;
|
||||
}
|
||||
|
||||
// Returns true when the instruction at `addr` is a jal whose destination
// register is x1, which is the form a short call takes; false otherwise.
bool NativeShortCall::is_at(address addr) {
|
||||
if (MacroAssembler::is_jal_at(addr)) {
|
||||
// A jal with any other destination register is not treated as a call.
if (MacroAssembler::extract_rd(addr) == x1) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool NativeShortCall::is_call_before(address return_address) {
|
||||
return NativeShortCall::is_at(return_address - instruction_size);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// NativeFarCall
|
||||
//
|
||||
// Implements the version of reloc call that calls far targets directly by loading the target address from the stub section.
|
||||
// This is the default (experimental flag UseTrampolines, default false).
|
||||
|
||||
class NativeFarCall: public NativeInstruction {
|
||||
public:
|
||||
@ -478,99 +224,51 @@ bool NativeFarCall::is_call_before(address return_address) {
|
||||
// NativeCall
|
||||
|
||||
address NativeCall::instruction_address() const {
|
||||
if (UseTrampolines) {
|
||||
return NativeShortCall::at(addr_at(0))->instruction_address();
|
||||
} else {
|
||||
return NativeFarCall::at(addr_at(0))->instruction_address();
|
||||
}
|
||||
return NativeFarCall::at(addr_at(0))->instruction_address();
|
||||
}
|
||||
|
||||
address NativeCall::next_instruction_address() const {
|
||||
if (UseTrampolines) {
|
||||
return NativeShortCall::at(addr_at(0))->next_instruction_address();
|
||||
} else {
|
||||
return NativeFarCall::at(addr_at(0))->next_instruction_address();
|
||||
}
|
||||
return NativeFarCall::at(addr_at(0))->next_instruction_address();
|
||||
}
|
||||
|
||||
address NativeCall::return_address() const {
|
||||
if (UseTrampolines) {
|
||||
return NativeShortCall::at(addr_at(0))->return_address();
|
||||
} else {
|
||||
return NativeFarCall::at(addr_at(0))->return_address();
|
||||
}
|
||||
return NativeFarCall::at(addr_at(0))->return_address();
|
||||
}
|
||||
|
||||
address NativeCall::destination() const {
|
||||
if (UseTrampolines) {
|
||||
return NativeShortCall::at(addr_at(0))->destination();
|
||||
} else {
|
||||
return NativeFarCall::at(addr_at(0))->destination();
|
||||
}
|
||||
return NativeFarCall::at(addr_at(0))->destination();
|
||||
}
|
||||
|
||||
address NativeCall::reloc_destination(address orig_address) {
|
||||
if (UseTrampolines) {
|
||||
return NativeShortCall::at(addr_at(0))->reloc_destination(orig_address);
|
||||
} else {
|
||||
return NativeFarCall::at(addr_at(0))->reloc_destination(orig_address);
|
||||
}
|
||||
return NativeFarCall::at(addr_at(0))->reloc_destination(orig_address);
|
||||
}
|
||||
|
||||
void NativeCall::set_destination(address dest) {
|
||||
if (UseTrampolines) {
|
||||
NativeShortCall::at(addr_at(0))->set_destination(dest);
|
||||
} else {
|
||||
NativeFarCall::at(addr_at(0))->set_destination(dest);
|
||||
}
|
||||
NativeFarCall::at(addr_at(0))->set_destination(dest);
|
||||
}
|
||||
|
||||
void NativeCall::verify() {
|
||||
if (UseTrampolines) {
|
||||
NativeShortCall::at(addr_at(0))->verify();
|
||||
} else {
|
||||
NativeFarCall::at(addr_at(0))->verify();;
|
||||
}
|
||||
NativeFarCall::at(addr_at(0))->verify();;
|
||||
}
|
||||
|
||||
void NativeCall::print() {
|
||||
if (UseTrampolines) {
|
||||
NativeShortCall::at(addr_at(0))->print();
|
||||
} else {
|
||||
NativeFarCall::at(addr_at(0))->print();;
|
||||
}
|
||||
NativeFarCall::at(addr_at(0))->print();;
|
||||
}
|
||||
|
||||
bool NativeCall::set_destination_mt_safe(address dest, bool assert_lock) {
|
||||
if (UseTrampolines) {
|
||||
return NativeShortCall::at(addr_at(0))->set_destination_mt_safe(dest, assert_lock);
|
||||
} else {
|
||||
return NativeFarCall::at(addr_at(0))->set_destination_mt_safe(dest, assert_lock);
|
||||
}
|
||||
return NativeFarCall::at(addr_at(0))->set_destination_mt_safe(dest, assert_lock);
|
||||
}
|
||||
|
||||
bool NativeCall::reloc_set_destination(address dest) {
|
||||
if (UseTrampolines) {
|
||||
return NativeShortCall::at(addr_at(0))->reloc_set_destination(dest);
|
||||
} else {
|
||||
return NativeFarCall::at(addr_at(0))->reloc_set_destination(dest);
|
||||
}
|
||||
return NativeFarCall::at(addr_at(0))->reloc_set_destination(dest);
|
||||
}
|
||||
|
||||
bool NativeCall::is_at(address addr) {
|
||||
if (UseTrampolines) {
|
||||
return NativeShortCall::is_at(addr);
|
||||
} else {
|
||||
return NativeFarCall::is_at(addr);
|
||||
}
|
||||
return NativeFarCall::is_at(addr);
|
||||
}
|
||||
|
||||
bool NativeCall::is_call_before(address return_address) {
|
||||
if (UseTrampolines) {
|
||||
return NativeShortCall::is_call_before(return_address);
|
||||
} else {
|
||||
return NativeFarCall::is_call_before(return_address);
|
||||
}
|
||||
return NativeFarCall::is_call_before(return_address);
|
||||
}
|
||||
|
||||
NativeCall* nativeCall_at(address addr) {
|
||||
@ -583,11 +281,7 @@ NativeCall* nativeCall_at(address addr) {
|
||||
NativeCall* nativeCall_before(address return_address) {
|
||||
assert_cond(return_address != nullptr);
|
||||
NativeCall* call = nullptr;
|
||||
if (UseTrampolines) {
|
||||
call = (NativeCall*)(return_address - NativeShortCall::return_address_offset);
|
||||
} else {
|
||||
call = (NativeCall*)(return_address - NativeFarCall::return_address_offset);
|
||||
}
|
||||
call = (NativeCall*)(return_address - NativeFarCall::return_address_offset);
|
||||
DEBUG_ONLY(call->verify());
|
||||
return call;
|
||||
}
|
||||
|
@ -124,12 +124,9 @@ class NativeCall: private NativeInstruction {
|
||||
public:
|
||||
|
||||
static int byte_size() {
|
||||
if (UseTrampolines) {
|
||||
return NativeInstruction::instruction_size; // jal
|
||||
} else {
|
||||
return 3 * NativeInstruction::instruction_size; // auipc + ld + jalr
|
||||
}
|
||||
return 3 * NativeInstruction::instruction_size; // auipc + ld + jalr
|
||||
}
|
||||
|
||||
// Creation
|
||||
friend NativeCall* nativeCall_at(address addr);
|
||||
friend NativeCall* nativeCall_before(address return_address);
|
||||
|
@ -1238,38 +1238,25 @@ bool needs_acquiring_load_reserved(const Node *n)
|
||||
|
||||
int MachCallStaticJavaNode::ret_addr_offset()
|
||||
{
|
||||
if (UseTrampolines) {
|
||||
return 1 * NativeInstruction::instruction_size; // jal
|
||||
}
|
||||
return 3 * NativeInstruction::instruction_size; // auipc + ld + jalr
|
||||
}
|
||||
|
||||
int MachCallDynamicJavaNode::ret_addr_offset()
|
||||
{
|
||||
if (UseTrampolines) {
|
||||
return NativeMovConstReg::movptr2_instruction_size + NativeInstruction::instruction_size; // movptr2, jal
|
||||
}
|
||||
return NativeMovConstReg::movptr2_instruction_size + (3 * NativeInstruction::instruction_size); // movptr2, auipc + ld + jalr
|
||||
}
|
||||
|
||||
int MachCallRuntimeNode::ret_addr_offset() {
|
||||
// For generated stubs the call will be:
|
||||
// auipc + ld + jalr
|
||||
// Using trampos:
|
||||
// jal(addr)
|
||||
// or with far branches
|
||||
// jal(trampoline_stub)
|
||||
// for real runtime callouts it will be 8 instructions
|
||||
// For address inside the code cache the call will be:
|
||||
// auipc + jalr
|
||||
// For real runtime callouts it will be 8 instructions
|
||||
// see riscv_enc_java_to_runtime
|
||||
// la(t0, retaddr) -> auipc + addi
|
||||
// sd(t0, Address(xthread, JavaThread::last_Java_pc_offset())) -> sd
|
||||
// movptr(t1, addr, offset, t0) -> lui + lui + slli + add
|
||||
// jalr(t1, offset) -> jalr
|
||||
if (CodeCache::contains(_entry_point)) {
|
||||
if (UseTrampolines) {
|
||||
return 1 * NativeInstruction::instruction_size;
|
||||
}
|
||||
return 3 * NativeInstruction::instruction_size;
|
||||
return 2 * NativeInstruction::instruction_size;
|
||||
} else {
|
||||
return 8 * NativeInstruction::instruction_size;
|
||||
}
|
||||
@ -2433,10 +2420,8 @@ encode %{
|
||||
// The NOP here is purely to ensure that eliding a call to
|
||||
// JVM_EnsureMaterializedForStackWalk doesn't change the code size.
|
||||
__ nop();
|
||||
if (!UseTrampolines) {
|
||||
__ nop();
|
||||
__ nop();
|
||||
}
|
||||
__ nop();
|
||||
__ nop();
|
||||
__ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
|
||||
} else {
|
||||
int method_index = resolved_method_index(masm);
|
||||
@ -2487,17 +2472,13 @@ encode %{
|
||||
enc_class riscv_enc_java_to_runtime(method meth) %{
|
||||
Assembler::IncompressibleRegion ir(masm); // Fixed length: see ret_addr_offset
|
||||
|
||||
// some calls to generated routines (arraycopy code) are scheduled
|
||||
// by C2 as runtime calls. if so we can call them using a jr (they
|
||||
// will be in a reachable segment) otherwise we have to use a jalr
|
||||
// which loads the absolute address into a register.
|
||||
// Some calls to generated routines (arraycopy code) are scheduled by C2
|
||||
// as runtime calls. If so we can call them using a far call (they will be
|
||||
// in the code cache, thus in a reachable segment) otherwise we have to use
|
||||
// a movptr+jalr pair which loads the absolute address into a register.
|
||||
address entry = (address)$meth$$method;
|
||||
if (CodeCache::contains(entry)) {
|
||||
address call = __ reloc_call(Address(entry, relocInfo::runtime_call_type));
|
||||
if (call == nullptr) {
|
||||
ciEnv::current()->record_failure("CodeCache is full");
|
||||
return;
|
||||
}
|
||||
__ far_call(Address(entry, relocInfo::runtime_call_type));
|
||||
__ post_call_nop();
|
||||
} else {
|
||||
Label retaddr;
|
||||
|
Loading…
Reference in New Issue
Block a user