8257882: Implement linkToNative intrinsic on AArch64

Reviewed-by: jvernee, mcimadamore, aph
Author: Nick Gasson, 2020-12-17 03:24:40 +00:00
Parent commit: 11bd7a814f
This commit: 643ddc1320
13 changed files with 266 additions and 57 deletions

View File

@@ -1772,8 +1772,13 @@ int MachCallRuntimeNode::ret_addr_offset() {
}
int MachCallNativeNode::ret_addr_offset() {
ShouldNotReachHere();
return -1;
// This is implemented using aarch64_enc_java_to_runtime as above.
CodeBlob *cb = CodeCache::find_blob(_entry_point);
if (cb) {
return 1 * NativeInstruction::instruction_size;
} else {
return 6 * NativeInstruction::instruction_size;
}
}
// Indicate if the safepoint node needs the polling page as an input
@@ -16044,6 +16049,21 @@ instruct CallLeafNoFPDirect(method meth)
ins_pipe(pipe_class_call);
%}
instruct CallNativeDirect(method meth)
%{
match(CallNative);
effect(USE meth);
ins_cost(CALL_COST);
format %{ "CALL, native $meth" %}
ins_encode( aarch64_enc_java_to_runtime(meth) );
ins_pipe(pipe_class_call);
%}
// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.

View File

@@ -354,6 +354,11 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const {
assert(map->include_argument_oops(), "should be set by clear");
vmassert(jfa->last_Java_pc() != NULL, "not walkable");
frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
if (jfa->saved_fp_address()) {
update_map_with_saved_link(map, jfa->saved_fp_address());
}
return fr;
}

View File

@@ -314,7 +314,7 @@ class ZSaveLiveRegisters {
private:
MacroAssembler* const _masm;
RegSet _gp_regs;
RegSet _fp_regs;
FloatRegSet _fp_regs;
public:
void initialize(ZLoadBarrierStubC2* stub) {
@@ -327,7 +327,7 @@ public:
if (vm_reg->is_Register()) {
_gp_regs += RegSet::of(vm_reg->as_Register());
} else if (vm_reg->is_FloatRegister()) {
_fp_regs += RegSet::of((Register)vm_reg->as_FloatRegister());
_fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
} else {
fatal("Unknown register type");
}

View File

@@ -31,6 +31,9 @@ private:
// FP value associated with _last_Java_sp:
intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to
// (Optional) location of saved FP register, which GCs want to inspect
intptr_t** volatile _saved_fp_address;
public:
// Each arch must define reset, save, restore
// These are used by objects that only care about:
@@ -44,6 +47,7 @@ public:
OrderAccess::release();
_last_Java_fp = NULL;
_last_Java_pc = NULL;
_saved_fp_address = NULL;
}
void copy(JavaFrameAnchor* src) {
@@ -62,6 +66,8 @@ public:
_last_Java_pc = src->_last_Java_pc;
// Must be last so profiler will always see valid frame if has_last_frame() is true
_last_Java_sp = src->_last_Java_sp;
_saved_fp_address = src->_saved_fp_address;
}
bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; }
@@ -72,9 +78,12 @@ public:
address last_Java_pc(void) { return _last_Java_pc; }
intptr_t** saved_fp_address(void) const { return _saved_fp_address; }
private:
static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); }
static ByteSize saved_fp_address_offset() { return byte_offset_of(JavaFrameAnchor, _saved_fp_address); }
public:

View File

@@ -318,6 +318,8 @@ void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
// Always clear the pc because it could have been set by make_walkable()
str(zr, Address(rthread, JavaThread::last_Java_pc_offset()));
str(zr, Address(rthread, JavaThread::saved_fp_address_offset()));
}
// Calls to C land
@@ -5293,8 +5295,9 @@ void MacroAssembler::cache_wbsync(bool is_pre) {
}
void MacroAssembler::verify_sve_vector_length() {
// Make sure that native code does not change SVE vector length.
if (!UseSVE) return;
Label verify_ok;
assert(UseSVE > 0, "should only be used for SVE");
movw(rscratch1, zr);
sve_inc(rscratch1, B);
subsw(zr, rscratch1, VM_Version::get_initial_sve_vector_length());

View File

@@ -474,8 +474,8 @@ public:
void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
static RegSet call_clobbered_registers();

View File

@@ -63,10 +63,6 @@ class RegisterImpl: public AbstractRegisterImpl {
bool has_byte_register() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_byte_registers; }
const char* name() const;
int encoding_nocheck() const { return (intptr_t)this; }
// Return the bit which represents this register. This is intended
// to be ORed into a bitmask: for usage see class RegSet below.
uint64_t bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; }
};
// The integer registers of the aarch64 architecture
@@ -304,91 +300,93 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
static const int max_pr;
};
class RegSetIterator;
template <class RegImpl = Register> class RegSetIterator;
// A set of registers
class RegSet {
template <class RegImpl>
class AbstractRegSet {
uint32_t _bitset;
RegSet(uint32_t bitset) : _bitset(bitset) { }
AbstractRegSet(uint32_t bitset) : _bitset(bitset) { }
public:
RegSet() : _bitset(0) { }
AbstractRegSet() : _bitset(0) { }
RegSet(Register r1) : _bitset(r1->bit()) { }
AbstractRegSet(RegImpl r1) : _bitset(1 << r1->encoding()) { }
RegSet operator+(const RegSet aSet) const {
RegSet result(_bitset | aSet._bitset);
AbstractRegSet operator+(const AbstractRegSet aSet) const {
AbstractRegSet result(_bitset | aSet._bitset);
return result;
}
RegSet operator-(const RegSet aSet) const {
RegSet result(_bitset & ~aSet._bitset);
AbstractRegSet operator-(const AbstractRegSet aSet) const {
AbstractRegSet result(_bitset & ~aSet._bitset);
return result;
}
RegSet &operator+=(const RegSet aSet) {
AbstractRegSet &operator+=(const AbstractRegSet aSet) {
*this = *this + aSet;
return *this;
}
RegSet &operator-=(const RegSet aSet) {
AbstractRegSet &operator-=(const AbstractRegSet aSet) {
*this = *this - aSet;
return *this;
}
static RegSet of(Register r1) {
return RegSet(r1);
static AbstractRegSet of(RegImpl r1) {
return AbstractRegSet(r1);
}
static RegSet of(Register r1, Register r2) {
static AbstractRegSet of(RegImpl r1, RegImpl r2) {
return of(r1) + r2;
}
static RegSet of(Register r1, Register r2, Register r3) {
static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3) {
return of(r1, r2) + r3;
}
static RegSet of(Register r1, Register r2, Register r3, Register r4) {
static AbstractRegSet of(RegImpl r1, RegImpl r2, RegImpl r3, RegImpl r4) {
return of(r1, r2, r3) + r4;
}
static RegSet range(Register start, Register end) {
static AbstractRegSet range(RegImpl start, RegImpl end) {
uint32_t bits = ~0;
bits <<= start->encoding();
bits <<= 31 - end->encoding();
bits >>= 31 - end->encoding();
return RegSet(bits);
return AbstractRegSet(bits);
}
uint32_t bits() const { return _bitset; }
private:
Register first() {
uint32_t first = _bitset & -_bitset;
return first ? as_Register(exact_log2(first)) : noreg;
}
RegImpl first();
public:
friend class RegSetIterator;
friend class RegSetIterator<RegImpl>;
RegSetIterator begin();
RegSetIterator<RegImpl> begin();
};
typedef AbstractRegSet<Register> RegSet;
typedef AbstractRegSet<FloatRegister> FloatRegSet;
template <class RegImpl>
class RegSetIterator {
RegSet _regs;
AbstractRegSet<RegImpl> _regs;
public:
RegSetIterator(RegSet x): _regs(x) {}
RegSetIterator(AbstractRegSet<RegImpl> x): _regs(x) {}
RegSetIterator(const RegSetIterator& mit) : _regs(mit._regs) {}
RegSetIterator& operator++() {
Register r = _regs.first();
if (r != noreg)
RegImpl r = _regs.first();
if (r->is_valid())
_regs -= r;
return *this;
}
@@ -400,13 +398,26 @@ public:
return ! (rhs == *this);
}
Register operator*() {
RegImpl operator*() {
return _regs.first();
}
};
inline RegSetIterator RegSet::begin() {
return RegSetIterator(*this);
template <class RegImpl>
inline RegSetIterator<RegImpl> AbstractRegSet<RegImpl>::begin() {
return RegSetIterator<RegImpl>(*this);
}
template <>
inline Register AbstractRegSet<Register>::first() {
uint32_t first = _bitset & -_bitset;
return first ? as_Register(exact_log2(first)) : noreg;
}
template <>
inline FloatRegister AbstractRegSet<FloatRegister>::first() {
uint32_t first = _bitset & -_bitset;
return first ? as_FloatRegister(exact_log2(first)) : fnoreg;
}
#endif // CPU_AARCH64_REGISTER_AARCH64_HPP

View File

@@ -1855,10 +1855,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Force this write out before the read below
__ dmb(Assembler::ISH);
if (UseSVE > 0) {
// Make sure that jni code does not change SVE vector length.
__ verify_sve_vector_length();
}
__ verify_sve_vector_length();
// Check for safepoint operation in progress and/or pending suspend requests.
{
@@ -3071,9 +3068,168 @@ void OptoRuntime::generate_exception_blob() {
}
#endif // COMPILER2
// ---------------------------------------------------------------
class NativeInvokerGenerator : public StubCodeGenerator {
address _call_target;
int _shadow_space_bytes;
const GrowableArray<VMReg>& _input_registers;
const GrowableArray<VMReg>& _output_registers;
public:
NativeInvokerGenerator(CodeBuffer* buffer,
address call_target,
int shadow_space_bytes,
const GrowableArray<VMReg>& input_registers,
const GrowableArray<VMReg>& output_registers)
: StubCodeGenerator(buffer, PrintMethodHandleStubs),
_call_target(call_target),
_shadow_space_bytes(shadow_space_bytes),
_input_registers(input_registers),
_output_registers(output_registers) {}
void generate();
private:
#ifdef ASSERT
bool target_uses_register(VMReg reg) {
return _input_registers.contains(reg) || _output_registers.contains(reg);
}
#endif
};
static const int native_invoker_code_size = 1024;
BufferBlob* SharedRuntime::make_native_invoker(address call_target,
int shadow_space_bytes,
const GrowableArray<VMReg>& input_registers,
const GrowableArray<VMReg>& output_registers) {
return NULL;
int shadow_space_bytes,
const GrowableArray<VMReg>& input_registers,
const GrowableArray<VMReg>& output_registers) {
BufferBlob* _invoke_native_blob =
BufferBlob::create("nep_invoker_blob", native_invoker_code_size);
if (_invoke_native_blob == NULL)
return NULL; // allocation failure
CodeBuffer code(_invoke_native_blob);
NativeInvokerGenerator g(&code, call_target, shadow_space_bytes, input_registers, output_registers);
g.generate();
code.log_section_sizes("nep_invoker_blob");
return _invoke_native_blob;
}
void NativeInvokerGenerator::generate() {
assert(!(target_uses_register(rscratch1->as_VMReg())
|| target_uses_register(rscratch2->as_VMReg())
|| target_uses_register(rthread->as_VMReg())),
"Register conflict");
MacroAssembler* masm = _masm;
__ set_last_Java_frame(sp, noreg, lr, rscratch1);
__ enter();
// Store a pointer to the previous R29 (RFP) saved on the stack as it
// may contain an oop if PreserveFramePointer is off. This value is
// retrieved later by frame::sender_for_entry_frame() when the stack
// is walked.
__ mov(rscratch1, sp);
__ str(rscratch1, Address(rthread, JavaThread::saved_fp_address_offset()));
// State transition
__ mov(rscratch1, _thread_in_native);
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
assert(_shadow_space_bytes == 0, "not expecting shadow space on AArch64");
rt_call(masm, _call_target);
__ mov(rscratch1, _thread_in_native_trans);
__ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
// Force this write out before the read below
__ membar(Assembler::LoadLoad | Assembler::LoadStore |
Assembler::StoreLoad | Assembler::StoreStore);
__ verify_sve_vector_length();
Label L_after_safepoint_poll;
Label L_safepoint_poll_slow_path;
__ safepoint_poll(L_safepoint_poll_slow_path, true /* at_return */, true /* acquire */, false /* in_nmethod */);
__ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbnzw(rscratch1, L_safepoint_poll_slow_path);
__ bind(L_after_safepoint_poll);
// change thread state
__ mov(rscratch1, _thread_in_Java);
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
__ block_comment("reguard stack check");
Label L_reguard;
Label L_after_reguard;
__ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
__ cmpw(rscratch1, StackOverflow::stack_guard_yellow_reserved_disabled);
__ br(Assembler::EQ, L_reguard);
__ bind(L_after_reguard);
__ reset_last_Java_frame(true);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(lr);
//////////////////////////////////////////////////////////////////////////////
__ block_comment("{ L_safepoint_poll_slow_path");
__ bind(L_safepoint_poll_slow_path);
// Need to save the native result registers around any runtime calls.
RegSet spills;
FloatRegSet fp_spills;
for (int i = 0; i < _output_registers.length(); i++) {
VMReg output = _output_registers.at(i);
if (output->is_Register()) {
spills += RegSet::of(output->as_Register());
} else if (output->is_FloatRegister()) {
fp_spills += FloatRegSet::of(output->as_FloatRegister());
}
}
__ push(spills, sp);
__ push_fp(fp_spills, sp);
__ mov(c_rarg0, rthread);
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
__ blr(rscratch1);
__ pop_fp(fp_spills, sp);
__ pop(spills, sp);
__ b(L_after_safepoint_poll);
__ block_comment("} L_safepoint_poll_slow_path");
//////////////////////////////////////////////////////////////////////////////
__ block_comment("{ L_reguard");
__ bind(L_reguard);
__ push(spills, sp);
__ push_fp(fp_spills, sp);
rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
__ pop_fp(fp_spills, sp);
__ pop(spills, sp);
__ b(L_after_reguard);
__ block_comment("} L_reguard");
//////////////////////////////////////////////////////////////////////////////
__ flush();
}

View File

@@ -1319,7 +1319,7 @@ class StubGenerator: public StubCodeGenerator {
= MacroAssembler::call_clobbered_registers() - rscratch1;
__ mov(rscratch1, (uint64_t)0xdeadbeef);
__ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
for (RegSetIterator it = clobbered.begin(); *it != noreg; ++it) {
for (RegSetIterator<> it = clobbered.begin(); *it != noreg; ++it) {
__ mov(*it, rscratch1);
}
#endif
@@ -5696,7 +5696,7 @@ class StubGenerator: public StubCodeGenerator {
// Register allocation
RegSetIterator regs = (RegSet::range(r0, r26) - r18_tls).begin();
RegSetIterator<> regs = (RegSet::range(r0, r26) - r18_tls).begin();
Pa_base = *regs; // Argument registers
if (squaring)
Pb_base = Pa_base;

View File

@@ -1373,10 +1373,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ push(dtos);
__ push(ltos);
if (UseSVE > 0) {
// Make sure that jni code does not change SVE vector length.
__ verify_sve_vector_length();
}
__ verify_sve_vector_length();
// change thread state
__ mov(rscratch1, _thread_in_native_trans);

View File

@@ -39,6 +39,10 @@
return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset();
}
static ByteSize saved_fp_address_offset() {
return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::saved_fp_address_offset();
}
bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext,
bool isInJava);

View File

@@ -38,6 +38,10 @@
return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset();
}
static ByteSize saved_fp_address_offset() {
return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::saved_fp_address_offset();
}
bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext,
bool isInJava);

View File

@@ -23,7 +23,7 @@
/*
* @test
* @requires os.arch=="amd64" | os.arch=="x86_64"
* @requires os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
* @run testng/othervm
* -Djdk.internal.foreign.ProgrammableInvoker.USE_SPEC=true
* -Djdk.internal.foreign.ProgrammableInvoker.USE_INTRINSICS=true