8294366: RISC-V: Partially mark out incompressible regions

Reviewed-by: fyang, yadongwang
This commit is contained in:
Xiaolin Zheng 2022-10-08 06:41:45 +00:00 committed by Fei Yang
parent 495c043533
commit 542cc602a7
8 changed files with 75 additions and 61 deletions

@ -640,8 +640,8 @@ public:
emit(insn); \
}
INSN(_beq, 0b1100011, 0b000);
INSN(_bne, 0b1100011, 0b001);
INSN(beq, 0b1100011, 0b000);
INSN(bne, 0b1100011, 0b001);
INSN(bge, 0b1100011, 0b101);
INSN(bgeu, 0b1100011, 0b111);
INSN(blt, 0b1100011, 0b100);
@ -858,7 +858,7 @@ public:
emit(insn); \
}
INSN(_jal, 0b1101111);
INSN(jal, 0b1101111);
#undef INSN
@ -2079,20 +2079,30 @@ enum Nf {
// RISC-V Compressed Instructions Extension
// ========================================
// Note:
// 1. When UseRVC is enabled, 32-bit instructions under 'CompressibleRegion's will be
// transformed to 16-bit instructions if compressible.
// 2. RVC instructions in Assembler always begin with 'c_' prefix, as 'c_li',
// but most of time we have no need to explicitly use these instructions.
// 3. 'CompressibleRegion' is introduced to hint instructions in this Region's RTTI range
// are qualified to be compressed with their 2-byte versions.
// An example:
// 1. Assembler functions encoding 16-bit compressed instructions always begin with a 'c_'
// prefix, such as 'c_add'. Correspondingly, assembler functions encoding normal 32-bit
// instructions always begin with a '_' prefix, such as '_add'. Most of the time users have no
// need to explicitly emit these compressed instructions. Instead, they still use unified
// wrappers such as 'add' which do the compressing work through 'c_add' depending on the
// operands of the instruction and availability of the RVC hardware extension.
//
// 2. 'CompressibleRegion' and 'IncompressibleRegion' are introduced to mark assembler scopes
// within which instructions are qualified or unqualified to be compressed into their 16-bit
// versions. An example:
//
// CompressibleRegion cr(_masm);
// __ andr(...); // this instruction could change to c.and if able to
//
// 4. Using -XX:PrintAssemblyOptions=no-aliases could distinguish RVC instructions from
// normal ones.
// __ add(...); // this instruction will be compressed into 'c.add' when possible
// {
// IncompressibleRegion ir(_masm);
// __ add(...); // this instruction will not be compressed
// {
// CompressibleRegion cr(_masm);
// __ add(...); // this instruction will be compressed into 'c.add' when possible
// }
// }
//
// 3. When printing JIT assembly code, using -XX:PrintAssemblyOptions=no-aliases could help
// distinguish compressed 16-bit instructions from normal 32-bit ones.
private:
bool _in_compressible_region;
@ -2101,21 +2111,36 @@ public:
// Sets the flag that records whether the assembler is currently inside a
// compressible region.
void set_in_compressible_region(bool b) {
  _in_compressible_region = b;
}
public:
// a compressible region
class CompressibleRegion : public StackObj {
// an abstract compressible region
class AbstractCompressibleRegion : public StackObj {
protected:
Assembler *_masm;
bool _saved_in_compressible_region;
public:
CompressibleRegion(Assembler *_masm)
protected:
AbstractCompressibleRegion(Assembler *_masm)
: _masm(_masm)
, _saved_in_compressible_region(_masm->in_compressible_region()) {
, _saved_in_compressible_region(_masm->in_compressible_region()) {}
};
// A scope within which instructions are qualified to be compressed. Entering
// the scope turns the assembler's compressible-region flag on; leaving it puts
// back the value the flag had when the scope was entered, so scopes can nest.
class CompressibleRegion : public AbstractCompressibleRegion {
 public:
  CompressibleRegion(Assembler *masm) : AbstractCompressibleRegion(masm) {
    // The base class has already captured the previous flag value.
    masm->set_in_compressible_region(true);
  }

  ~CompressibleRegion() {
    const bool previous = _saved_in_compressible_region;
    _masm->set_in_compressible_region(previous);
  }
};
// A scope within which instructions are not qualified to be compressed.
// Entering the scope turns the assembler's compressible-region flag off;
// leaving it puts back the value the flag had when the scope was entered, so
// scopes can nest.
class IncompressibleRegion : public AbstractCompressibleRegion {
 public:
  IncompressibleRegion(Assembler *masm) : AbstractCompressibleRegion(masm) {
    // The base class has already captured the previous flag value.
    masm->set_in_compressible_region(false);
  }

  ~IncompressibleRegion() {
    const bool previous = _saved_in_compressible_region;
    _masm->set_in_compressible_region(previous);
  }
};
// patch a 16-bit instruction.
static void c_patch(address a, unsigned msb, unsigned lsb, uint16_t val) {
@ -2816,43 +2841,8 @@ public:
#undef INSN
// --------------------------
// Conditional branch instructions
// --------------------------
// Generates the unified NAME(Rs1, Rs2, offset) wrapper. The wrapper emits the
// compressed C_NAME(Rs1, offset) form only when all of the following hold:
//   - do_compress() returns true,
//   - offset is non-zero and satisfies is_imm_in_range(offset, 8, 1),
//   - Rs2 is x0,
//   - Rs1->is_compressed_valid() returns true.
// In every other case it falls through to NORMAL_NAME(Rs1, Rs2, offset).
#define INSN(NAME, C_NAME, NORMAL_NAME) \
void NAME(Register Rs1, Register Rs2, const int64_t offset) { \
/* beq/bne -> c.beqz/c.bnez */ \
if (do_compress() && \
(offset != 0 && Rs2 == x0 && Rs1->is_compressed_valid() && \
is_imm_in_range(offset, 8, 1))) { \
C_NAME(Rs1, offset); \
return; \
} \
NORMAL_NAME(Rs1, Rs2, offset); \
}
// beq/bne map to c_beqz/c_bnez when compressible, otherwise to _beq/_bne.
INSN(beq, c_beqz, _beq);
INSN(bne, c_bnez, _bne);
#undef INSN
// --------------------------
// Unconditional branch instructions
// --------------------------
// Generates the unified NAME(Rd, offset) wrapper. The wrapper emits
// c_j(offset) only when do_compress() returns true, Rd is x0, and offset is
// non-zero and satisfies is_imm_in_range(offset, 11, 1). In every other case
// it falls through to _jal(Rd, offset).
#define INSN(NAME) \
void NAME(Register Rd, const int32_t offset) { \
/* jal -> c.j */ \
if (do_compress() && offset != 0 && Rd == x0 && is_imm_in_range(offset, 11, 1)) { \
c_j(offset); \
return; \
} \
_jal(Rd, offset); \
}
INSN(jal);
#undef INSN
// --------------------------
#define INSN(NAME) \
void NAME(Register Rd, Register Rs, const int32_t offset) { \

@ -323,8 +323,9 @@ void C1_MacroAssembler::verified_entry(bool breakAtEntry) {
// first instruction with a jump. For this action to be legal we
// must ensure that this first instruction is a J, JAL or NOP.
// Make it a NOP.
IncompressibleRegion ir(this); // keep the nop as 4 bytes for patching.
assert_alignment(pc());
nop();
nop(); // 4 bytes
}
void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) {

@ -243,6 +243,8 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp);
void C2_MacroAssembler::emit_entry_barrier_stub(C2EntryBarrierStub* stub) {
IncompressibleRegion ir(this); // Fixed length: see C2_MacroAssembler::entry_barrier_stub_size()
// make guard value 4-byte aligned so that it can be accessed by atomic instructions on riscv
int alignment_bytes = align(4);

@ -193,6 +193,8 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slo
return;
}
Assembler::IncompressibleRegion ir(masm); // Fixed length: see entry_barrier_offset()
Label local_guard;
NMethodPatchingType patching_type = nmethod_patching_type();

@ -241,6 +241,7 @@ void MacroAssembler::set_last_Java_frame(Register last_java_sp,
set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
} else {
L.add_patch_at(code(), locator());
IncompressibleRegion ir(this); // the label address will be patched back.
set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
}
}
@ -549,6 +550,7 @@ void MacroAssembler::unimplemented(const char* what) {
}
void MacroAssembler::emit_static_call_stub() {
IncompressibleRegion ir(this); // Fixed length: see CompiledStaticCall::to_interp_stub_size().
// CompiledDirectStaticCall::set_to_interpreted knows the
// exact layout of this stub.
@ -751,6 +753,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) {
}
void MacroAssembler::la(Register Rd, Label &label) {
IncompressibleRegion ir(this); // the label address may be patched back.
la(Rd, target(label));
}
@ -2437,6 +2440,7 @@ void MacroAssembler::far_jump(Address entry, Register tmp) {
assert(entry.rspec().type() == relocInfo::external_word_type
|| entry.rspec().type() == relocInfo::runtime_call_type
|| entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size()
int32_t offset = 0;
if (far_branches()) {
// We can use auipc + jalr here because we know that the total size of
@ -2455,6 +2459,7 @@ void MacroAssembler::far_call(Address entry, Register tmp) {
assert(entry.rspec().type() == relocInfo::external_word_type
|| entry.rspec().type() == relocInfo::runtime_call_type
|| entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
IncompressibleRegion ir(this); // Fixed length: see MacroAssembler::far_branch_size()
int32_t offset = 0;
if (far_branches()) {
// We can use auipc + jalr here because we know that the total size of

@ -390,6 +390,7 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add
void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
CodeBuffer cb(code_pos, instruction_size);
MacroAssembler a(&cb);
Assembler::IncompressibleRegion ir(&a); // Fixed length: see NativeGeneralJump::get_instruction_size()
int32_t offset = 0;
a.movptr(t0, entry, offset); // lui, addi, slli, addi, slli

@ -1318,8 +1318,11 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
// insert a nop at the start of the prolog so we can patch in a
// branch if we need to invalidate the method later
MacroAssembler::assert_alignment(__ pc());
__ nop();
{
Assembler::IncompressibleRegion ir(&_masm); // keep the nop as 4 bytes for patching.
MacroAssembler::assert_alignment(__ pc());
__ nop(); // 4 bytes
}
assert_cond(C != NULL);
@ -1680,6 +1683,7 @@ void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
C2_MacroAssembler _masm(&cbuf);
Assembler::IncompressibleRegion ir(&_masm); // Fixed length: see BoxLockNode::size()
assert_cond(ra_ != NULL);
int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
@ -2268,6 +2272,7 @@ encode %{
enc_class riscv_enc_java_static_call(method meth) %{
C2_MacroAssembler _masm(&cbuf);
Assembler::IncompressibleRegion ir(&_masm); // Fixed length: see ret_addr_offset
address addr = (address)$meth$$method;
address call = NULL;
@ -2306,6 +2311,7 @@ encode %{
enc_class riscv_enc_java_dynamic_call(method meth) %{
C2_MacroAssembler _masm(&cbuf);
Assembler::IncompressibleRegion ir(&_masm); // Fixed length: see ret_addr_offset
int method_index = resolved_method_index(cbuf);
address call = __ ic_call((address)$meth$$method, method_index);
if (call == NULL) {
@ -2324,6 +2330,7 @@ encode %{
enc_class riscv_enc_java_to_runtime(method meth) %{
C2_MacroAssembler _masm(&cbuf);
Assembler::IncompressibleRegion ir(&_masm); // Fixed length: see ret_addr_offset
// some calls to generated routines (arraycopy code) are scheduled
// by C2 as runtime calls. if so we can call them using a jr (they

@ -956,8 +956,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
int vep_offset = ((intptr_t)__ pc()) - start;
// First instruction must be a nop as it may need to be patched on deoptimisation
MacroAssembler::assert_alignment(__ pc());
__ nop();
{
Assembler::IncompressibleRegion ir(masm); // keep the nop as 4 bytes for patching.
MacroAssembler::assert_alignment(__ pc());
__ nop(); // 4 bytes
}
gen_special_dispatch(masm,
method,
in_sig_bt,
@ -1108,8 +1111,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// If we have to make this method not-entrant we'll overwrite its
// first instruction with a jump.
MacroAssembler::assert_alignment(__ pc());
__ nop();
{
Assembler::IncompressibleRegion ir(masm); // keep the nop as 4 bytes for patching.
MacroAssembler::assert_alignment(__ pc());
__ nop(); // 4 bytes
}
if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
Label L_skip_barrier;