8339771: RISC-V: Reduce icache flushes
Reviewed-by: fyang, mli, luhenry
This commit is contained in:
parent
b1f8d2ea76
commit
97a3933f1b
@ -705,6 +705,16 @@ public:
|
||||
emit(insn);
|
||||
}
|
||||
|
||||
// Emits a single 32-bit instruction word assembled from the fields below
// (the FENCE.I encoding: every field except funct3 and the opcode is zero).
void fencei() {
  unsigned word = 0;
  // Fields are filled from the most significant bits downwards; each patch
  // call writes a disjoint bit range of the word.
  patch((address)&word, 31, 20, 0b000000000000);  // fm (upper 12 bits), all zero
  patch((address)&word, 19, 15, 0b00000);         // rs1
  patch((address)&word, 14, 12, 0b001);           // func
  patch((address)&word, 11, 7, 0b00000);          // rd
  patch((address)&word, 6, 0, 0b0001111);         // opcode
  emit(word);
}
|
||||
|
||||
#define INSN(NAME, op, funct3, funct7) \
|
||||
void NAME() { \
|
||||
unsigned insn = 0; \
|
||||
|
@ -636,8 +636,20 @@ void ZBarrierSetAssembler::patch_barrier_relocation(address addr, int format) {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// A full fence is generated before icache_flush by default in invalidate_word
|
||||
ICache::invalidate_range(addr, bytes);
|
||||
// If we are using UseCtxFencei no ICache invalidation is needed here.
|
||||
// Instead every hart will perform a fence.i, either by a Java thread
|
||||
// (due to patching epoch will take it to slow path),
|
||||
// or by the kernel when a Java thread is moved to a hart.
|
||||
// The instruction stream changes must only happen before the disarm of
|
||||
// the nmethod barrier. The disarm has a leading full two-way fence.
|
||||
// If this is performed during a safepoint, all Java threads will emit a fence.i
|
||||
// before transitioning to 'Java', e.g. leaving native or the safepoint wait barrier.
|
||||
if (!UseCtxFencei) {
|
||||
// ICache invalidation is a serialization point.
|
||||
// The above patching of instructions happens before the invalidation.
|
||||
// Hence it has a leading full two-way fence (wr, wr).
|
||||
ICache::invalidate_range(addr, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef COMPILER2
|
||||
|
@ -122,6 +122,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
||||
product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
|
||||
"Use RVV instructions for left/right shift of BigInteger") \
|
||||
product(bool, UseTrampolines, false, EXPERIMENTAL, \
|
||||
"Far calls uses jal to trampoline.")
|
||||
"Far calls uses jal to trampoline.") \
|
||||
product(bool, UseCtxFencei, false, EXPERIMENTAL, \
|
||||
"Use PR_RISCV_CTX_SW_FENCEI_ON to avoid explicit icache flush")
|
||||
|
||||
#endif // CPU_RISCV_GLOBALS_RISCV_HPP
|
||||
|
@ -3159,6 +3159,13 @@ void MacroAssembler::membar(uint32_t order_constraint) {
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::cmodx_fence() {
|
||||
BLOCK_COMMENT("cmodx fence");
|
||||
if (VM_Version::supports_fencei_barrier()) {
|
||||
Assembler::fencei();
|
||||
}
|
||||
}
|
||||
|
||||
// Form an address from base + offset in Rd. Rd may or may not
|
||||
// actually be used: you must use the Address that is returned. It
|
||||
// is up to you to ensure that the shift provided matches the size
|
||||
|
@ -431,6 +431,8 @@ class MacroAssembler: public Assembler {
|
||||
}
|
||||
}
|
||||
|
||||
void cmodx_fence();
|
||||
|
||||
// Emits the instruction produced by Assembler::fence(w, 0).
// NOTE(review): presumably this is the Zihintpause PAUSE hint (a FENCE with
// predecessor set W and an empty successor set) - confirm against the
// argument order of Assembler::fence.
void pause() { Assembler::fence(w, 0); }
|
||||
|
@ -55,7 +55,21 @@ void Relocation::pd_set_data_value(address x, bool verify_only) {
|
||||
bytes = MacroAssembler::pd_patch_instruction_size(addr(), x);
|
||||
break;
|
||||
}
|
||||
ICache::invalidate_range(addr(), bytes);
|
||||
|
||||
// If we are using UseCtxFencei no ICache invalidation is needed here.
|
||||
// Instead every hart will perform a fence.i, either by a Java thread
|
||||
// (due to patching epoch will take it to slow path),
|
||||
// or by the kernel when a Java thread is moved to a hart.
|
||||
// The instruction stream changes must only happen before the disarm of
|
||||
// the nmethod barrier. The disarm has a leading full two-way fence.
|
||||
// If this is performed during a safepoint, all Java threads will emit a fence.i
|
||||
// before transitioning to 'Java', e.g. leaving native or the safepoint wait barrier.
|
||||
if (!UseCtxFencei) {
|
||||
// ICache invalidation is a serialization point.
|
||||
// The above patching of instructions happens before the invalidation.
|
||||
// Hence it has a leading full two-way fence (wr, wr).
|
||||
ICache::invalidate_range(addr(), bytes);
|
||||
}
|
||||
}
|
||||
|
||||
address Relocation::pd_call_destination(address orig_addr) {
|
||||
|
@ -2428,6 +2428,14 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ la(t1, ExternalAddress(bs_asm->patching_epoch_addr()));
|
||||
__ lwu(t1, t1);
|
||||
__ sw(t1, thread_epoch_addr);
|
||||
// There are two ways this can work:
|
||||
// - The writer did system icache shootdown after the instruction stream update.
|
||||
// Hence do nothing.
|
||||
// - The writer trusts us to make sure our icache is in sync before entering.
|
||||
// Hence use cmodx fence (fence.i, may change).
|
||||
if (UseCtxFencei) {
|
||||
__ cmodx_fence();
|
||||
}
|
||||
__ membar(__ LoadLoad);
|
||||
}
|
||||
|
||||
|
@ -285,6 +285,7 @@ class VM_Version : public Abstract_VM_Version {
|
||||
|
||||
// RISCV64 supports fast class initialization checks
|
||||
static bool supports_fast_class_init_checks() { return true; }
|
||||
// Reports whether the Zifencei extension flag is enabled.
static bool supports_fencei_barrier() {
  return ext_Zifencei.enabled();
}
|
||||
};
|
||||
|
||||
#endif // CPU_RISCV_VM_VERSION_RISCV_HPP
|
||||
|
@ -54,6 +54,24 @@ inline void OrderAccess::fence() {
|
||||
}
|
||||
|
||||
// Cross-modifying-code fence for the current hart: checks that fence.i is
// reported as supported and then executes it.
inline void OrderAccess::cross_modify_fence_impl() {
  // From chapter 3, "Zifencei" Instruction-Fetch Fence, Version 2.0:
  //   "RISC-V does not guarantee that stores to instruction memory will be
  //   made visible to instruction fetches on a RISC-V hart until that hart
  //   executes a FENCE.I instruction. A FENCE.I instruction ensures that a
  //   subsequent instruction fetch on a RISC-V hart will see any previous
  //   data stores already visible to the same RISC-V hart. FENCE.I does not
  //   ensure that other RISC-V harts' instruction fetches will observe the
  //   local hart's stores in a multiprocessor system."
  //
  // fence.i therefore only synchronizes the hart that executes it. To rely
  // on it directly, the kernel must support PR_RISCV_CTX_SW_FENCEI_ON, so
  // that after a context switch to another hart instruction fetches are
  // still guaranteed to see any previous data stores.
  //
  // The alternative is a full system IPI (system-wide icache sync), in which
  // case this barrier is not strictly needed. Since it is emitted on the
  // runtime slow path, typically after a safepoint, it is always emitted.
  guarantee(VM_Version::supports_fencei_barrier(), "Linux kernel require fence.i");
  __asm__ volatile("fence.i" ::: "memory");
}
|
||||
|
||||
#endif // OS_CPU_LINUX_RISCV_ORDERACCESS_LINUX_RISCV_HPP
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <asm/hwcap.h>
|
||||
#include <ctype.h>
|
||||
#include <sys/auxv.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#ifndef HWCAP_ISA_I
|
||||
#define HWCAP_ISA_I nth_bit('I' - 'A')
|
||||
@ -82,6 +83,23 @@
|
||||
__v; \
|
||||
})
|
||||
|
||||
// Fallback definitions for the RISC-V icache-flush-context prctl interface.
// The prctl PR_RISCV_SET_ICACHE_FLUSH_CTX is from Linux 6.9; each constant is
// defined here only when the included headers have not already provided it.
#if !defined(PR_RISCV_SET_ICACHE_FLUSH_CTX)
#  define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
#endif

#if !defined(PR_RISCV_CTX_SW_FENCEI_ON)
#  define PR_RISCV_CTX_SW_FENCEI_ON 0
#endif

#if !defined(PR_RISCV_CTX_SW_FENCEI_OFF)
#  define PR_RISCV_CTX_SW_FENCEI_OFF 1
#endif

#if !defined(PR_RISCV_SCOPE_PER_PROCESS)
#  define PR_RISCV_SCOPE_PER_PROCESS 0
#endif

#if !defined(PR_RISCV_SCOPE_PER_THREAD)
#  define PR_RISCV_SCOPE_PER_THREAD 1
#endif
|
||||
|
||||
uint32_t VM_Version::cpu_vector_length() {
|
||||
assert(ext_V.enabled(), "should not call this");
|
||||
return (uint32_t)read_csr(CSR_VLENB);
|
||||
@ -102,6 +120,7 @@ void VM_Version::setup_cpu_available_features() {
|
||||
if (!RiscvHwprobe::probe_features()) {
|
||||
os_aux_features();
|
||||
}
|
||||
|
||||
char* uarch = os_uarch_additional_features();
|
||||
vendor_features();
|
||||
|
||||
@ -155,6 +174,24 @@ void VM_Version::setup_cpu_available_features() {
|
||||
i++;
|
||||
}
|
||||
|
||||
// Linux kernel requires Zifencei
|
||||
if (!ext_Zifencei.enabled()) {
|
||||
log_info(os, cpu)("Zifencei not found, required by Linux, enabling.");
|
||||
ext_Zifencei.enable_feature();
|
||||
}
|
||||
|
||||
if (UseCtxFencei) {
|
||||
// Note that we can set this up only for affected threads
|
||||
// via PR_RISCV_SCOPE_PER_THREAD, i.e. on VM attach/detach.
|
||||
int ret = prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON, PR_RISCV_SCOPE_PER_PROCESS);
|
||||
if (ret == 0) {
|
||||
log_debug(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) enabled.");
|
||||
} else {
|
||||
FLAG_SET_ERGO(UseCtxFencei, false);
|
||||
log_info(os, cpu)("UseCtxFencei (PR_RISCV_CTX_SW_FENCEI_ON) disabled, unsupported by kernel.");
|
||||
}
|
||||
}
|
||||
|
||||
_features_string = os::strdup(buf);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user