8295282: Use Zicboz/cbo.zero to zero-out memory on RISC-V
Reviewed-by: yadongwang, vkempik, fyang
This commit is contained in:
parent
d393e051e6
commit
e0c29307f7
@ -2712,6 +2712,44 @@ public:
|
||||
|
||||
#undef INSN
|
||||
|
||||
// Cache Management Operations
|
||||
// Generator for the cache-block-operation emitters instantiated right after
// this macro. Each generated NAME(Rs1) builds one 32-bit instruction word in
// a local, fills its fields with patch()/patch_reg(), and emits it:
//   - patch(..., 6, 0, 0b0001111)  : fixed opcode field
//   - patch(..., 14, 12, 0b010)    : fixed function field
//   - patch_reg(..., 15, Rs1)      : the base-address register
//   - patch(..., 31, 20, funct)    : selects the individual operation
// NOTE(review): the two integer arguments of patch() are presumably the
// inclusive high/low bit positions of the field -- confirm against patch(),
// which is not visible in this excerpt.
#define INSN(NAME, funct) \
|
||||
void NAME(Register Rs1) { \
|
||||
unsigned insn = 0; \
|
||||
patch((address)&insn, 6, 0, 0b0001111); \
|
||||
patch((address)&insn, 14, 12, 0b010); \
|
||||
patch_reg((address)&insn, 15, Rs1); \
|
||||
patch((address)&insn, 31, 20, funct); \
|
||||
emit(insn); \
|
||||
}
|
||||
|
||||
INSN(cbo_inval, 0b0000000000000);
|
||||
INSN(cbo_clean, 0b0000000000001);
|
||||
INSN(cbo_flush, 0b0000000000010);
|
||||
INSN(cbo_zero, 0b0000000000100);
|
||||
|
||||
#undef INSN
|
||||
|
||||
// Generator for the prefetch emitters instantiated right after this macro.
// Each generated NAME(Rs1, offset) builds one 32-bit instruction word and
// emits it:
//   - offset must have its low 5 bits clear (enforced by guarantee());
//   - offset >> 5, masked to 7 bits, is written with bounds (31, 25);
//   - funct is written with bounds (24, 20) and selects the prefetch kind;
//   - Rs1 is patched at position 15; the remaining fields are constants.
// NOTE(review): the `& 0x7f` mask silently drops any offset bits above the
// 7-bit field; only the low-bit alignment is checked, not the overall range,
// so an out-of-range offset would be encoded truncated. Confirm that callers
// never pass such offsets.
#define INSN(NAME, funct) \
|
||||
void NAME(Register Rs1, int32_t offset) { \
|
||||
guarantee((offset & 0x1f) == 0, "offset lowest 5 bits must be zero"); \
|
||||
int32_t upperOffset = offset >> 5; \
|
||||
unsigned insn = 0; \
|
||||
patch((address)&insn, 6, 0, 0b0010011); \
|
||||
patch((address)&insn, 14, 12, 0b110); \
|
||||
patch_reg((address)&insn, 15, Rs1); \
|
||||
patch((address)&insn, 24, 20, funct); \
|
||||
upperOffset &= 0x7f; \
|
||||
patch((address)&insn, 31, 25, upperOffset); \
|
||||
emit(insn); \
|
||||
}
|
||||
|
||||
INSN(prefetch_i, 0b0000000000000);
|
||||
INSN(prefetch_r, 0b0000000000001);
|
||||
INSN(prefetch_w, 0b0000000000011);
|
||||
|
||||
#undef INSN
|
||||
|
||||
// ---------------------------------------------------------------------------------------
|
||||
|
||||
#define INSN(NAME, REGISTER) \
|
||||
|
@ -49,4 +49,6 @@ const bool CCallingConventionRequiresIntsAsLongs = false;
|
||||
|
||||
#define USE_POINTERS_TO_REGISTER_IMPL_ARRAY
|
||||
|
||||
#define DEFAULT_CACHE_LINE_SIZE 64
|
||||
|
||||
#endif // CPU_RISCV_GLOBALDEFINITIONS_RISCV_HPP
|
||||
|
@ -82,19 +82,30 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
||||
\
|
||||
product(bool, NearCpool, true, \
|
||||
"constant pool is close to instructions") \
|
||||
product(bool, UseBlockZeroing, false, \
|
||||
"Use Zicboz for block zeroing") \
|
||||
product(intx, BlockZeroingLowLimit, 256, \
|
||||
"Minimum size in bytes when block zeroing will be used") \
|
||||
range(1, max_jint) \
|
||||
product(intx, CacheLineSize, DEFAULT_CACHE_LINE_SIZE, \
|
||||
"Size in bytes of a CPU cache line") \
|
||||
range(wordSize, max_jint) \
|
||||
product(bool, TraceTraps, false, "Trace all traps the signal handler") \
|
||||
/* For now we're going to be safe and add the I/O bits to userspace fences. */ \
|
||||
product(bool, UseConservativeFence, true, \
|
||||
"Extend i for r and o for w in the pred/succ flags of fence") \
|
||||
product(bool, AvoidUnalignedAccesses, true, \
|
||||
"Avoid generating unaligned memory accesses") \
|
||||
product(bool, UseRVA20U64, false, EXPERIMENTAL, "Use RVA20U64 profile") \
|
||||
product(bool, UseRVA22U64, false, EXPERIMENTAL, "Use RVA22U64 profile") \
|
||||
product(bool, UseRVV, false, EXPERIMENTAL, "Use RVV instructions") \
|
||||
product(bool, UseRVC, false, EXPERIMENTAL, "Use RVC instructions") \
|
||||
product(bool, UseZba, false, EXPERIMENTAL, "Use Zba instructions") \
|
||||
product(bool, UseZbb, false, EXPERIMENTAL, "Use Zbb instructions") \
|
||||
product(bool, UseZic64b, false, EXPERIMENTAL, "Use Zic64b instructions") \
|
||||
product(bool, UseZicbom, false, EXPERIMENTAL, "Use Zicbom instructions") \
|
||||
product(bool, UseZicbop, false, EXPERIMENTAL, "Use Zicbop instructions") \
|
||||
product(bool, UseZicboz, false, EXPERIMENTAL, "Use Zicboz instructions") \
|
||||
product(bool, UseRVVForBigIntegerShiftIntrinsics, true, \
|
||||
"Use RVV instructions for left/right shift of BigInteger")
|
||||
|
||||
|
@ -792,7 +792,7 @@ void MacroAssembler::la(Register Rd, const Address &adr) {
|
||||
|
||||
void MacroAssembler::la(Register Rd, Label &label) {
|
||||
IncompressibleRegion ir(this); // the label address may be patched back.
|
||||
la(Rd, target(label));
|
||||
wrap_label(Rd, label, &MacroAssembler::la);
|
||||
}
|
||||
|
||||
void MacroAssembler::li32(Register Rd, int32_t imm) {
|
||||
@ -3980,9 +3980,9 @@ address MacroAssembler::zero_words(Register ptr, Register cnt)
|
||||
andi(t0, cnt, i);
|
||||
beqz(t0, l);
|
||||
for (int j = 0; j < i; j++) {
|
||||
sd(zr, Address(ptr, 0));
|
||||
addi(ptr, ptr, 8);
|
||||
sd(zr, Address(ptr, j * wordSize));
|
||||
}
|
||||
addi(ptr, ptr, i * wordSize);
|
||||
bind(l);
|
||||
}
|
||||
{
|
||||
@ -4001,7 +4001,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt)
|
||||
|
||||
// base: Address of a buffer to be zeroed, 8 bytes aligned.
|
||||
// cnt: Immediate count in HeapWords.
|
||||
void MacroAssembler::zero_words(Register base, u_int64_t cnt)
|
||||
void MacroAssembler::zero_words(Register base, uint64_t cnt)
|
||||
{
|
||||
assert_different_registers(base, t0, t1);
|
||||
|
||||
@ -4092,6 +4092,43 @@ void MacroAssembler::fill_words(Register base, Register cnt, Register value)
|
||||
bind(fini);
|
||||
}
|
||||
|
||||
// Zero blocks of memory by using CBO.ZERO.
|
||||
//
|
||||
// Aligns the base address first sufficiently for CBO.ZERO, then uses
|
||||
// CBO.ZERO repeatedly for every full block. cnt is the size to be
|
||||
// zeroed in HeapWords. Returns the count of words left to be zeroed
|
||||
// in cnt.
|
||||
//
|
||||
// NOTE: This is intended to be used in the zero_blocks() stub. If
|
||||
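// you want to use it elsewhere, note that cnt (in HeapWords) must cover at
// least 2 * CacheLineSize bytes: aligning base can consume up to
// CacheLineSize - wordSize bytes, and the CBO.ZERO loop always runs once.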
|
||||
// Register usage, as visible in the body below:
//   base - start address; advanced past everything that gets zeroed here.
//   cnt  - word count; decremented by the words zeroed here.
//   tmp1, tmp2 - scratch registers, overwritten.
void MacroAssembler::zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2) {
|
||||
Label initial_table_end, loop;
|
||||
|
||||
// Align base with cache line size.
|
||||
neg(tmp1, base);
|
||||
andi(tmp1, tmp1, CacheLineSize - 1);
|
||||
|
||||
// tmp1: the number of bytes to be filled to align the base with cache line size.
|
||||
add(base, base, tmp1);
|
||||
// Deduct the alignment bytes, converted to words (>> 3), from cnt.
srai(tmp2, tmp1, 3);
|
||||
sub(cnt, cnt, tmp2);
|
||||
// Computed jump into the store table below: branch tmp1 >> 1 bytes of code
// before initial_table_end so that only the stores needed for alignment run.
// NOTE(review): this presumes each sd in the table occupies 4 bytes of code
// per 8 bytes zeroed -- confirm that these stores are never emitted in a
// compressed encoding.
srli(tmp2, tmp1, 1);
|
||||
la(tmp1, initial_table_end);
|
||||
sub(tmp2, tmp1, tmp2);
|
||||
jr(tmp2);
|
||||
// Offsets are negative because base has already been advanced to the
// aligned address; the stores fill the gap just below it.
for (int i = -CacheLineSize + wordSize; i < 0; i += wordSize) {
|
||||
sd(zr, Address(base, i));
|
||||
}
|
||||
bind(initial_table_end);
|
||||
|
||||
// tmp1 is reused as the number of words per cache line. The loop body runs
// before cnt is compared, so one cbo_zero is issued unconditionally.
mv(tmp1, CacheLineSize / wordSize);
|
||||
bind(loop);
|
||||
cbo_zero(base);
|
||||
sub(cnt, cnt, tmp1);
|
||||
add(base, base, CacheLineSize);
|
||||
bge(cnt, tmp1, loop);
|
||||
}
|
||||
|
||||
#define FCVT_SAFE(FLOATCVT, FLOATEQ) \
|
||||
void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) { \
|
||||
Label L_Okay; \
|
||||
|
@ -1093,10 +1093,11 @@ public:
|
||||
|
||||
void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);
|
||||
|
||||
void zero_words(Register base, u_int64_t cnt);
|
||||
void zero_words(Register base, uint64_t cnt);
|
||||
address zero_words(Register ptr, Register cnt);
|
||||
void fill_words(Register base, Register cnt, Register value);
|
||||
void zero_memory(Register addr, Register len, Register tmp);
|
||||
void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);
|
||||
|
||||
// shift left by shamt and add
|
||||
void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
|
||||
|
@ -727,6 +727,10 @@ reg_class r30_reg(
|
||||
R30, R30_H
|
||||
);
|
||||
|
||||
reg_class r31_reg(
|
||||
R31, R31_H
|
||||
);
|
||||
|
||||
// Class for zero registers
|
||||
reg_class zr_reg(
|
||||
R0, R0_H
|
||||
@ -3347,6 +3351,28 @@ operand iRegP_R28()
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Pointer 64 bit Register R30 only
|
||||
// Pointer operand whose allocation is constrained to register class r30_reg.
// In this change it is the type of the tmp1 operand of clearArray_reg_reg.
operand iRegP_R30()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(r30_reg));
|
||||
match(RegP);
|
||||
match(iRegPNoSp);
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Pointer 64 bit Register R31 only
|
||||
// Pointer operand whose allocation is constrained to register class r31_reg.
// In this change it is the type of the tmp2 operand of clearArray_reg_reg.
operand iRegP_R31()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(r31_reg));
|
||||
match(RegP);
|
||||
match(iRegPNoSp);
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Pointer Register Operands
|
||||
// Narrow Pointer Register
|
||||
operand iRegN()
|
||||
@ -10228,11 +10254,13 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
|
||||
%}
|
||||
|
||||
// clearing of an array
|
||||
instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy)
|
||||
instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, iRegP_R30 tmp1,
|
||||
iRegP_R31 tmp2, Universe dummy)
|
||||
%{
|
||||
predicate(!UseRVV);
|
||||
// temp registers must match the ones used in StubGenerator::generate_zero_blocks()
|
||||
predicate(UseBlockZeroing || !UseRVV);
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base);
|
||||
effect(USE_KILL cnt, USE_KILL base, TEMP tmp1, TEMP tmp2);
|
||||
|
||||
ins_cost(4 * DEFAULT_COST);
|
||||
format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}
|
||||
|
@ -2042,7 +2042,7 @@ instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
|
||||
instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy,
|
||||
vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3)
|
||||
%{
|
||||
predicate(UseRVV);
|
||||
predicate(!UseBlockZeroing && UseRVV);
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3);
|
||||
|
||||
|
@ -671,26 +671,36 @@ class StubGenerator: public StubCodeGenerator {
|
||||
address generate_zero_blocks() {
|
||||
Label done;
|
||||
|
||||
const Register base = x28, cnt = x29;
|
||||
const Register base = x28, cnt = x29, tmp1 = x30, tmp2 = x31;
|
||||
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "zero_blocks");
|
||||
address start = __ pc();
|
||||
|
||||
if (UseBlockZeroing) {
|
||||
// Ensure count >= 2*CacheLineSize so that it still deserves a cbo.zero
|
||||
// after alignment.
|
||||
Label small;
|
||||
int low_limit = MAX2(2 * CacheLineSize, BlockZeroingLowLimit) / wordSize;
|
||||
__ mv(tmp1, low_limit);
|
||||
__ blt(cnt, tmp1, small);
|
||||
__ zero_dcache_blocks(base, cnt, tmp1, tmp2);
|
||||
__ bind(small);
|
||||
}
|
||||
|
||||
{
|
||||
// Clear the remaining blocks.
|
||||
Label loop;
|
||||
__ sub(cnt, cnt, MacroAssembler::zero_words_block_size);
|
||||
__ bltz(cnt, done);
|
||||
__ mv(tmp1, MacroAssembler::zero_words_block_size);
|
||||
__ blt(cnt, tmp1, done);
|
||||
__ bind(loop);
|
||||
for (int i = 0; i < MacroAssembler::zero_words_block_size; i++) {
|
||||
__ sd(zr, Address(base, 0));
|
||||
__ add(base, base, 8);
|
||||
__ sd(zr, Address(base, i * wordSize));
|
||||
}
|
||||
__ add(base, base, MacroAssembler::zero_words_block_size * wordSize);
|
||||
__ sub(cnt, cnt, MacroAssembler::zero_words_block_size);
|
||||
__ bgez(cnt, loop);
|
||||
__ bge(cnt, tmp1, loop);
|
||||
__ bind(done);
|
||||
__ add(cnt, cnt, MacroAssembler::zero_words_block_size);
|
||||
}
|
||||
|
||||
__ ret();
|
||||
|
@ -36,6 +36,50 @@ uint32_t VM_Version::_initial_vector_length = 0;
|
||||
void VM_Version::initialize() {
|
||||
get_os_cpu_info();
|
||||
|
||||
// https://github.com/riscv/riscv-profiles/blob/main/profiles.adoc#rva20-profiles
|
||||
if (UseRVA20U64) {
|
||||
if (FLAG_IS_DEFAULT(UseRVC)) {
|
||||
FLAG_SET_DEFAULT(UseRVC, true);
|
||||
}
|
||||
}
|
||||
// https://github.com/riscv/riscv-profiles/blob/main/profiles.adoc#rva22-profiles
|
||||
if (UseRVA22U64) {
|
||||
if (FLAG_IS_DEFAULT(UseRVC)) {
|
||||
FLAG_SET_DEFAULT(UseRVC, true);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseZba)) {
|
||||
FLAG_SET_DEFAULT(UseZba, true);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseZbb)) {
|
||||
FLAG_SET_DEFAULT(UseZbb, true);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseZic64b)) {
|
||||
FLAG_SET_DEFAULT(UseZic64b, true);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseZicbom)) {
|
||||
FLAG_SET_DEFAULT(UseZicbom, true);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseZicbop)) {
|
||||
FLAG_SET_DEFAULT(UseZicbop, true);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseZicboz)) {
|
||||
FLAG_SET_DEFAULT(UseZicboz, true);
|
||||
}
|
||||
}
|
||||
|
||||
if (UseZic64b) {
|
||||
if (CacheLineSize != 64) {
|
||||
assert(!FLAG_IS_DEFAULT(CacheLineSize), "default cache line size should be 64 bytes");
|
||||
warning("CacheLineSize is assumed to be 64 bytes because Zic64b is enabled");
|
||||
FLAG_SET_DEFAULT(CacheLineSize, 64);
|
||||
}
|
||||
} else {
|
||||
if (!FLAG_IS_DEFAULT(CacheLineSize) && !is_power_of_2(CacheLineSize)) {
|
||||
warning("CacheLineSize must be a power of 2");
|
||||
FLAG_SET_DEFAULT(CacheLineSize, DEFAULT_CACHE_LINE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
if (FLAG_IS_DEFAULT(UseFMA)) {
|
||||
FLAG_SET_DEFAULT(UseFMA, true);
|
||||
}
|
||||
@ -127,6 +171,18 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UsePopCountInstruction, false);
|
||||
}
|
||||
|
||||
if (UseZicboz) {
|
||||
if (FLAG_IS_DEFAULT(UseBlockZeroing)) {
|
||||
FLAG_SET_DEFAULT(UseBlockZeroing, true);
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(BlockZeroingLowLimit)) {
|
||||
FLAG_SET_DEFAULT(BlockZeroingLowLimit, 2 * CacheLineSize);
|
||||
}
|
||||
} else if (UseBlockZeroing) {
|
||||
warning("Block zeroing is not available");
|
||||
FLAG_SET_DEFAULT(UseBlockZeroing, false);
|
||||
}
|
||||
|
||||
char buf[512];
|
||||
buf[0] = '\0';
|
||||
if (_uarch != NULL && strcmp(_uarch, "") != 0) snprintf(buf, sizeof(buf), "%s,", _uarch);
|
||||
|
Loading…
Reference in New Issue
Block a user