8331558: AArch64: optimize integer remainder

Reviewed-by: eliu, aph
释天 2024-05-10 10:01:40 +00:00 committed by Eric Liu
parent 9f43ce5a72
commit dab92c51c7
4 changed files with 60 additions and 15 deletions

@@ -2285,6 +2285,33 @@ Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_of
return Address(Rd);
}
// On Neoverse, MSUB uses the same ALU as other instructions (e.g. SDIV).
// A MUL/SUB combination can utilize multiple ALUs,
// and can be somewhat faster than MSUB.
void MacroAssembler::msub(Register Rd, Register Rn, Register Rm, Register Ra)
{
if (VM_Version::supports_a53mac() && Ra != zr)
nop();
if (VM_Version::is_neoverse()) {
mul(rscratch1, Rn, Rm);
sub(Rd, Ra, rscratch1);
} else {
Assembler::msub(Rd, Rn, Rm, Ra);
}
}
void MacroAssembler::msubw(Register Rd, Register Rn, Register Rm, Register Ra)
{
if (VM_Version::supports_a53mac() && Ra != zr)
nop();
if (VM_Version::is_neoverse()) {
mulw(rscratch1, Rn, Rm);
subw(Rd, Ra, rscratch1);
} else {
Assembler::msubw(Rd, Rn, Rm, Ra);
}
}
int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb,
bool want_remainder, Register scratch)
{
@@ -2309,7 +2336,7 @@ int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb,
sdivw(result, ra, rb);
} else {
sdivw(scratch, ra, rb);
Assembler::msubw(result, scratch, rb, ra);
msubw(result, scratch, rb, ra);
}
return idivl_offset;
@@ -2339,7 +2366,7 @@ int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb,
sdiv(result, ra, rb);
} else {
sdiv(scratch, ra, rb);
Assembler::msub(result, scratch, rb, ra);
msub(result, scratch, rb, ra);
}
return idivq_offset;
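
Editor's note (not part of the patch): the two hunks above emit the remainder as a divide followed by a multiply-subtract, and on Neoverse the multiply-subtract is split into MUL + SUB as described in the comment in MacroAssembler::msub. A minimal host-side C++ sketch of the idiom, with hypothetical function names, showing that both sequences compute the same value:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Remainder as emitted for the want_remainder case:
//   sdivw scratch, ra, rb         ; scratch = ra / rb (truncated)
//   msubw result, scratch, rb, ra ; result  = ra - scratch * rb
static int32_t rem_via_msub(int32_t ra, int32_t rb) {
  int32_t scratch = ra / rb;   // SDIVW
  return ra - scratch * rb;    // MSUBW
}

// Neoverse variant: MSUBW split into MULW + SUBW so the two halves can
// issue on different ALUs; the computed value is identical.
static int32_t rem_via_mul_sub(int32_t ra, int32_t rb) {
  int32_t scratch = ra / rb;   // SDIVW
  int32_t prod = scratch * rb; // MULW (into rscratch1)
  return ra - prod;            // SUBW
}

int main() {
  for (int32_t a : {7, -7, 0, 2147483646}) {
    for (int32_t b : {3, -3, 1, 65536}) {
      assert(rem_via_msub(a, b) == a % b);
      assert(rem_via_mul_sub(a, b) == a % b);
    }
  }
  return 0;
}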

@@ -437,11 +437,14 @@ class MacroAssembler: public Assembler {
Assembler::INSN(Rd, Rn, Rm, Ra); \
}
WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
WRAP(madd) WRAP(maddw)
WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
#undef WRAP
void msub(Register Rd, Register Rn, Register Rm, Register Ra);
void msubw(Register Rd, Register Rn, Register Rm, Register Ra);
// macro assembly operations needed for aarch64
public:
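
Editor's note: the hunk above removes msub/msubw from the WRAP list and declares them as ordinary member functions, defined out of line in macroAssembler_aarch64.cpp. Judging from those new definitions, the WRAP wrappers insert a nop before the accumulate form when the Cortex-A53 multiply-accumulate erratum workaround is active. An assumed sketch of what WRAP(madd) expands to inside the class body (illustrative only, inferred from that pattern):

// Assumed expansion of WRAP(madd): guard against the Cortex-A53
// multiply-accumulate erratum, then emit the raw instruction.
void madd(Register Rd, Register Rn, Register Rm, Register Ra) {
  if (VM_Version::supports_a53mac() && Ra != zr)
    nop();
  Assembler::madd(Rd, Rn, Rm, Ra);
}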

@@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -212,13 +212,7 @@ void VM_Version::initialize() {
}
}
// Neoverse
// N1: 0xd0c
// N2: 0xd49
// V1: 0xd40
// V2: 0xd4f
if (_cpu == CPU_ARM && (model_is(0xd0c) || model_is(0xd49) ||
model_is(0xd40) || model_is(0xd4f))) {
if (is_neoverse()) {
if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) {
FLAG_SET_DEFAULT(UseSIMDForMemoryOps, true);
}
@@ -247,10 +241,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCRC32, false);
}
// Neoverse
// V1: 0xd40
// V2: 0xd4f
if (_cpu == CPU_ARM && (model_is(0xd40) || model_is(0xd4f))) {
if (is_neoverse_v_series()) {
if (FLAG_IS_DEFAULT(UseCryptoPmullForCRC32)) {
FLAG_SET_DEFAULT(UseCryptoPmullForCRC32, true);
}

@@ -114,6 +114,13 @@ enum Ampere_CPU_Model {
CPU_MODEL_AMPERE_1B = 0xac5 /* AMPERE_1B core Implements ARMv8.7 with CSSC, MTE, SM3/SM4 extensions */
};
enum Neoverse_CPU_Model {
CPU_MODEL_NEOVERSE_N1 = 0xd0c,
CPU_MODEL_NEOVERSE_N2 = 0xd49,
CPU_MODEL_NEOVERSE_V1 = 0xd40,
CPU_MODEL_NEOVERSE_V2 = 0xd4f,
};
#define CPU_FEATURE_FLAGS(decl) \
decl(FP, fp, 0) \
decl(ASIMD, asimd, 1) \
@@ -156,6 +163,23 @@ enum Ampere_CPU_Model {
return _model == cpu_model || _model2 == cpu_model;
}
static bool is_neoverse() {
switch(_model) {
case CPU_MODEL_NEOVERSE_N1:
case CPU_MODEL_NEOVERSE_N2:
case CPU_MODEL_NEOVERSE_V1:
case CPU_MODEL_NEOVERSE_V2:
return true;
default:
return false;
}
}
static bool is_neoverse_v_series() {
return (model_is(CPU_MODEL_NEOVERSE_V1) || model_is(CPU_MODEL_NEOVERSE_V2));
}
static bool is_zva_enabled() { return 0 <= _zva_length; }
static int zva_length() {
assert(is_zva_enabled(), "ZVA not available");
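
Editor's note (an assumption about surrounding context, not part of the patch): the Neoverse_CPU_Model values above are Arm part numbers, which on AArch64 are reported in the PartNum field, bits [15:4], of MIDR_EL1; the patch itself does not change how _model is populated. A minimal sketch of decoding a part number from a raw MIDR value, with an illustrative example MIDR:

#include <cassert>
#include <cstdint>

// Illustrative only: extract the PartNum field (bits [15:4]) of a raw
// MIDR_EL1 value, where part numbers such as 0xd0c (Neoverse N1) or
// 0xd4f (Neoverse V2) are encoded.
static inline uint32_t midr_partnum(uint64_t midr) {
  return static_cast<uint32_t>((midr >> 4) & 0xfff);
}

int main() {
  // 0x410fd0c1 is a made-up but plausible MIDR (implementer 0x41 = Arm,
  // PartNum 0xd0c = Neoverse N1).
  assert(midr_partnum(0x410fd0c1ULL) == 0xd0c);
  return 0;
}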