8331558: AArch64: optimize integer remainder
Reviewed-by: eliu, aph
This commit is contained in:
parent
9f43ce5a72
commit
dab92c51c7
@ -2285,6 +2285,33 @@ Address MacroAssembler::form_address(Register Rd, Register base, int64_t byte_of
|
||||
return Address(Rd);
|
||||
}
|
||||
|
||||
// On Neoverse, MSUB uses the same ALU with other instructions (e.g. SDIV).
|
||||
// The combination of MUL/SUB can utilize multiple ALUs,
|
||||
// and can be somewhat faster than MSUB.
|
||||
void MacroAssembler::msub(Register Rd, Register Rn, Register Rm, Register Ra)
|
||||
{
|
||||
if (VM_Version::supports_a53mac() && Ra != zr)
|
||||
nop();
|
||||
if (VM_Version::is_neoverse()) {
|
||||
mul(rscratch1, Rn, Rm);
|
||||
sub(Rd, Ra, rscratch1);
|
||||
} else {
|
||||
Assembler::msub(Rd, Rn, Rm, Ra);
|
||||
}
|
||||
}
|
||||
|
||||
// 32-bit multiply-subtract: Rd = Ra - Rn * Rm.
//
// On Neoverse a MULW/SUBW pair is emitted instead of the single MSUBW
// instruction; every other CPU gets the plain Assembler::msubw encoding.
//
// The Neoverse sequence keeps the product in rscratch1, so rscratch1 is
// clobbered there and Ra must not be rscratch1: it would be overwritten by
// the MULW before the SUBW reads it.
void MacroAssembler::msubw(Register Rd, Register Rn, Register Rm, Register Ra)
{
  // Emit a nop first when the CPU reports the a53mac feature and there is a
  // real accumulator operand (presumably an erratum workaround for
  // multiply-accumulate instructions -- confirm against VM_Version).
  if (VM_Version::supports_a53mac() && Ra != zr) {
    nop();
  }
  if (VM_Version::is_neoverse()) {
    assert(Ra != rscratch1, "Ra would be clobbered by the MULW/SUBW sequence");
    mulw(rscratch1, Rn, Rm);
    subw(Rd, Ra, rscratch1);
  } else {
    Assembler::msubw(Rd, Rn, Rm, Ra);
  }
}
|
||||
|
||||
int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb,
|
||||
bool want_remainder, Register scratch)
|
||||
{
|
||||
@ -2309,7 +2336,7 @@ int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb,
|
||||
sdivw(result, ra, rb);
|
||||
} else {
|
||||
sdivw(scratch, ra, rb);
|
||||
Assembler::msubw(result, scratch, rb, ra);
|
||||
msubw(result, scratch, rb, ra);
|
||||
}
|
||||
|
||||
return idivl_offset;
|
||||
@ -2339,7 +2366,7 @@ int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb,
|
||||
sdiv(result, ra, rb);
|
||||
} else {
|
||||
sdiv(scratch, ra, rb);
|
||||
Assembler::msub(result, scratch, rb, ra);
|
||||
msub(result, scratch, rb, ra);
|
||||
}
|
||||
|
||||
return idivq_offset;
|
||||
|
@ -437,11 +437,14 @@ class MacroAssembler: public Assembler {
|
||||
Assembler::INSN(Rd, Rn, Rm, Ra); \
|
||||
}
|
||||
|
||||
WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
|
||||
WRAP(madd) WRAP(maddw)
|
||||
WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
|
||||
#undef WRAP
|
||||
|
||||
|
||||
// 64-bit multiply-subtract, Rd = Ra - Rn * Rm. Declared explicitly instead
// of being generated by WRAP; the definition is out of line.
void msub(Register Rd, Register Rn, Register Rm, Register Ra);
|
||||
// 32-bit multiply-subtract, Rd = Ra - Rn * Rm. Defined out of line as well.
void msubw(Register Rd, Register Rn, Register Rm, Register Ra);
|
||||
|
||||
// macro assembly operations needed for aarch64
|
||||
|
||||
public:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2020, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -212,13 +212,7 @@ void VM_Version::initialize() {
|
||||
}
|
||||
}
|
||||
|
||||
// Neoverse
|
||||
// N1: 0xd0c
|
||||
// N2: 0xd49
|
||||
// V1: 0xd40
|
||||
// V2: 0xd4f
|
||||
if (_cpu == CPU_ARM && (model_is(0xd0c) || model_is(0xd49) ||
|
||||
model_is(0xd40) || model_is(0xd4f))) {
|
||||
if (is_neoverse()) {
|
||||
if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) {
|
||||
FLAG_SET_DEFAULT(UseSIMDForMemoryOps, true);
|
||||
}
|
||||
@ -247,10 +241,7 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UseCRC32, false);
|
||||
}
|
||||
|
||||
// Neoverse
|
||||
// V1: 0xd40
|
||||
// V2: 0xd4f
|
||||
if (_cpu == CPU_ARM && (model_is(0xd40) || model_is(0xd4f))) {
|
||||
if (is_neoverse_v_series()) {
|
||||
if (FLAG_IS_DEFAULT(UseCryptoPmullForCRC32)) {
|
||||
FLAG_SET_DEFAULT(UseCryptoPmullForCRC32, true);
|
||||
}
|
||||
|
@ -114,6 +114,13 @@ enum Ampere_CPU_Model {
|
||||
CPU_MODEL_AMPERE_1B = 0xac5 /* AMPERE_1B core Implements ARMv8.7 with CSSC, MTE, SM3/SM4 extensions */
|
||||
};
|
||||
|
||||
// CPU model identifiers of the Arm Neoverse cores the VM knows about,
// listed in ascending numeric order.
enum Neoverse_CPU_Model {
  CPU_MODEL_NEOVERSE_N1 = 0xd0c,
  CPU_MODEL_NEOVERSE_V1 = 0xd40,
  CPU_MODEL_NEOVERSE_N2 = 0xd49,
  CPU_MODEL_NEOVERSE_V2 = 0xd4f
};
|
||||
|
||||
#define CPU_FEATURE_FLAGS(decl) \
|
||||
decl(FP, fp, 0) \
|
||||
decl(ASIMD, asimd, 1) \
|
||||
@ -156,6 +163,23 @@ enum Ampere_CPU_Model {
|
||||
return _model == cpu_model || _model2 == cpu_model;
|
||||
}
|
||||
|
||||
|
||||
static bool is_neoverse() {
|
||||
switch(_model) {
|
||||
case CPU_MODEL_NEOVERSE_N1:
|
||||
case CPU_MODEL_NEOVERSE_N2:
|
||||
case CPU_MODEL_NEOVERSE_V1:
|
||||
case CPU_MODEL_NEOVERSE_V2:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool is_neoverse_v_series() {
|
||||
return (model_is(CPU_MODEL_NEOVERSE_V1) || model_is(CPU_MODEL_NEOVERSE_V2));
|
||||
}
|
||||
|
||||
// ZVA counts as enabled whenever the recorded _zva_length is non-negative.
static bool is_zva_enabled() {
  return _zva_length >= 0;
}
|
||||
static int zva_length() {
|
||||
assert(is_zva_enabled(), "ZVA not available");
|
||||
|
Loading…
x
Reference in New Issue
Block a user