8326541: [AArch64] ZGC C2 load barrier stub should consider the length of live registers when spilling registers

Reviewed-by: eosterlund, rcastanedalo
This commit is contained in:
Joshua Zhu 2024-04-24 05:44:39 +00:00 committed by Tobias Hartmann
parent 438e64310d
commit 5c3838605d
7 changed files with 704 additions and 67 deletions

View File

@ -1126,6 +1126,10 @@ extern RegMask _NO_SPECIAL_REG32_mask;
extern RegMask _NO_SPECIAL_REG_mask;
extern RegMask _NO_SPECIAL_PTR_REG_mask;
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
class CallStubImpl {
//--------------------------------------------------------------
@ -1900,10 +1904,6 @@ const Pipeline * MachEpilogNode::pipeline() const {
//=============================================================================
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
static enum RC rc_class(OptoReg::Name reg) {
if (reg == OptoReg::Bad) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -1081,6 +1081,35 @@ void ZBarrierSetAssembler::generate_c1_store_barrier_runtime_stub(StubAssembler*
#ifdef COMPILER2
// Re-encodes the OptoReg of an fp/vector register so that its two low bits
// carry the size of the value described by node->ideal_reg():
//   0 - Op_RegF          (encoding left untouched)
//   1 - Op_RegD, Op_VecD
//   2 - Op_VecX
//   3 - Op_VecA
// Taking V16 as an example, the OptoReg names and their slot values are
// "V16": 64, "V16_H": 65, "V16_J": 66, "V16_K": 67, so the four encodings of
// one register map onto its four slot names.
// Any other ideal register is reported as an error.
OptoReg::Name ZBarrierSetAssembler::encode_float_vector_register_size(const Node* node, OptoReg::Name opto_reg) {
  int size_tag = 0;

  switch (node->ideal_reg()) {
    case Op_RegF:
      // The original encoding is already enough to identify a single slot.
      assert(opto_reg % 4 == 0, "Float register should only occupy a single slot");
      return opto_reg;
    case Op_RegD:
    case Op_VecD:
      size_tag = 1;
      break;
    case Op_VecX:
      size_tag = 2;
      break;
    case Op_VecA:
      size_tag = 3;
      break;
    default:
      assert(false, "unexpected ideal register");
      ShouldNotReachHere();
      return opto_reg;
  }

  // Drop the slot index within the register and substitute the size tag.
  OptoReg::Name encoded = opto_reg;
  encoded &= ~3;
  encoded |= size_tag;
  return encoded;
}
OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
if (!OptoReg::is_reg(opto_reg)) {
return OptoReg::Bad;
@ -1088,7 +1117,7 @@ OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::N
const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
if (vm_reg->is_FloatRegister()) {
return opto_reg & ~1;
opto_reg = encode_float_vector_register_size(node, opto_reg);
}
return opto_reg;
@ -1099,28 +1128,82 @@ OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::N
class ZSaveLiveRegisters {
private:
// A live register together with the number of stack slots it needs when saved.
struct RegisterData {
  VMReg _reg;
  int _slots; // slots occupied once pushed into stack

  // Used by GrowableArray::find().
  // Two entries compare equal when they name the same register; _slots is
  // not part of the comparison. The operator is const-qualified because it
  // does not modify either operand, which also lets it be applied to const
  // elements.
  bool operator == (const RegisterData& other) const {
    return _reg == other._reg;
  }
};
MacroAssembler* const _masm;
RegSet _gp_regs;
FloatRegSet _fp_regs;
FloatRegSet _neon_regs;
FloatRegSet _sve_regs;
PRegSet _p_regs;
public:
void initialize(ZBarrierStubC2* stub) {
// Record registers that needs to be saved/restored
int index = -1;
GrowableArray<RegisterData> registers;
VMReg prev_vm_reg = VMRegImpl::Bad();
RegMaskIterator rmi(stub->live());
while (rmi.has_next()) {
const OptoReg::Name opto_reg = rmi.next();
if (OptoReg::is_reg(opto_reg)) {
const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
if (vm_reg->is_Register()) {
_gp_regs += RegSet::of(vm_reg->as_Register());
} else if (vm_reg->is_FloatRegister()) {
_fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
} else if (vm_reg->is_PRegister()) {
_p_regs += PRegSet::of(vm_reg->as_PRegister());
OptoReg::Name opto_reg = rmi.next();
VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
if (vm_reg->is_Register()) {
// GPR may have one or two slots in regmask
// Determine whether the current vm_reg is the same physical register as the previous one
if (is_same_register(vm_reg, prev_vm_reg)) {
registers.at(index)._slots++;
} else {
fatal("Unknown register type");
RegisterData reg_data = { vm_reg, 1 };
index = registers.append(reg_data);
}
} else if (vm_reg->is_FloatRegister()) {
// We have size encoding in OptoReg of stub->live()
// After encoding, float/neon/sve register has only one slot in regmask
// Decode it to get the actual size
VMReg vm_reg_base = vm_reg->as_FloatRegister()->as_VMReg();
int slots = decode_float_vector_register_size(opto_reg);
RegisterData reg_data = { vm_reg_base, slots };
index = registers.append(reg_data);
} else if (vm_reg->is_PRegister()) {
// PRegister has only one slot in regmask
RegisterData reg_data = { vm_reg, 1 };
index = registers.append(reg_data);
} else {
assert(false, "Unknown register type");
ShouldNotReachHere();
}
prev_vm_reg = vm_reg;
}
// Record registers that needs to be saved/restored
for (GrowableArrayIterator<RegisterData> it = registers.begin(); it != registers.end(); ++it) {
RegisterData reg_data = *it;
VMReg vm_reg = reg_data._reg;
int slots = reg_data._slots;
if (vm_reg->is_Register()) {
assert(slots == 1 || slots == 2, "Unexpected register save size");
_gp_regs += RegSet::of(vm_reg->as_Register());
} else if (vm_reg->is_FloatRegister()) {
if (slots == 1 || slots == 2) {
_fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
} else if (slots == 4) {
_neon_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
} else {
assert(slots == Matcher::scalable_vector_reg_size(T_FLOAT), "Unexpected register save size");
_sve_regs += FloatRegSet::of(vm_reg->as_FloatRegister());
}
} else {
assert(vm_reg->is_PRegister() && slots == 1, "Unknown register type");
_p_regs += PRegSet::of(vm_reg->as_PRegister());
}
}
@ -1130,12 +1213,65 @@ public:
} else {
_gp_regs -= RegSet::range(r19, r30) + RegSet::of(r8, r9);
}
// Remove C-ABI SOE fp registers
_fp_regs -= FloatRegSet::range(v8, v15);
}
// Classifies a VMReg: rc_int for a general purpose register, rc_float for a
// float register, rc_predicate for a predicate register, rc_stack for a
// stack location and rc_bad for anything else.
static enum RC rc_class(VMReg reg) {
  if (reg->is_reg()) {
    if (reg->is_Register()) {
      return rc_int;
    }
    if (reg->is_FloatRegister()) {
      return rc_float;
    }
    if (reg->is_PRegister()) {
      return rc_predicate;
    }
    // A register of no known kind falls through to the checks below.
  }
  return reg->is_stack() ? rc_stack : rc_bad;
}
// Returns true when both VMRegs are identical, or when they belong to the
// same register class and convert to the same Register, FloatRegister or
// PRegister. Every other combination yields false.
static bool is_same_register(VMReg reg1, VMReg reg2) {
  if (reg1 == reg2) {
    return true;
  }
  if (rc_class(reg1) != rc_class(reg2)) {
    return false;
  }
  // Same class from here on, so the kind of reg1 decides how to compare.
  if (reg1->is_Register()) {
    return reg1->as_Register() == reg2->as_Register();
  }
  if (reg1->is_FloatRegister()) {
    return reg1->as_FloatRegister() == reg2->as_FloatRegister();
  }
  if (reg1->is_PRegister()) {
    return reg1->as_PRegister() == reg2->as_PRegister();
  }
  return false;
}
// Translates the size tag held in the two low bits of opto_reg into a number
// of slots: tag 0 -> 1, tag 1 -> 2, tag 2 -> 4 and
// tag 3 -> Matcher::scalable_vector_reg_size(T_FLOAT).
static int decode_float_vector_register_size(OptoReg::Name opto_reg) {
  const int size_tag = opto_reg & 3;
  if (size_tag == 3) {
    return Matcher::scalable_vector_reg_size(T_FLOAT);
  }
  // Tags 0, 1 and 2 stand for 1, 2 and 4 slots respectively.
  return 1 << size_tag;
}
ZSaveLiveRegisters(MacroAssembler* masm, ZBarrierStubC2* stub)
: _masm(masm),
_gp_regs(),
_fp_regs(),
_neon_regs(),
_sve_regs(),
_p_regs() {
// Figure out what registers to save/restore
@ -1143,14 +1279,18 @@ public:
// Save registers
__ push(_gp_regs, sp);
__ push_fp(_fp_regs, sp);
__ push_fp(_fp_regs, sp, MacroAssembler::PushPopFp);
__ push_fp(_neon_regs, sp, MacroAssembler::PushPopNeon);
__ push_fp(_sve_regs, sp, MacroAssembler::PushPopSVE);
__ push_p(_p_regs, sp);
}
~ZSaveLiveRegisters() {
// Restore registers
__ pop_p(_p_regs, sp);
__ pop_fp(_fp_regs, sp);
__ pop_fp(_sve_regs, sp, MacroAssembler::PushPopSVE);
__ pop_fp(_neon_regs, sp, MacroAssembler::PushPopNeon);
__ pop_fp(_fp_regs, sp, MacroAssembler::PushPopFp);
// External runtime call may clobber ptrue reg
__ reinitialize_ptrue();

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -187,6 +187,9 @@ public:
#endif // COMPILER1
#ifdef COMPILER2
OptoReg::Name encode_float_vector_register_size(const Node* node,
OptoReg::Name opto_reg);
OptoReg::Name refine_register(const Node* node,
OptoReg::Name opto_reg);

View File

@ -2641,7 +2641,7 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) {
// Push lots of registers in the bit set supplied. Don't push sp.
// Return the number of dwords pushed
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
int MacroAssembler::push_fp(unsigned int bitset, Register stack, FpPushPopMode mode) {
int words_pushed = 0;
bool use_sve = false;
int sve_vector_size_in_bytes = 0;
@ -2664,8 +2664,29 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
return 0;
}
// SVE
if (use_sve && sve_vector_size_in_bytes > 16) {
if (mode == PushPopFull) {
if (use_sve && sve_vector_size_in_bytes > 16) {
mode = PushPopSVE;
} else {
mode = PushPopNeon;
}
}
#ifndef PRODUCT
{
char buffer[48];
if (mode == PushPopSVE) {
snprintf(buffer, sizeof(buffer), "push_fp: %d SVE registers", count);
} else if (mode == PushPopNeon) {
snprintf(buffer, sizeof(buffer), "push_fp: %d Neon registers", count);
} else {
snprintf(buffer, sizeof(buffer), "push_fp: %d fp registers", count);
}
block_comment(buffer);
}
#endif
if (mode == PushPopSVE) {
sub(stack, stack, sve_vector_size_in_bytes * count);
for (int i = 0; i < count; i++) {
sve_str(as_FloatRegister(regs[i]), Address(stack, i));
@ -2673,35 +2694,67 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
return count * sve_vector_size_in_bytes / 8;
}
// NEON
if (count == 1) {
strq(as_FloatRegister(regs[0]), Address(pre(stack, -wordSize * 2)));
return 2;
}
if (mode == PushPopNeon) {
if (count == 1) {
strq(as_FloatRegister(regs[0]), Address(pre(stack, -wordSize * 2)));
return 2;
}
bool odd = (count & 1) == 1;
int push_slots = count + (odd ? 1 : 0);
bool odd = (count & 1) == 1;
int push_slots = count + (odd ? 1 : 0);
// Always pushing full 128 bit registers.
stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -push_slots * wordSize * 2)));
words_pushed += 2;
for (int i = 2; i + 1 < count; i += 2) {
stpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2));
// Always pushing full 128 bit registers.
stpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -push_slots * wordSize * 2)));
words_pushed += 2;
for (int i = 2; i + 1 < count; i += 2) {
stpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2));
words_pushed += 2;
}
if (odd) {
strq(as_FloatRegister(regs[count - 1]), Address(stack, (count - 1) * wordSize * 2));
words_pushed++;
}
assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
return count * 2;
}
if (odd) {
strq(as_FloatRegister(regs[count - 1]), Address(stack, (count - 1) * wordSize * 2));
words_pushed++;
if (mode == PushPopFp) {
bool odd = (count & 1) == 1;
int push_slots = count + (odd ? 1 : 0);
if (count == 1) {
// Stack pointer must be 16 bytes aligned
strd(as_FloatRegister(regs[0]), Address(pre(stack, -push_slots * wordSize)));
return 1;
}
stpd(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(pre(stack, -push_slots * wordSize)));
words_pushed += 2;
for (int i = 2; i + 1 < count; i += 2) {
stpd(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize));
words_pushed += 2;
}
if (odd) {
// Stack pointer must be 16 bytes aligned
strd(as_FloatRegister(regs[count - 1]), Address(stack, (count - 1) * wordSize));
words_pushed++;
}
assert(words_pushed == count, "oops, pushed != count");
return count;
}
assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
return count * 2;
return 0;
}
// Return the number of dwords popped
int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
int MacroAssembler::pop_fp(unsigned int bitset, Register stack, FpPushPopMode mode) {
int words_pushed = 0;
bool use_sve = false;
int sve_vector_size_in_bytes = 0;
@ -2723,8 +2776,29 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
return 0;
}
// SVE
if (use_sve && sve_vector_size_in_bytes > 16) {
if (mode == PushPopFull) {
if (use_sve && sve_vector_size_in_bytes > 16) {
mode = PushPopSVE;
} else {
mode = PushPopNeon;
}
}
#ifndef PRODUCT
{
char buffer[48];
if (mode == PushPopSVE) {
snprintf(buffer, sizeof(buffer), "pop_fp: %d SVE registers", count);
} else if (mode == PushPopNeon) {
snprintf(buffer, sizeof(buffer), "pop_fp: %d Neon registers", count);
} else {
snprintf(buffer, sizeof(buffer), "pop_fp: %d fp registers", count);
}
block_comment(buffer);
}
#endif
if (mode == PushPopSVE) {
for (int i = count - 1; i >= 0; i--) {
sve_ldr(as_FloatRegister(regs[i]), Address(stack, i));
}
@ -2732,31 +2806,61 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
return count * sve_vector_size_in_bytes / 8;
}
// NEON
if (count == 1) {
ldrq(as_FloatRegister(regs[0]), Address(post(stack, wordSize * 2)));
return 2;
}
if (mode == PushPopNeon) {
if (count == 1) {
ldrq(as_FloatRegister(regs[0]), Address(post(stack, wordSize * 2)));
return 2;
}
bool odd = (count & 1) == 1;
int push_slots = count + (odd ? 1 : 0);
bool odd = (count & 1) == 1;
int push_slots = count + (odd ? 1 : 0);
if (odd) {
ldrq(as_FloatRegister(regs[count - 1]), Address(stack, (count - 1) * wordSize * 2));
words_pushed++;
}
if (odd) {
ldrq(as_FloatRegister(regs[count - 1]), Address(stack, (count - 1) * wordSize * 2));
words_pushed++;
}
for (int i = 2; i + 1 < count; i += 2) {
ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2));
for (int i = 2; i + 1 < count; i += 2) {
ldpq(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize * 2));
words_pushed += 2;
}
ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, push_slots * wordSize * 2)));
words_pushed += 2;
assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
return count * 2;
}
ldpq(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, push_slots * wordSize * 2)));
words_pushed += 2;
if (mode == PushPopFp) {
bool odd = (count & 1) == 1;
int push_slots = count + (odd ? 1 : 0);
assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
if (count == 1) {
ldrd(as_FloatRegister(regs[0]), Address(post(stack, push_slots * wordSize)));
return 1;
}
return count * 2;
if (odd) {
ldrd(as_FloatRegister(regs[count - 1]), Address(stack, (count - 1) * wordSize));
words_pushed++;
}
for (int i = 2; i + 1 < count; i += 2) {
ldpd(as_FloatRegister(regs[i]), as_FloatRegister(regs[i+1]), Address(stack, i * wordSize));
words_pushed += 2;
}
ldpd(as_FloatRegister(regs[0]), as_FloatRegister(regs[1]), Address(post(stack, push_slots * wordSize)));
words_pushed += 2;
assert(words_pushed == count, "oops, pushed != count");
return count;
}
return 0;
}
// Return the number of dwords pushed

View File

@ -444,6 +444,15 @@ class MacroAssembler: public Assembler {
// macro assembly operations needed for aarch64
public:
// Selects how push_fp()/pop_fp() save and restore the registers of a FloatRegSet.
enum FpPushPopMode {
// Default of the FloatRegSet overloads. push_fp()/pop_fp() turn it into
// PushPopSVE when use_sve is set and the SVE vector size is above 16 bytes,
// and into PushPopNeon otherwise.
PushPopFull,
// Registers are stored with sve_str and reloaded with sve_ldr.
PushPopSVE,
// Full 128 bit registers are stored with stpq/strq and reloaded with ldpq/ldrq.
PushPopNeon,
// Registers are stored with stpd/strd and reloaded with ldpd/ldrd,
// one word (wordSize) per register.
PushPopFp
};
// first two private routines for loading 32 bit or 64 bit constants
private:
@ -453,8 +462,8 @@ private:
int push(unsigned int bitset, Register stack);
int pop(unsigned int bitset, Register stack);
int push_fp(unsigned int bitset, Register stack);
int pop_fp(unsigned int bitset, Register stack);
int push_fp(unsigned int bitset, Register stack, FpPushPopMode mode);
int pop_fp(unsigned int bitset, Register stack, FpPushPopMode mode);
int push_p(unsigned int bitset, Register stack);
int pop_p(unsigned int bitset, Register stack);
@ -462,11 +471,12 @@ private:
void mov(Register dst, Address a);
public:
void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
void push_fp(FloatRegSet regs, Register stack, FpPushPopMode mode = PushPopFull) { if (regs.bits()) push_fp(regs.bits(), stack, mode); }
void pop_fp(FloatRegSet regs, Register stack, FpPushPopMode mode = PushPopFull) { if (regs.bits()) pop_fp(regs.bits(), stack, mode); }
static RegSet call_clobbered_gp_registers();

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -45,7 +45,7 @@ inline Register as_Register() {
}
inline FloatRegister as_FloatRegister() {
assert( is_FloatRegister() && is_even(value()), "must be" );
assert( is_FloatRegister(), "must be" );
// Yuk
return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) /
FloatRegister::max_slots_per_register);

View File

@ -0,0 +1,380 @@
/*
* Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package gc.z;
/**
* @test TestRegistersPushPopAtZGCLoadBarrierStub
* @bug 8326541
* @summary Test to verify that registers are saved and restored correctly based on
the actual register usage length on aarch64 when entering load barrier stub.
* @library /test/lib /
* @modules jdk.incubator.vector
*
* @requires vm.gc.ZGenerational & vm.debug
* @requires os.arch=="aarch64"
*
* @run driver gc.z.TestRegistersPushPopAtZGCLoadBarrierStub
*/
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.random.RandomGenerator;
import java.util.random.RandomGeneratorFactory;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.Vector;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorSpecies;
import jdk.test.lib.Asserts;
import jdk.test.lib.process.OutputAnalyzer;
import jdk.test.lib.process.ProcessTools;
// Empty base type of the objects referenced by Outer.field.
class Inner {}
/** An {@link Inner} carrying a single float payload. */
class InnerFloat extends Inner {
    float data;

    /** Stores {@code f} as the payload. */
    public InnerFloat(float f) {
        this.data = f;
    }
}
/** An {@link Inner} carrying a single double payload. */
class InnerDouble extends Inner {
    double data;

    /** Stores {@code f} as the payload. */
    public InnerDouble(double f) {
        this.data = f;
    }
}
/** Holder whose only state is a volatile reference to an {@link Inner}. */
class Outer {
    volatile Inner field;

    /** Makes {@code field} refer to {@code i}. */
    public Outer(Inner i) {
        this.field = i;
    }
}
public class TestRegistersPushPopAtZGCLoadBarrierStub {
// Main class of the child JVM started by launchJavaTestProcess().
// It contains the "*_push_pop_at_load_barrier" kernels, whose compiled code is
// printed and inspected by the parent, and one driver per kernel that calls
// the kernel ITERATIONS times. main() runs the driver registered in TESTS
// under the name passed as the first argument.
// NOTE(review): the kernels presumably rely on their exact shape to keep the
// expected number of float/double/vector values in use around the read of the
// volatile outer.field - confirm before changing any of them.
class Launcher {
    // Length of f_array.
    private final static int NUM = 1024;
    // Number of kernel invocations performed by each driver.
    private final static int ITERATIONS = 20_000;
    // Random source created with seed 0.
    private final static RandomGenerator RANDOM = RandomGeneratorFactory.getDefault().create(0);
    // Kernel name -> driver that runs the kernel.
    private final static Map<String, Runnable> TESTS;
    // Input array of the vector kernels.
    private static float[] f_array;
    // Outer whose field refers to an InnerFloat.
    private static Outer f_outer;
    // Outer whose field refers to an InnerDouble.
    private static Outer d_outer;
    // Fills the inputs with random values and registers every driver.
    static {
        f_array = new float[NUM];
        for (int i = 0; i < NUM; i++) {
            f_array[i] = RANDOM.nextFloat();
        }
        InnerFloat f_inner = new InnerFloat(RANDOM.nextFloat());
        InnerDouble d_inner = new InnerDouble(RANDOM.nextDouble());
        f_outer = new Outer(f_inner);
        d_outer = new Outer(d_inner);
        TESTS = new LinkedHashMap<>();
        TESTS.put("test_one_float_push_pop_at_load_barrier", Launcher::test_one_float);
        TESTS.put("test_two_floats_push_pop_at_load_barrier", Launcher::test_two_floats);
        TESTS.put("test_three_floats_push_pop_at_load_barrier", Launcher::test_three_floats);
        TESTS.put("test_one_double_push_pop_at_load_barrier", Launcher::test_one_double);
        TESTS.put("test_two_doubles_push_pop_at_load_barrier", Launcher::test_two_doubles);
        TESTS.put("test_three_doubles_push_pop_at_load_barrier", Launcher::test_three_doubles);
        TESTS.put("test_one_vector_128_push_pop_at_load_barrier", Launcher::test_one_vector_128);
        TESTS.put("test_two_vectors_128_push_pop_at_load_barrier", Launcher::test_two_vectors_128);
        TESTS.put("test_three_vectors_128_push_pop_at_load_barrier", Launcher::test_three_vectors_128);
        TESTS.put("test_vector_max_push_pop_at_load_barrier", Launcher::test_vector_max);
        TESTS.put("test_float_and_vector_push_pop_at_load_barrier", Launcher::test_float_and_vector);
    }
    // Kernel: reads outer.field, then adds one float argument to the loaded InnerFloat's data.
    static float test_one_float_push_pop_at_load_barrier(Outer outer, float f) {
        Inner inner = outer.field;
        return f + ((InnerFloat)inner).data;
    }
    // Kernel: as above with two float arguments.
    static float test_two_floats_push_pop_at_load_barrier(Outer outer, float f1, float f2) {
        Inner inner = outer.field;
        return f1 + f2 + ((InnerFloat)inner).data;
    }
    // Kernel: as above with three float arguments.
    static float test_three_floats_push_pop_at_load_barrier(Outer outer, float f1, float f2, float f3) {
        Inner inner = outer.field;
        return f1 + f2 + f3 + ((InnerFloat)inner).data;
    }
    // Kernel: reads outer.field, then adds one double argument to the loaded InnerDouble's data.
    static double test_one_double_push_pop_at_load_barrier(Outer outer, double d) {
        Inner inner = outer.field;
        return d + ((InnerDouble)inner).data;
    }
    // Kernel: as above with two double arguments.
    static double test_two_doubles_push_pop_at_load_barrier(Outer outer, double d1, double d2) {
        Inner inner = outer.field;
        return d1 + d2 + ((InnerDouble)inner).data;
    }
    // Kernel: as above with three double arguments.
    static double test_three_doubles_push_pop_at_load_barrier(Outer outer, double d1, double d2, double d3) {
        Inner inner = outer.field;
        return d1 + d2 + d3 + ((InnerDouble)inner).data;
    }
    // Kernel: walks b in 128-bit float vectors; each iteration reads outer.field
    // and updates one vector accumulator.
    static void test_one_vector_128_push_pop_at_load_barrier(float[] b, Outer outer) {
        VectorSpecies<Float> float_species = FloatVector.SPECIES_128;
        FloatVector av = FloatVector.zero(float_species);
        for (int i = 0; i < b.length; i += float_species.length()) {
            Inner inner = outer.field;
            FloatVector bv = FloatVector.fromArray(float_species, b, i);
            float value = ((InnerFloat)inner).data;
            av = av.add(bv).add(value);
        }
    }
    // Kernel: as above with two 128-bit vector accumulators.
    static void test_two_vectors_128_push_pop_at_load_barrier(float[] b, Outer outer) {
        VectorSpecies<Float> float_species = FloatVector.SPECIES_128;
        FloatVector av1 = FloatVector.zero(float_species);
        FloatVector av2 = FloatVector.zero(float_species);
        for (int i = 0; i < b.length; i += float_species.length()) {
            Inner inner = outer.field;
            FloatVector bv = FloatVector.fromArray(float_species, b, i);
            float value = ((InnerFloat)inner).data;
            av1 = av1.add(bv).add(value);
            av2 = av2.add(av1);
        }
    }
    // Kernel: as above with three 128-bit vector accumulators.
    static void test_three_vectors_128_push_pop_at_load_barrier(float[] b, Outer outer) {
        VectorSpecies<Float> float_species = FloatVector.SPECIES_128;
        FloatVector av1 = FloatVector.zero(float_species);
        FloatVector av2 = FloatVector.zero(float_species);
        FloatVector av3 = FloatVector.zero(float_species);
        for (int i = 0; i < b.length; i += float_species.length()) {
            Inner inner = outer.field;
            FloatVector bv = FloatVector.fromArray(float_species, b, i);
            float value = ((InnerFloat)inner).data;
            av1 = av1.add(bv).add(value);
            av2 = av2.add(av1);
            av3 = av3.add(av2);
        }
    }
    // Kernel: one vector accumulator of species SPECIES_MAX.
    static void test_vector_max_push_pop_at_load_barrier(float[] b, Outer outer) {
        VectorSpecies<Float> float_species = FloatVector.SPECIES_MAX;
        FloatVector av = FloatVector.zero(float_species);
        for (int i = 0; i < b.length; i += float_species.length()) {
            Inner inner = outer.field;
            FloatVector bv = FloatVector.fromArray(float_species, b, i);
            float value = ((InnerFloat)inner).data;
            av = av.add(bv).add(value);
        }
    }
    // Kernel: one SPECIES_MAX vector accumulator plus a scalar float argument
    // that is added to the loaded data in every iteration.
    static void test_float_and_vector_push_pop_at_load_barrier(float[] b, Outer outer, float f) {
        VectorSpecies<Float> float_species = FloatVector.SPECIES_MAX;
        FloatVector av = FloatVector.zero(float_species);
        for (int i = 0; i < b.length; i += float_species.length()) {
            Inner inner = outer.field;
            FloatVector bv = FloatVector.fromArray(float_species, b, i);
            float value = ((InnerFloat)inner).data + f;
            av = av.add(bv).add(value);
        }
    }
    // Drivers: each one calls its kernel ITERATIONS times with fresh random
    // scalar arguments (where the kernel takes any).
    static void test_one_float() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_one_float_push_pop_at_load_barrier(f_outer, RANDOM.nextFloat());
        }
    }
    static void test_two_floats() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_two_floats_push_pop_at_load_barrier(f_outer, RANDOM.nextFloat(), RANDOM.nextFloat());
        }
    }
    static void test_three_floats() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_three_floats_push_pop_at_load_barrier(f_outer, RANDOM.nextFloat(), RANDOM.nextFloat(), RANDOM.nextFloat());
        }
    }
    static void test_one_double() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_one_double_push_pop_at_load_barrier(d_outer, RANDOM.nextDouble());
        }
    }
    static void test_two_doubles() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_two_doubles_push_pop_at_load_barrier(d_outer, RANDOM.nextDouble(), RANDOM.nextDouble());
        }
    }
    static void test_three_doubles() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_three_doubles_push_pop_at_load_barrier(d_outer, RANDOM.nextDouble(), RANDOM.nextDouble(), RANDOM.nextDouble());
        }
    }
    static void test_one_vector_128() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_one_vector_128_push_pop_at_load_barrier(f_array, f_outer);
        }
    }
    static void test_two_vectors_128() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_two_vectors_128_push_pop_at_load_barrier(f_array, f_outer);
        }
    }
    static void test_three_vectors_128() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_three_vectors_128_push_pop_at_load_barrier(f_array, f_outer);
        }
    }
    static void test_vector_max() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_vector_max_push_pop_at_load_barrier(f_array, f_outer);
        }
    }
    static void test_float_and_vector() {
        for (int i = 0; i < ITERATIONS; i++) {
            test_float_and_vector_push_pop_at_load_barrier(f_array, f_outer, RANDOM.nextFloat());
        }
    }
    // Runs the driver registered under args[0], which must be a key of TESTS.
    public static void main(String args[]) {
        Runnable r = TESTS.get(args[0]);
        r.run();
    }
}
/**
 * Tells whether {@code keyword} occurs in {@code text} exactly once.
 *
 * @param text    the text to search
 * @param keyword the substring to look for
 * @return true when the first and the last occurrence of {@code keyword}
 *         exist and start at the same index
 */
static boolean containOnlyOneOccuranceOfKeyword(String text, String keyword) {
    int first = text.indexOf(keyword);
    if (first == -1) {
        return false;
    }
    // Exactly one occurrence means the search from the end lands on the same index.
    return text.lastIndexOf(keyword) == first;
}
/**
 * Checks that registers are pushed and popped with the correct register type and number.
 *
 * Searches {@code stdout} for "&lt;keyword&gt;&lt;digits&gt; &lt;type&gt; registers". The first
 * match must be exactly "&lt;keyword&gt;&lt;expected number&gt; &lt;type&gt; registers" and there
 * must be no second match.
 *
 * @param stdout                   output of the child JVM
 * @param keyword                  prefix of the line to look for, e.g. "push_fp: "
 * @param expected_freg_type       register type expected after the number
 * @param expected_number_of_fregs register count expected after the keyword
 * @throws RuntimeException if there is no match or more than one match
 */
static void checkPushPopRegNumberAndType(String stdout, String keyword, String expected_freg_type,
                                         int expected_number_of_fregs) throws Exception {
    String regex = keyword + "(\\d+) " + expected_freg_type + " registers";
    String expected = keyword + expected_number_of_fregs + " " + expected_freg_type + " registers";
    Matcher matcher = Pattern.compile(regex).matcher(stdout);

    if (!matcher.find()) {
        throw new RuntimeException("'" + regex + "' is not found in stdout");
    }
    String found = matcher.group();
    Asserts.assertEquals(found, expected, "found '" + found + "' but should print '" + expected + "'");

    // A further match means the same kind of push/pop was emitted more than once.
    if (matcher.find()) {
        throw new RuntimeException("Stdout is expected to contain only one occurance of '" + regex +
                                   "'. Found another occurance: '" + matcher.group() + "'");
    }
}
/**
 * Starts a child JVM that runs {@code Launcher} with {@code test_name} as its
 * only argument and asks for the code of the method of that name in
 * {@code Launcher} to be printed.
 *
 * @param test_name name of the Launcher method to print and of the test to run
 * @return the standard output of the child JVM
 * @throws Exception if the process cannot be started; the exit value of the
 *         child is checked to be 0
 */
static String launchJavaTestProcess(String test_name) throws Exception {
    String launcher = Launcher.class.getName();
    String[] options = {
        "-Xbatch",
        "-XX:LoopUnrollLimit=0",
        "-XX:-UseOnStackReplacement",
        "-XX:-TieredCompilation",
        "-XX:+UseZGC",
        "-XX:+ZGenerational",
        "--add-modules=jdk.incubator.vector",
        "-XX:CompileCommand=print," + launcher + "::" + test_name,
        launcher,
        test_name
    };

    ArrayList<String> command = new ArrayList<>(options.length);
    for (String option : options) {
        command.add(option);
    }

    ProcessBuilder pb = ProcessTools.createLimitedTestJavaProcessBuilder(command);
    OutputAnalyzer output = new OutputAnalyzer(pb.start());
    output.shouldHaveExitValue(0);
    return output.getStdout();
}
/**
 * Runs {@code test_name} in a child JVM and checks, for both the push and the
 * pop side, that the output reports the expected number of registers of the
 * scalar type and of the vector type.
 *
 * @param test_name                      name of the Launcher test to run
 * @param expected_freg_type             first register type to look for
 * @param expected_number_of_fregs       expected count for the first type
 * @param expected_vector_reg_type       second register type to look for
 * @param expected_number_of_vector_regs expected count for the second type
 */
static void run_test(String test_name, String expected_freg_type, int expected_number_of_fregs,
                     String expected_vector_reg_type, int expected_number_of_vector_regs) throws Exception {
    String stdout = launchJavaTestProcess(test_name);
    String[] keywords = { "push_fp: ", "pop_fp: " };
    for (String keyword : keywords) {
        checkPushPopRegNumberAndType(stdout, keyword, expected_freg_type, expected_number_of_fregs);
        checkPushPopRegNumberAndType(stdout, keyword, expected_vector_reg_type, expected_number_of_vector_regs);
    }
}
/**
 * Runs {@code test_name} in a child JVM and checks that the output contains
 * exactly one "push_fp: " and exactly one "pop_fp: " entry, each reporting
 * the expected register type and count.
 *
 * @param test_name                name of the Launcher test to run
 * @param expected_freg_type       register type to look for
 * @param expected_number_of_fregs expected register count
 */
static void run_test(String test_name, String expected_freg_type, int expected_number_of_fregs) throws Exception {
    String stdout = launchJavaTestProcess(test_name);
    String[] keywords = { "push_fp: ", "pop_fp: " };
    for (String keyword : keywords) {
        boolean single = containOnlyOneOccuranceOfKeyword(stdout, keyword);
        if (!single) {
            throw new RuntimeException("Stdout is expected to contain only one occurance of keyword: " + "'" + keyword + "'");
        }
        checkPushPopRegNumberAndType(stdout, keyword, expected_freg_type, expected_number_of_fregs);
    }
}
/**
 * Test driver: runs every scenario in its own child JVM and verifies the
 * register type and count reported for the push and the pop.
 */
public static void main(String[] args) throws Exception {
    // SPECIES_MAX kernels are expected to report "SVE" only when the maximal
    // vector shape is wider than 128 bits, and "Neon" otherwise.
    String vector_max_reg_type = VectorShape.S_Max_BIT.vectorBitSize() > 128 ? "SVE" : "Neon";

    // Scalar floats and doubles
    run_test("test_one_float_push_pop_at_load_barrier", "fp", 1);
    run_test("test_two_floats_push_pop_at_load_barrier", "fp", 2);
    run_test("test_three_floats_push_pop_at_load_barrier", "fp", 3);
    run_test("test_one_double_push_pop_at_load_barrier", "fp", 1);
    run_test("test_two_doubles_push_pop_at_load_barrier", "fp", 2);
    run_test("test_three_doubles_push_pop_at_load_barrier", "fp", 3);

    // 128-bit vectors
    run_test("test_one_vector_128_push_pop_at_load_barrier", "Neon", 1);
    run_test("test_two_vectors_128_push_pop_at_load_barrier", "Neon", 2);
    run_test("test_three_vectors_128_push_pop_at_load_barrier", "Neon", 3);

    // Maximal-size vectors, alone and combined with a scalar float
    run_test("test_vector_max_push_pop_at_load_barrier", vector_max_reg_type, 1);
    run_test("test_float_and_vector_push_pop_at_load_barrier", "fp", 1, vector_max_reg_type, 1);
}
}