8331117: [PPC64] secondary_super_cache does not scale well
Reviewed-by: rrich, amitkumar
This commit is contained in:
parent
113a2c028d
commit
0d1080d194
@ -2130,6 +2130,295 @@ void MacroAssembler::check_klass_subtype(Register sub_klass,
|
||||
bind(L_failure); // Fallthru if not successful.
|
||||
}
|
||||
|
||||
// scans count pointer sized words at [addr] for occurrence of value,
|
||||
// generic (count must be >0)
|
||||
// iff found: CR0 eq, scratch == 0
|
||||
void MacroAssembler::repne_scan(Register addr, Register value, Register count, Register scratch) {
|
||||
Label Lloop, Lexit;
|
||||
|
||||
#ifdef ASSERT
|
||||
{
|
||||
Label ok;
|
||||
cmpdi(CCR0, count, 0);
|
||||
bgt(CCR0, ok);
|
||||
stop("count must be positive");
|
||||
bind(ok);
|
||||
}
|
||||
#endif
|
||||
|
||||
mtctr(count);
|
||||
|
||||
bind(Lloop);
|
||||
ld(scratch, 0 , addr);
|
||||
xor_(scratch, scratch, value);
|
||||
beq(CCR0, Lexit);
|
||||
addi(addr, addr, wordSize);
|
||||
bdnz(Lloop);
|
||||
|
||||
bind(Lexit);
|
||||
}
|
||||
|
||||
// Guards the register contract shared by the inline code and the stubs: each role
// must live in one fixed register. Roles that a given user does not need may be
// noreg. Expects the names r_super_klass, r_array_base, r_array_length,
// r_array_index, r_sub_klass, r_bitmap and result to be in scope at the use site.
#define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS                                          \
  do {                                                                                   \
    assert(r_super_klass  == R4_ARG2                                      &&             \
           r_array_base   == R3_ARG1                                      &&             \
           r_array_length == R7_ARG5                                      &&             \
           (r_array_index == R6_ARG4      || r_array_index == noreg)      &&             \
           (r_sub_klass   == R5_ARG3      || r_sub_klass   == noreg)      &&             \
           (r_bitmap      == R11_scratch1 || r_bitmap      == noreg)      &&             \
           (result        == R8_ARG6      || result        == noreg),                    \
           "registers must match ppc64.ad");                                             \
  } while (0)
|
||||
|
||||
// Return true: we succeeded in generating this code
|
||||
void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass,
|
||||
Register r_super_klass,
|
||||
Register temp1,
|
||||
Register temp2,
|
||||
Register temp3,
|
||||
Register temp4,
|
||||
Register result,
|
||||
u1 super_klass_slot) {
|
||||
assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result);
|
||||
|
||||
Label L_done;
|
||||
|
||||
BLOCK_COMMENT("lookup_secondary_supers_table {");
|
||||
|
||||
const Register
|
||||
r_array_base = temp1,
|
||||
r_array_length = temp2,
|
||||
r_array_index = temp3,
|
||||
r_bitmap = temp4;
|
||||
|
||||
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
|
||||
|
||||
ld(r_bitmap, in_bytes(Klass::bitmap_offset()), r_sub_klass);
|
||||
|
||||
// First check the bitmap to see if super_klass might be present. If
|
||||
// the bit is zero, we are certain that super_klass is not one of
|
||||
// the secondary supers.
|
||||
u1 bit = super_klass_slot;
|
||||
int shift_count = Klass::SECONDARY_SUPERS_TABLE_MASK - bit;
|
||||
|
||||
// if (shift_count == 0) this is used for comparing with 0:
|
||||
sldi_(r_array_index, r_bitmap, shift_count);
|
||||
|
||||
li(result, 1); // failure
|
||||
// We test the MSB of r_array_index, i.e. its sign bit
|
||||
bge(CCR0, L_done);
|
||||
|
||||
// We will consult the secondary-super array.
|
||||
ld(r_array_base, in_bytes(Klass::secondary_supers_offset()), r_sub_klass);
|
||||
|
||||
// The value i in r_array_index is >= 1, so even though r_array_base
|
||||
// points to the length, we don't need to adjust it to point to the
|
||||
// data.
|
||||
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code");
|
||||
|
||||
// Get the first array index that can contain super_klass.
|
||||
if (bit != 0) {
|
||||
popcntd(r_array_index, r_array_index);
|
||||
// NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word.
|
||||
sldi(r_array_index, r_array_index, LogBytesPerWord); // scale
|
||||
ldx(result, r_array_base, r_array_index);
|
||||
} else {
|
||||
// Actually use index 0, but r_array_base and r_array_index are off by 1 word
|
||||
// such that the sum is precise.
|
||||
ld(result, BytesPerWord, r_array_base);
|
||||
li(r_array_index, BytesPerWord); // for slow path (scaled)
|
||||
}
|
||||
|
||||
xor_(result, result, r_super_klass);
|
||||
beq(CCR0, L_done); // Found a match (result == 0)
|
||||
|
||||
// Is there another entry to check? Consult the bitmap.
|
||||
testbitdi(CCR0, /* temp */ r_array_length, r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK);
|
||||
beq(CCR0, L_done); // (result != 0)
|
||||
|
||||
// Linear probe. Rotate the bitmap so that the next bit to test is
|
||||
// in Bit 2 for the look-ahead check in the slow path.
|
||||
if (bit != 0) {
|
||||
rldicl(r_bitmap, r_bitmap, 64 - bit, 0);
|
||||
}
|
||||
|
||||
// Calls into the stub generated by lookup_secondary_supers_table_slow_path.
|
||||
// Arguments: r_super_klass, r_array_base, r_array_index, r_bitmap.
|
||||
// Kills: r_array_length.
|
||||
// Returns: result.
|
||||
address stub = StubRoutines::lookup_secondary_supers_table_slow_path_stub();
|
||||
Register r_stub_addr = r_array_length;
|
||||
add_const_optimized(r_stub_addr, R29_TOC, MacroAssembler::offset_to_global_toc(stub), R0);
|
||||
mtctr(r_stub_addr);
|
||||
bctrl();
|
||||
|
||||
bind(L_done);
|
||||
BLOCK_COMMENT("} lookup_secondary_supers_table");
|
||||
|
||||
if (VerifySecondarySupers) {
|
||||
verify_secondary_supers_table(r_sub_klass, r_super_klass, result,
|
||||
temp1, temp2, temp3);
|
||||
}
|
||||
}
|
||||
|
||||
// Called by code generated by check_klass_subtype_slow_path
|
||||
// above. This is called when there is a collision in the hashed
|
||||
// lookup in the secondary supers array.
|
||||
void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass,
|
||||
Register r_array_base,
|
||||
Register r_array_index,
|
||||
Register r_bitmap,
|
||||
Register result,
|
||||
Register temp1) {
|
||||
assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, result, temp1);
|
||||
|
||||
const Register
|
||||
r_array_length = temp1,
|
||||
r_sub_klass = noreg;
|
||||
|
||||
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
|
||||
|
||||
Label L_done;
|
||||
|
||||
// Load the array length.
|
||||
lwa(r_array_length, Array<Klass*>::length_offset_in_bytes(), r_array_base);
|
||||
// And adjust the array base to point to the data.
|
||||
// NB! Effectively increments current slot index by 1.
|
||||
assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "");
|
||||
addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes());
|
||||
|
||||
// Linear probe
|
||||
Label L_huge;
|
||||
|
||||
// The bitmap is full to bursting.
|
||||
// Implicit invariant: BITMAP_FULL implies (length > 0)
|
||||
assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), "");
|
||||
cmpdi(CCR0, r_bitmap, -1);
|
||||
beq(CCR0, L_huge);
|
||||
|
||||
// NB! Our caller has checked bits 0 and 1 in the bitmap. The
|
||||
// current slot (at secondary_supers[r_array_index]) has not yet
|
||||
// been inspected, and r_array_index may be out of bounds if we
|
||||
// wrapped around the end of the array.
|
||||
|
||||
{ // This is conventional linear probing, but instead of terminating
|
||||
// when a null entry is found in the table, we maintain a bitmap
|
||||
// in which a 0 indicates missing entries.
|
||||
// The check above guarantees there are 0s in the bitmap, so the loop
|
||||
// eventually terminates.
|
||||
|
||||
#ifdef ASSERT
|
||||
{
|
||||
// We should only reach here after having found a bit in the bitmap.
|
||||
// Invariant: array_length == popcount(bitmap)
|
||||
Label ok;
|
||||
cmpdi(CCR0, r_array_length, 0);
|
||||
bgt(CCR0, ok);
|
||||
stop("array_length must be positive");
|
||||
bind(ok);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Compute limit in r_array_length
|
||||
addi(r_array_length, r_array_length, -1);
|
||||
sldi(r_array_length, r_array_length, LogBytesPerWord);
|
||||
|
||||
Label L_loop;
|
||||
bind(L_loop);
|
||||
|
||||
// Check for wraparound.
|
||||
cmpd(CCR0, r_array_index, r_array_length);
|
||||
isel_0(r_array_index, CCR0, Assembler::greater);
|
||||
|
||||
ldx(result, r_array_base, r_array_index);
|
||||
xor_(result, result, r_super_klass);
|
||||
beq(CCR0, L_done); // success (result == 0)
|
||||
|
||||
// look-ahead check (Bit 2); result is non-zero
|
||||
testbitdi(CCR0, R0, r_bitmap, 2);
|
||||
beq(CCR0, L_done); // fail (result != 0)
|
||||
|
||||
rldicl(r_bitmap, r_bitmap, 64 - 1, 0);
|
||||
addi(r_array_index, r_array_index, BytesPerWord);
|
||||
b(L_loop);
|
||||
}
|
||||
|
||||
{ // Degenerate case: more than 64 secondary supers.
|
||||
// FIXME: We could do something smarter here, maybe a vectorized
|
||||
// comparison or a binary search, but is that worth any added
|
||||
// complexity?
|
||||
bind(L_huge);
|
||||
repne_scan(r_array_base, r_super_klass, r_array_length, result);
|
||||
}
|
||||
|
||||
bind(L_done);
|
||||
}
|
||||
|
||||
// Make sure that the hashed lookup and a linear scan agree.
|
||||
void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
|
||||
Register r_super_klass,
|
||||
Register result,
|
||||
Register temp1,
|
||||
Register temp2,
|
||||
Register temp3) {
|
||||
assert_different_registers(r_sub_klass, r_super_klass, result, temp1, temp2, temp3);
|
||||
|
||||
const Register
|
||||
r_array_base = temp1,
|
||||
r_array_length = temp2,
|
||||
r_array_index = temp3,
|
||||
r_bitmap = noreg; // unused
|
||||
|
||||
LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS;
|
||||
|
||||
BLOCK_COMMENT("verify_secondary_supers_table {");
|
||||
|
||||
Label passed, failure;
|
||||
|
||||
// We will consult the secondary-super array.
|
||||
ld(r_array_base, in_bytes(Klass::secondary_supers_offset()), r_sub_klass);
|
||||
// Load the array length.
|
||||
lwa(r_array_length, Array<Klass*>::length_offset_in_bytes(), r_array_base);
|
||||
// And adjust the array base to point to the data.
|
||||
addi(r_array_base, r_array_base, Array<Klass*>::base_offset_in_bytes());
|
||||
|
||||
// convert !=0 to 1
|
||||
neg(R0, result);
|
||||
orr(result, result, R0);
|
||||
srdi(result, result, 63);
|
||||
|
||||
const Register linear_result = r_array_index; // reuse
|
||||
li(linear_result, 1);
|
||||
cmpdi(CCR0, r_array_length, 0);
|
||||
ble(CCR0, failure);
|
||||
repne_scan(r_array_base, r_super_klass, r_array_length, linear_result);
|
||||
bind(failure);
|
||||
|
||||
// convert !=0 to 1
|
||||
neg(R0, linear_result);
|
||||
orr(linear_result, linear_result, R0);
|
||||
srdi(linear_result, linear_result, 63);
|
||||
|
||||
cmpd(CCR0, result, linear_result);
|
||||
beq(CCR0, passed);
|
||||
|
||||
assert_different_registers(R3_ARG1, r_sub_klass, linear_result, result);
|
||||
mr_if_needed(R3_ARG1, r_super_klass);
|
||||
assert_different_registers(R4_ARG2, linear_result, result);
|
||||
mr_if_needed(R4_ARG2, r_sub_klass);
|
||||
assert_different_registers(R5_ARG3, result);
|
||||
neg(R5_ARG3, linear_result);
|
||||
neg(R6_ARG4, result);
|
||||
const char* msg = "mismatch";
|
||||
load_const_optimized(R7_ARG5, (intptr_t)msg, R0);
|
||||
call_VM_leaf(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure));
|
||||
should_not_reach_here();
|
||||
|
||||
bind(passed);
|
||||
|
||||
BLOCK_COMMENT("} verify_secondary_supers_table");
|
||||
}
|
||||
|
||||
void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) {
|
||||
assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required");
|
||||
|
||||
|
@ -604,6 +604,33 @@ class MacroAssembler: public Assembler {
|
||||
Register temp2_reg,
|
||||
Label& L_success);
|
||||
|
||||
// Scans `count` (> 0) pointer-sized words at [addr] for `value`.
// Iff found: CR0 is eq and scratch == 0.
void repne_scan(Register addr,
                Register value,
                Register count,
                Register scratch);
|
||||
|
||||
// As above, but with a constant super_klass.
|
||||
// The result is in Register result, not the condition codes.
|
||||
void lookup_secondary_supers_table(Register r_sub_klass,
|
||||
Register r_super_klass,
|
||||
Register temp1,
|
||||
Register temp2,
|
||||
Register temp3,
|
||||
Register temp4,
|
||||
Register result,
|
||||
u1 super_klass_slot);
|
||||
|
||||
// Debugging aid: checks that the hashed lookup outcome in `result` agrees with
// a linear scan of the secondary supers of r_sub_klass.
void verify_secondary_supers_table(Register r_sub_klass,
                                   Register r_super_klass,
                                   Register result,
                                   Register temp1,
                                   Register temp2,
                                   Register temp3);
|
||||
|
||||
// Collision handling for lookup_secondary_supers_table (linear probing over the
// secondary-super array). temp1 is a scratch register.
void lookup_secondary_supers_table_slow_path(Register r_super_klass,
                                             Register r_array_base,
                                             Register r_array_index,
                                             Register r_bitmap,
                                             Register result,
                                             Register temp1);
|
||||
|
||||
void clinit_barrier(Register klass,
|
||||
Register thread,
|
||||
Label* L_fast_path = nullptr,
|
||||
|
@ -641,6 +641,8 @@ reg_class rarg1_bits64_reg(R3_H, R3);
|
||||
reg_class rarg2_bits64_reg(R4_H, R4);
|
||||
reg_class rarg3_bits64_reg(R5_H, R5);
|
||||
reg_class rarg4_bits64_reg(R6_H, R6);
|
||||
// Register class made up of R7_H and R7; operand rarg5RegP allocates in it.
reg_class rarg5_bits64_reg(R7_H, R7);
|
||||
// Register class made up of R8_H and R8; operand rarg6RegP allocates in it.
reg_class rarg6_bits64_reg(R8_H, R8);
|
||||
// Thread register, 'written' by tlsLoadP, see there.
|
||||
reg_class thread_bits64_reg(R16_H, R16);
|
||||
|
||||
@ -4354,6 +4356,8 @@ operand iRegPsrc() %{
|
||||
match(rarg2RegP);
|
||||
match(rarg3RegP);
|
||||
match(rarg4RegP);
|
||||
match(rarg5RegP);
|
||||
match(rarg6RegP);
|
||||
match(threadRegP);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
@ -4409,6 +4413,20 @@ operand rarg4RegP() %{
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Pointer register operand restricted to the register class rarg5_bits64_reg.
operand rarg5RegP() %{
  constraint(ALLOC_IN_RC(rarg5_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}
|
||||
|
||||
// Pointer register operand restricted to the register class rarg6_bits64_reg.
operand rarg6RegP() %{
  constraint(ALLOC_IN_RC(rarg6_bits64_reg));
  match(iRegPdst);
  format %{ %}
  interface(REG_INTER);
%}
|
||||
|
||||
operand iRegNsrc() %{
|
||||
constraint(ALLOC_IN_RC(bits32_reg_ro));
|
||||
match(RegN);
|
||||
@ -12024,6 +12042,35 @@ instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P supe
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
// PartialSubtypeCheck against a constant super class, available when
// UseSecondarySupersTable is on. Every operand is bound to a fixed register
// class so that the inline sequence and the per-slot stubs use the same
// registers. Kills CR0 and CTR.
instruct partialSubtypeCheckConstSuper(rarg3RegP sub, rarg2RegP super_reg, immP super_con, rarg6RegP result,
                                       rarg1RegP tempR1, rarg5RegP tempR2, rarg4RegP tempR3, rscratch1RegP tempR4,
                                       flagsRegCR0 cr0, regCTR ctr)
%{
  match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
  predicate(UseSecondarySupersTable);
  effect(KILL cr0, KILL ctr, TEMP tempR1, TEMP tempR2, TEMP tempR3, TEMP tempR4);

  ins_cost(DEFAULT_COST*8); // smaller than the other version
  format %{ "partialSubtypeCheck $result, $sub, $super_reg" %}

  ins_encode %{
    // The hash slot of the constant super class selects the code variant.
    u1 slot = ((Klass*)$super_con$$constant)->hash_slot();
    if (!InlineSecondarySupersTest) {
      // Out of line: branch-and-link to the stub generated for this slot.
      address slot_stub = StubRoutines::lookup_secondary_supers_table_stub(slot);
      Register r_entry = $tempR1$$Register;
      __ add_const_optimized(r_entry, R29_TOC, MacroAssembler::offset_to_global_toc(slot_stub), R0);
      __ mtctr(r_entry);
      __ bctrl();
    } else {
      // Inline: emit the lookup sequence right here.
      __ lookup_secondary_supers_table($sub$$Register, $super_reg$$Register,
                                       $tempR1$$Register, $tempR2$$Register, $tempR3$$Register, $tempR4$$Register,
                                       $result$$Register, slot);
    }
  %}

  ins_pipe(pipe_class_memory);
%}
|
||||
|
||||
// inlined locking and unlocking
|
||||
|
||||
instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
|
||||
|
@ -4531,6 +4531,46 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
#endif // VM_LITTLE_ENDIAN
|
||||
|
||||
// Generates the out-of-line variant of the hashed secondary-supers lookup for
// one hash slot (super_klass_index) and returns its entry address.
// The registers are the fixed ones the lookup code asserts on.
// NOTE(review): lookup_secondary_supers_table may emit a bctrl into the
// slow-path stub, which overwrites LR, while this stub returns with blr and
// does not save/restore LR. Confirm that returning is still correct when the
// slow path has been taken.
address generate_lookup_secondary_supers_table_stub(u1 super_klass_index) {
  StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table");

  address entry = __ pc();

  const Register r_super_klass  = R4_ARG2;
  const Register r_array_base   = R3_ARG1;
  const Register r_array_length = R7_ARG5;
  const Register r_array_index  = R6_ARG4;
  const Register r_sub_klass    = R5_ARG3;
  const Register r_bitmap       = R11_scratch1;
  const Register result         = R8_ARG6;

  __ lookup_secondary_supers_table(r_sub_klass, r_super_klass,
                                   r_array_base, r_array_length, r_array_index,
                                   r_bitmap, result, super_klass_index);
  __ blr();

  return entry;
}
|
||||
|
||||
// Slow path implementation for UseSecondarySupersTable.
|
||||
address generate_lookup_secondary_supers_table_slow_path_stub() {
|
||||
StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table_slow_path");
|
||||
|
||||
address start = __ pc();
|
||||
const Register
|
||||
r_super_klass = R4_ARG2,
|
||||
r_array_base = R3_ARG1,
|
||||
temp1 = R7_ARG5,
|
||||
r_array_index = R6_ARG4,
|
||||
r_bitmap = R11_scratch1,
|
||||
result = R8_ARG6;
|
||||
|
||||
__ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index, r_bitmap, result, temp1);
|
||||
__ blr();
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
address generate_cont_thaw(const char* label, Continuation::thaw_kind kind) {
|
||||
if (!Continuations::enabled()) return nullptr;
|
||||
|
||||
@ -4807,6 +4847,16 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// arraycopy stubs used by compilers
|
||||
generate_arraycopy_stubs();
|
||||
|
||||
if (UseSecondarySupersTable) {
|
||||
StubRoutines::_lookup_secondary_supers_table_slow_path_stub = generate_lookup_secondary_supers_table_slow_path_stub();
|
||||
if (!InlineSecondarySupersTest) {
|
||||
for (int slot = 0; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) {
|
||||
StubRoutines::_lookup_secondary_supers_table_stubs[slot]
|
||||
= generate_lookup_secondary_supers_table_stub(slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler();
|
||||
}
|
||||
|
||||
|
@ -340,6 +340,13 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UseSHA, false);
|
||||
}
|
||||
|
||||
if (UseSecondarySupersTable && PowerArchitecturePPC64 < 7) {
|
||||
if (!FLAG_IS_DEFAULT(UseSecondarySupersTable)) {
|
||||
warning("UseSecondarySupersTable requires Power7 or later.");
|
||||
}
|
||||
FLAG_SET_DEFAULT(UseSecondarySupersTable, false);
|
||||
}
|
||||
|
||||
#ifdef COMPILER2
|
||||
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
|
||||
UseSquareToLenIntrinsic = true;
|
||||
|
@ -95,6 +95,7 @@ public:
|
||||
static bool supports_fast_class_init_checks() { return true; }
|
||||
constexpr static bool supports_stack_watermark_barrier() { return true; }
|
||||
constexpr static bool supports_recursive_lightweight_locking() { return true; }
|
||||
// Compile-time capability query; always true here.
// NOTE(review): presumably consulted by shared code when deciding on UseSecondarySupersTable — confirm against callers.
constexpr static bool supports_secondary_supers_table() { return true; }
|
||||
|
||||
static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
|
||||
// CPU instruction support
|
||||
|
Loading…
x
Reference in New Issue
Block a user