From ead0116f2624e0e34529e47e4f509142d588b994 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Wed, 6 Nov 2024 08:58:08 +0000 Subject: [PATCH] 8331341: secondary_super_cache does not scale well: C1 and interpreter Reviewed-by: vlivanov, kvn, dlong --- src/hotspot/cpu/aarch64/aarch64.ad | 59 ++-- .../cpu/aarch64/c1_Runtime1_aarch64.cpp | 8 +- .../cpu/aarch64/macroAssembler_aarch64.cpp | 305 ++++++++++++++---- .../cpu/aarch64/macroAssembler_aarch64.hpp | 60 +++- .../cpu/aarch64/stubGenerator_aarch64.cpp | 30 +- src/hotspot/cpu/ppc/macroAssembler_ppc.cpp | 2 +- .../cpu/riscv/macroAssembler_riscv.cpp | 2 +- src/hotspot/cpu/s390/macroAssembler_s390.cpp | 2 +- src/hotspot/cpu/x86/macroAssembler_x86.cpp | 305 ++++++++++++++++-- src/hotspot/cpu/x86/macroAssembler_x86.hpp | 74 ++++- src/hotspot/cpu/x86/stubGenerator_x86_64.cpp | 8 +- src/hotspot/cpu/x86/x86_32.ad | 4 +- src/hotspot/cpu/x86/x86_64.ad | 85 +++-- src/hotspot/share/asm/register.hpp | 6 + src/hotspot/share/cds/filemap.cpp | 7 - src/hotspot/share/cds/filemap.hpp | 1 - src/hotspot/share/jvmci/vmStructs_jvmci.cpp | 2 +- src/hotspot/share/memory/universe.cpp | 6 +- src/hotspot/share/oops/instanceKlass.cpp | 36 +-- src/hotspot/share/oops/klass.cpp | 87 +++-- src/hotspot/share/oops/klass.hpp | 37 +-- src/hotspot/share/oops/klass.inline.hpp | 69 ++++ 22 files changed, 919 insertions(+), 276 deletions(-) diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 0ce06dd8341..eedf29cc563 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -16281,9 +16281,10 @@ instruct ShouldNotReachHere() %{ instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr) %{ match(Set result (PartialSubtypeCheck sub super)); + predicate(!UseSecondarySupersTable); effect(KILL cr, KILL temp); - ins_cost(1100); // slightly larger than the next version + ins_cost(20 * INSN_COST); // slightly larger than the next version format %{ 
"partialSubtypeCheck $result, $sub, $super" %} ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result)); @@ -16293,6 +16294,34 @@ instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_ ins_pipe(pipe_class_memory); %} +// Two versions of partialSubtypeCheck, both used when we need to +// search for a super class in the secondary supers array. The first +// is used when we don't know _a priori_ the class being searched +// for. The second, far more common, is used when we do know: this is +// used for instanceof, checkcast, and any case where C2 can determine +// it by constant propagation. + +instruct partialSubtypeCheckVarSuper(iRegP_R4 sub, iRegP_R0 super, vRegD_V0 vtemp, iRegP_R5 result, + iRegP_R1 tempR1, iRegP_R2 tempR2, iRegP_R3 tempR3, + rFlagsReg cr) +%{ + match(Set result (PartialSubtypeCheck sub super)); + predicate(UseSecondarySupersTable); + effect(KILL cr, TEMP tempR1, TEMP tempR2, TEMP tempR3, TEMP vtemp); + + ins_cost(10 * INSN_COST); // slightly larger than the next version + format %{ "partialSubtypeCheck $result, $sub, $super" %} + + ins_encode %{ + __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, + $tempR1$$Register, $tempR2$$Register, $tempR3$$Register, + $vtemp$$FloatRegister, + $result$$Register, /*L_success*/nullptr); + %} + + ins_pipe(pipe_class_memory); +%} + instruct partialSubtypeCheckConstSuper(iRegP_R4 sub, iRegP_R0 super_reg, immP super_con, vRegD_V0 vtemp, iRegP_R5 result, iRegP_R1 tempR1, iRegP_R2 tempR2, iRegP_R3 tempR3, rFlagsReg cr) @@ -16301,18 +16330,19 @@ instruct partialSubtypeCheckConstSuper(iRegP_R4 sub, iRegP_R0 super_reg, immP su predicate(UseSecondarySupersTable); effect(KILL cr, TEMP tempR1, TEMP tempR2, TEMP tempR3, TEMP vtemp); - ins_cost(700); // smaller than the next version + ins_cost(5 * INSN_COST); // smaller than the next version format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %} ins_encode %{ bool success = false; u1 
super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot(); if (InlineSecondarySupersTest) { - success = __ lookup_secondary_supers_table($sub$$Register, $super_reg$$Register, - $tempR1$$Register, $tempR2$$Register, $tempR3$$Register, - $vtemp$$FloatRegister, - $result$$Register, - super_klass_slot); + success = + __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, + $tempR1$$Register, $tempR2$$Register, $tempR3$$Register, + $vtemp$$FloatRegister, + $result$$Register, + super_klass_slot); } else { address call = __ trampoline_call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot))); success = (call != nullptr); @@ -16326,21 +16356,6 @@ instruct partialSubtypeCheckConstSuper(iRegP_R4 sub, iRegP_R0 super_reg, immP su ins_pipe(pipe_class_memory); %} -instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr) -%{ - match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); - effect(KILL temp, KILL result); - - ins_cost(1100); // slightly larger than the next version - format %{ "partialSubtypeCheck $result, $sub, $super == 0" %} - - ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result)); - - opcode(0x0); // Don't zero result reg on hit - - ins_pipe(pipe_class_memory); -%} - // Intrisics for String.compareTo() instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2, diff --git a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp index 0b9acc0f3a8..00b678d5405 100644 --- a/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_Runtime1_aarch64.cpp @@ -842,7 +842,13 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) { __ ldp(r4, r0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); Label miss; - __ check_klass_subtype_slow_path(r4, r0, r2, r5, nullptr, &miss); + __ 
check_klass_subtype_slow_path(/*sub_klass*/r4, + /*super_klass*/r0, + /*temp_reg*/r2, + /*temp2_reg*/r5, + /*L_success*/nullptr, + /*L_failure*/&miss); + // Need extras for table lookup: r1, r3, vtemp // fallthrough on success: __ mov(rscratch1, 1); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 252f4232115..9f35a6e75a7 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1387,13 +1387,10 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, Label* L_success, Label* L_failure, Label* L_slow_path, - RegisterOrConstant super_check_offset) { - assert_different_registers(sub_klass, super_klass, temp_reg); - bool must_load_sco = (super_check_offset.constant_or_zero() == -1); - if (super_check_offset.is_register()) { - assert_different_registers(sub_klass, super_klass, - super_check_offset.as_register()); - } else if (must_load_sco) { + Register super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset); + bool must_load_sco = ! 
super_check_offset->is_valid(); + if (must_load_sco) { assert(temp_reg != noreg, "supply either a temp or a register offset"); } @@ -1404,7 +1401,6 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; } assert(label_nulls <= 1, "at most one null in the batch"); - int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); int sco_offset = in_bytes(Klass::super_check_offset_offset()); Address super_check_offset_addr(super_klass, sco_offset); @@ -1426,11 +1422,13 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, // Check the supertype display: if (must_load_sco) { ldrw(temp_reg, super_check_offset_addr); - super_check_offset = RegisterOrConstant(temp_reg); + super_check_offset = temp_reg; } + Address super_check_addr(sub_klass, super_check_offset); ldr(rscratch1, super_check_addr); cmp(super_klass, rscratch1); // load displayed supertype + br(Assembler::EQ, *L_success); // This check has worked decisively for primary supers. // Secondary supers are sought in the super_cache ('super_cache_addr'). @@ -1443,31 +1441,12 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, // So if it was a primary super, we can just fail immediately. // Otherwise, it's the slow path for us (no success at this point). - if (super_check_offset.is_register()) { - br(Assembler::EQ, *L_success); - subs(zr, super_check_offset.as_register(), sc_offset); - if (L_failure == &L_fallthrough) { - br(Assembler::EQ, *L_slow_path); - } else { - br(Assembler::NE, *L_failure); - final_jmp(*L_slow_path); - } - } else if (super_check_offset.as_constant() == sc_offset) { - // Need a slow path; fast failure is impossible. 
- if (L_slow_path == &L_fallthrough) { - br(Assembler::EQ, *L_success); - } else { - br(Assembler::NE, *L_slow_path); - final_jmp(*L_success); - } + sub(rscratch1, super_check_offset, in_bytes(Klass::secondary_super_cache_offset())); + if (L_failure == &L_fallthrough) { + cbz(rscratch1, *L_slow_path); } else { - // No slow path; it's a fast decision. - if (L_failure == &L_fallthrough) { - br(Assembler::EQ, *L_success); - } else { - br(Assembler::NE, *L_failure); - final_jmp(*L_success); - } + cbnz(rscratch1, *L_failure); + final_jmp(*L_slow_path); } bind(L_fallthrough); @@ -1507,13 +1486,13 @@ void MacroAssembler::repne_scanw(Register addr, Register value, Register count, bind(Lexit); } -void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, - Register super_klass, - Register temp_reg, - Register temp2_reg, - Label* L_success, - Label* L_failure, - bool set_cond_codes) { +void MacroAssembler::check_klass_subtype_slow_path_linear(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { // NB! Callers may assume that, when temp2_reg is a valid register, // this code sets it to a nonzero value. @@ -1581,7 +1560,10 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, br(Assembler::NE, *L_failure); // Success. Cache the super we found and proceed in triumph. - str(super_klass, super_cache_addr); + + if (UseSecondarySupersCache) { + str(super_klass, super_cache_addr); + } if (L_success != &L_fallthrough) { b(*L_success); @@ -1592,6 +1574,102 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, bind(L_fallthrough); } +// If Register r is invalid, remove a new register from +// available_regs, and add new register to regs_to_push. 
+Register MacroAssembler::allocate_if_noreg(Register r, + RegSetIterator &available_regs, + RegSet &regs_to_push) { + if (!r->is_valid()) { + r = *available_regs++; + regs_to_push += r; + } + return r; +} + +// check_klass_subtype_slow_path_table() looks for super_klass in the +// hash table belonging to sub_klass, branching to L_success or +// L_failure as appropriate. This is essentially a shim which +// allocates registers as necessary then calls +// lookup_secondary_supers_table_var() to do the work. Any of the temp +// regs may be noreg, in which case this logic will choose some +// registers and push and pop them around the lookup. +void MacroAssembler::check_klass_subtype_slow_path_table(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Register temp3_reg, + Register result_reg, + FloatRegister vtemp, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + RegSet temps = RegSet::of(temp_reg, temp2_reg, temp3_reg); + + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, rscratch1); + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one null in the batch"); + + BLOCK_COMMENT("check_klass_subtype_slow_path"); + + RegSetIterator available_regs + = (RegSet::range(r0, r15) - temps - sub_klass - super_klass).begin(); + + RegSet pushed_regs; + + temp_reg = allocate_if_noreg(temp_reg, available_regs, pushed_regs); + temp2_reg = allocate_if_noreg(temp2_reg, available_regs, pushed_regs); + temp3_reg = allocate_if_noreg(temp3_reg, available_regs, pushed_regs); + result_reg = allocate_if_noreg(result_reg, available_regs, pushed_regs); + + push(pushed_regs, sp); + + lookup_secondary_supers_table_var(sub_klass, + super_klass, + temp_reg, temp2_reg, temp3_reg, vtemp, result_reg, + nullptr); + cmp(result_reg, zr); + + // Unspill the
temp. registers: + pop(pushed_regs, sp); + + // NB! Callers may assume that, when set_cond_codes is true, this + // code sets temp2_reg to a nonzero value. + if (set_cond_codes) { + mov(temp2_reg, 1); + } + + br(Assembler::NE, *L_failure); + + if (L_success != &L_fallthrough) { + b(*L_success); + } + + bind(L_fallthrough); +} + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + if (UseSecondarySupersTable) { + check_klass_subtype_slow_path_table + (sub_klass, super_klass, temp_reg, temp2_reg, /*temp3*/noreg, /*result*/noreg, + /*vtemp*/fnoreg, + L_success, L_failure, set_cond_codes); + } else { + check_klass_subtype_slow_path_linear + (sub_klass, super_klass, temp_reg, temp2_reg, L_success, L_failure, set_cond_codes); + } +} + + // Ensure that the inline code and the stub are using the same registers. #define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS \ do { \ @@ -1604,16 +1682,15 @@ do { \ (result == r5 || result == noreg), "registers must match aarch64.ad"); \ } while(0) -// Return true: we succeeded in generating this code -bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, - Register r_super_klass, - Register temp1, - Register temp2, - Register temp3, - FloatRegister vtemp, - Register result, - u1 super_klass_slot, - bool stub_is_near) { +bool MacroAssembler::lookup_secondary_supers_table_const(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + FloatRegister vtemp, + Register result, + u1 super_klass_slot, + bool stub_is_near) { assert_different_registers(r_sub_klass, temp1, temp2, temp3, result, rscratch1, rscratch2); Label L_fallthrough; @@ -1635,9 +1712,9 @@ bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, // We're going to need the bitmap in a vector reg and in a core reg, // so load both now. 
- ldr(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset())); + ldr(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset())); if (bit != 0) { - ldrd(vtemp, Address(r_sub_klass, Klass::bitmap_offset())); + ldrd(vtemp, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset())); } // First check the bitmap to see if super_klass might be present. If // the bit is zero, we are certain that super_klass is not one of @@ -1701,6 +1778,113 @@ bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, return true; } +// At runtime, return 0 in result if r_super_klass is a superclass of +// r_sub_klass, otherwise return nonzero. Use this version of +// lookup_secondary_supers_table() if you don't know ahead of time +// which superclass will be searched for. Used by interpreter and +// runtime stubs. It is larger and has somewhat greater latency than +// the version above, which takes a constant super_klass_slot. +void MacroAssembler::lookup_secondary_supers_table_var(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + FloatRegister vtemp, + Register result, + Label *L_success) { + assert_different_registers(r_sub_klass, temp1, temp2, temp3, result, rscratch1, rscratch2); + + Label L_fallthrough; + + BLOCK_COMMENT("lookup_secondary_supers_table {"); + + const Register + r_array_index = temp3, + slot = rscratch1, + r_bitmap = rscratch2; + + ldrb(slot, Address(r_super_klass, Klass::hash_slot_offset())); + + // Make sure that result is nonzero if the test below misses. + mov(result, 1); + + ldr(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset())); + + // First check the bitmap to see if super_klass might be present. If + // the bit is zero, we are certain that super_klass is not one of + // the secondary supers. 
+ + // This next instruction is equivalent to: + // mov(tmp_reg, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 1)); + // sub(temp2, tmp_reg, slot); + eor(temp2, slot, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 1)); + lslv(temp2, r_bitmap, temp2); + tbz(temp2, Klass::SECONDARY_SUPERS_TABLE_SIZE - 1, L_fallthrough); + + bool must_save_v0 = (vtemp == fnoreg); + if (must_save_v0) { + // temp1 and result are free, so use them to preserve vtemp + vtemp = v0; + mov(temp1, vtemp, D, 0); + mov(result, vtemp, D, 1); + } + + // Get the first array index that can contain super_klass into r_array_index. + mov(vtemp, D, 0, temp2); + cnt(vtemp, T8B, vtemp); + addv(vtemp, T8B, vtemp); + mov(r_array_index, vtemp, D, 0); + + if (must_save_v0) { + mov(vtemp, D, 0, temp1 ); + mov(vtemp, D, 1, result); + } + + // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word. + + const Register + r_array_base = temp1, + r_array_length = temp2; + + // The value i in r_array_index is >= 1, so even though r_array_base + // points to the length, we don't need to adjust it to point to the + // data. + assert(Array::base_offset_in_bytes() == wordSize, "Adjust this code"); + assert(Array::length_offset_in_bytes() == 0, "Adjust this code"); + + // We will consult the secondary-super array. + ldr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + + ldr(result, Address(r_array_base, r_array_index, Address::lsl(LogBytesPerWord))); + eor(result, result, r_super_klass); + cbz(result, L_success ? *L_success : L_fallthrough); // Found a match + + // Is there another entry to check? Consult the bitmap. + rorv(r_bitmap, r_bitmap, slot); + // rol(r_bitmap, r_bitmap, 1); + tbz(r_bitmap, 1, L_fallthrough); + + // The slot we just inspected is at secondary_supers[r_array_index - 1]. + // The next slot to be inspected, by the logic we're about to call, + // is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap + // have been checked. 
+ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index, + r_bitmap, r_array_length, result, /*is_stub*/false); + + BLOCK_COMMENT("} lookup_secondary_supers_table"); + + bind(L_fallthrough); + + if (VerifySecondarySupers) { + verify_secondary_supers_table(r_sub_klass, r_super_klass, // r4, r0 + temp1, temp2, result); // r1, r2, r5 + } + + if (L_success) { + cbz(result, *L_success); + } +} + // Called by code generated by check_klass_subtype_slow_path // above. This is called when there is a collision in the hashed // lookup in the secondary supers array. @@ -1709,14 +1893,17 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl Register r_array_index, Register r_bitmap, Register temp1, - Register result) { + Register result, + bool is_stub) { assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, result, rscratch1); const Register r_array_length = temp1, r_sub_klass = noreg; // unused - LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + if (is_stub) { + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + } Label L_fallthrough, L_huge; @@ -1741,8 +1928,10 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl { // This is conventional linear probing, but instead of terminating // when a null entry is found in the table, we maintain a bitmap // in which a 0 indicates missing entries. - // The check above guarantees there are 0s in the bitmap, so the loop - // eventually terminates. + // As long as the bitmap is not completely full, + // array_length == popcount(bitmap). The array_length check above + // guarantees there are 0s in the bitmap, so the loop eventually + // terminates. 
Label L_loop; bind(L_loop); @@ -1788,8 +1977,6 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, r_array_index = noreg, // unused r_bitmap = noreg; // unused - LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; - BLOCK_COMMENT("verify_secondary_supers_table {"); // We will consult the secondary-super array. diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index b4452688950..b23acc15718 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -995,7 +995,7 @@ public: Label* L_success, Label* L_failure, Label* L_slow_path, - RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + Register super_check_offset = noreg); // The rest of the type check; must be wired to a corresponding fast path. // It does not repeat the fast path logic, so don't use it standalone. @@ -1010,17 +1010,54 @@ public: Label* L_failure, bool set_cond_codes = false); + void check_klass_subtype_slow_path_linear(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + void check_klass_subtype_slow_path_table(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Register temp3_reg, + Register result_reg, + FloatRegister vtemp_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // If r is valid, return r. + // If r is invalid, remove a register r2 from available_regs, add r2 + // to regs_to_push, then return r2. 
+ Register allocate_if_noreg(const Register r, + RegSetIterator &available_regs, + RegSet &regs_to_push); + + // Secondary subtype checking + void lookup_secondary_supers_table_var(Register sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + FloatRegister vtemp, + Register result, + Label *L_success); + + + // As above, but with a constant super_klass. // The result is in Register result, not the condition codes. - bool lookup_secondary_supers_table(Register r_sub_klass, - Register r_super_klass, - Register temp1, - Register temp2, - Register temp3, - FloatRegister vtemp, - Register result, - u1 super_klass_slot, - bool stub_is_near = false); + bool lookup_secondary_supers_table_const(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + FloatRegister vtemp, + Register result, + u1 super_klass_slot, + bool stub_is_near = false); void verify_secondary_supers_table(Register r_sub_klass, Register r_super_klass, @@ -1033,7 +1070,8 @@ public: Register r_array_index, Register r_bitmap, Register temp1, - Register result); + Register result, + bool is_stub = true); // Simplified, combined version, good for typical uses. // Falls through on failure. 
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index ecaaf0a98e9..26462eed7de 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -1842,6 +1842,9 @@ class StubGenerator: public StubCodeGenerator { void generate_type_check(Register sub_klass, Register super_check_offset, Register super_klass, + Register temp1, + Register temp2, + Register result, Label& L_success) { assert_different_registers(sub_klass, super_check_offset, super_klass); @@ -1851,7 +1854,7 @@ class StubGenerator: public StubCodeGenerator { __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, nullptr, super_check_offset); - __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, nullptr); + __ check_klass_subtype_slow_path(sub_klass, super_klass, temp1, temp2, &L_success, nullptr); // Fall through on failure! __ BIND(L_miss); @@ -1987,7 +1990,17 @@ class StubGenerator: public StubCodeGenerator { __ cbz(copied_oop, L_store_element); __ load_klass(r19_klass, copied_oop);// query the object klass - generate_type_check(r19_klass, ckoff, ckval, L_store_element); + + BLOCK_COMMENT("type_check:"); + generate_type_check(/*sub_klass*/r19_klass, + /*super_check_offset*/ckoff, + /*super_klass*/ckval, + /*r_array_base*/gct1, + /*temp2*/gct2, + /*result*/r10, L_store_element); + + // Fall through on failure! + // ======== end loop ======== // It was a real error; we must depend on the caller to finish the job. @@ -1996,7 +2009,7 @@ class StubGenerator: public StubCodeGenerator { // their number to the caller. 
__ subs(count, count_save, count); // K = partially copied oop count - __ eon(count, count, zr); // report (-1^K) to caller + __ eon(count, count, zr); // report (-1^K) to caller __ br(Assembler::EQ, L_done_pop); __ BIND(L_do_card_marks); @@ -2363,7 +2376,8 @@ class StubGenerator: public StubCodeGenerator { __ ldrw(sco_temp, Address(dst_klass, sco_offset)); // Smashes rscratch1, rscratch2 - generate_type_check(scratch_src_klass, sco_temp, dst_klass, L_plain_copy); + generate_type_check(scratch_src_klass, sco_temp, dst_klass, /*temps*/ noreg, noreg, noreg, + L_plain_copy); // Fetch destination element klass from the ObjArrayKlass header. int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); @@ -7097,10 +7111,10 @@ class StubGenerator: public StubCodeGenerator { Label L_success; __ enter(); - __ lookup_secondary_supers_table(r_sub_klass, r_super_klass, - r_array_base, r_array_length, r_array_index, - vtemp, result, super_klass_index, - /*stub_is_near*/true); + __ lookup_secondary_supers_table_const(r_sub_klass, r_super_klass, + r_array_base, r_array_length, r_array_index, + vtemp, result, super_klass_index, + /*stub_is_near*/true); __ leave(); __ ret(lr); diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index c7adbfb52f0..190e0c39fd7 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -2187,7 +2187,7 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; - ld(r_bitmap, in_bytes(Klass::bitmap_offset()), r_sub_klass); + ld(r_bitmap, in_bytes(Klass::secondary_supers_bitmap_offset()), r_sub_klass); // First check the bitmap to see if super_klass might be present. 
If // the bit is zero, we are certain that super_klass is not one of diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index 3bb478dbe05..1e7a3f65e8e 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -3928,7 +3928,7 @@ bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, // Initialize result value to 1 which means mismatch. mv(result, 1); - ld(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset())); + ld(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset())); // First check the bitmap to see if super_klass might be present. If // the bit is zero, we are certain that super_klass is not one of diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 6b739553b15..84d09b9c1dc 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -3215,7 +3215,7 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; - z_lg(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset())); + z_lg(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset())); // First check the bitmap to see if super_klass might be present. 
If // the bit is zero, we are certain that super_klass is not one of diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 55c4e29b8a3..2da8fe68502 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -4660,13 +4660,13 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, } -void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, - Register super_klass, - Register temp_reg, - Register temp2_reg, - Label* L_success, - Label* L_failure, - bool set_cond_codes) { +void MacroAssembler::check_klass_subtype_slow_path_linear(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { assert_different_registers(sub_klass, super_klass, temp_reg); if (temp2_reg != noreg) assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); @@ -4752,7 +4752,128 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, bind(L_fallthrough); } -#ifdef _LP64 +#ifndef _LP64 + +// 32-bit x86 only: always use the linear search. 
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + check_klass_subtype_slow_path_linear + (sub_klass, super_klass, temp_reg, temp2_reg, L_success, L_failure, set_cond_codes); +} + +#else // _LP64 + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + assert(set_cond_codes == false, "must be false on 64-bit x86"); + check_klass_subtype_slow_path + (sub_klass, super_klass, temp_reg, temp2_reg, noreg, noreg, + L_success, L_failure); +} + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Register temp3_reg, + Register temp4_reg, + Label* L_success, + Label* L_failure) { + if (UseSecondarySupersTable) { + check_klass_subtype_slow_path_table + (sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg, temp4_reg, + L_success, L_failure); + } else { + check_klass_subtype_slow_path_linear + (sub_klass, super_klass, temp_reg, temp2_reg, L_success, L_failure, /*set_cond_codes*/false); + } +} + +Register MacroAssembler::allocate_if_noreg(Register r, + RegSetIterator &available_regs, + RegSet &regs_to_push) { + if (!r->is_valid()) { + r = *available_regs++; + regs_to_push += r; + } + return r; +} + +void MacroAssembler::check_klass_subtype_slow_path_table(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Register temp3_reg, + Register result_reg, + Label* L_success, + Label* L_failure) { + // NB! Callers may assume that, when temp2_reg is a valid register, + // this code sets it to a nonzero value. 
+ bool temp2_reg_was_valid = temp2_reg->is_valid(); + + RegSet temps = RegSet::of(temp_reg, temp2_reg, temp3_reg); + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one null in the batch"); + + BLOCK_COMMENT("check_klass_subtype_slow_path_table"); + + RegSetIterator available_regs + = (RegSet::of(rax, rcx, rdx, r8) + r9 + r10 + r11 + r12 - temps - sub_klass - super_klass).begin(); + + RegSet pushed_regs; + + temp_reg = allocate_if_noreg(temp_reg, available_regs, pushed_regs); + temp2_reg = allocate_if_noreg(temp2_reg, available_regs, pushed_regs); + temp3_reg = allocate_if_noreg(temp3_reg, available_regs, pushed_regs); + result_reg = allocate_if_noreg(result_reg, available_regs, pushed_regs); + Register temp4_reg = allocate_if_noreg(noreg, available_regs, pushed_regs); + + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg, result_reg); + + { + + int register_push_size = pushed_regs.size() * Register::max_slots_per_register * VMRegImpl::stack_slot_size; + int aligned_size = align_up(register_push_size, StackAlignmentInBytes); + subptr(rsp, aligned_size); + push_set(pushed_regs, 0); + + lookup_secondary_supers_table_var(sub_klass, + super_klass, + temp_reg, temp2_reg, temp3_reg, temp4_reg, result_reg); + cmpq(result_reg, 0); + + // Unspill the temp. registers: + pop_set(pushed_regs, 0); + // Increment SP but do not clobber flags. + lea(rsp, Address(rsp, aligned_size)); + } + + if (temp2_reg_was_valid) { + movq(temp2_reg, 1); + } + + jcc(Assembler::notEqual, *L_failure); + + if (L_success != &L_fallthrough) { + jmp(*L_success); + } + + bind(L_fallthrough); +} // population_count variant for running without the POPCNT // instruction, which was introduced with SSE4.2 in 2008. 
@@ -4799,14 +4920,44 @@ do { \ assert(result == rdi || result == noreg, "mismatch"); \ } while(0) -void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, - Register r_super_klass, - Register temp1, - Register temp2, - Register temp3, - Register temp4, - Register result, - u1 super_klass_slot) { +// Versions of salq and rorq that don't need count to be in rcx + +void MacroAssembler::salq(Register dest, Register count) { + if (count == rcx) { + Assembler::salq(dest); + } else { + assert_different_registers(rcx, dest); + xchgq(rcx, count); + Assembler::salq(dest); + xchgq(rcx, count); + } +} + +void MacroAssembler::rorq(Register dest, Register count) { + if (count == rcx) { + Assembler::rorq(dest); + } else { + assert_different_registers(rcx, dest); + xchgq(rcx, count); + Assembler::rorq(dest); + xchgq(rcx, count); + } +} + +// Return true: we succeeded in generating this code +// +// At runtime, return 0 in result if r_super_klass is a superclass of +// r_sub_klass, otherwise return nonzero. Use this if you know the +// super_klass_slot of the class you're looking for. This is always +// the case for instanceof and checkcast. +void MacroAssembler::lookup_secondary_supers_table_const(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + Register temp4, + Register result, + u1 super_klass_slot) { assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result); Label L_fallthrough, L_success, L_failure; @@ -4823,7 +4974,7 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, xorq(result, result); // = 0 - movq(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset())); + movq(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset())); movq(r_array_index, r_bitmap); // First check the bitmap to see if super_klass might be present. 
If @@ -4896,6 +5047,122 @@ void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, } } +// At runtime, return 0 in result if r_super_klass is a superclass of +// r_sub_klass, otherwise return nonzero. Use this version of +// lookup_secondary_supers_table() if you don't know ahead of time +// which superclass will be searched for. Used by interpreter and +// runtime stubs. It is larger and has somewhat greater latency than +// the version above, which takes a constant super_klass_slot. +void MacroAssembler::lookup_secondary_supers_table_var(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + Register temp4, + Register result) { + assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result); + assert_different_registers(r_sub_klass, r_super_klass, rcx); + RegSet temps = RegSet::of(temp1, temp2, temp3, temp4); + + Label L_fallthrough, L_success, L_failure; + + BLOCK_COMMENT("lookup_secondary_supers_table {"); + + RegSetIterator<Register> available_regs = (temps - rcx).begin(); + + // FIXME. Once we are sure that all paths reaching this point really + // do pass rcx as one of our temps we can get rid of the following + // workaround. + assert(temps.contains(rcx), "fix this code"); + + // We prefer to have our shift count in rcx. If rcx is one of our + // temps, use it for slot. If not, pick any of our temps. + Register slot; + if (!temps.contains(rcx)) { + slot = *available_regs++; + } else { + slot = rcx; + } + + const Register r_array_index = *available_regs++; + const Register r_bitmap = *available_regs++; + + // The logic above guarantees this property, but we state it here. + assert_different_registers(r_array_index, r_bitmap, rcx); + + movq(r_bitmap, Address(r_sub_klass, Klass::secondary_supers_bitmap_offset())); + movq(r_array_index, r_bitmap); + + // First check the bitmap to see if super_klass might be present. 
If + // the bit is zero, we are certain that super_klass is not one of + // the secondary supers. + movb(slot, Address(r_super_klass, Klass::hash_slot_offset())); + xorl(slot, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 1)); // slot ^ 63 === 63 - slot (mod 64) + salq(r_array_index, slot); + + testq(r_array_index, r_array_index); + // We test the MSB of r_array_index, i.e. its sign bit + jcc(Assembler::positive, L_failure); + + const Register r_array_base = *available_regs++; + + // Get the first array index that can contain super_klass into r_array_index. + population_count(r_array_index, r_array_index, /*temp2*/r_array_base, /*temp3*/slot); + + // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word. + + // We will consult the secondary-super array. + movptr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + + // We're asserting that the first word in an Array<Klass*> is the + // length, and the second word is the first word of the data. If + // that ever changes, r_array_base will have to be adjusted here. + assert(Array<Klass*>::base_offset_in_bytes() == wordSize, "Adjust this code"); + assert(Array<Klass*>::length_offset_in_bytes() == 0, "Adjust this code"); + + cmpq(r_super_klass, Address(r_array_base, r_array_index, Address::times_8)); + jccb(Assembler::equal, L_success); + + // Restore slot to its true value + xorl(slot, (u1)(Klass::SECONDARY_SUPERS_TABLE_SIZE - 1)); // slot ^ 63 === 63 - slot (mod 64) + + // Linear probe. Rotate the bitmap so that the next bit to test is + // in Bit 1. + rorq(r_bitmap, slot); + + // Is there another entry to check? Consult the bitmap. + btq(r_bitmap, 1); + jccb(Assembler::carryClear, L_failure); + + // Calls into the stub generated by lookup_secondary_supers_table_slow_path. + // Arguments: r_super_klass, r_array_base, r_array_index, r_bitmap. + // Kills: r_array_length. + // Returns: result. 
+ lookup_secondary_supers_table_slow_path(r_super_klass, + r_array_base, + r_array_index, + r_bitmap, + /*temp1*/result, + /*temp2*/slot, + &L_success, + nullptr); + + bind(L_failure); + movq(result, 1); + jmpb(L_fallthrough); + + bind(L_success); + xorq(result, result); // = 0 + + bind(L_fallthrough); + BLOCK_COMMENT("} lookup_secondary_supers_table"); + + if (VerifySecondarySupers) { + verify_secondary_supers_table(r_sub_klass, r_super_klass, result, + temp1, temp2, temp3); + } +} + void MacroAssembler::repne_scanq(Register addr, Register value, Register count, Register limit, Label* L_success, Label* L_failure) { Label L_loop, L_fallthrough; @@ -4936,8 +5203,6 @@ void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_kl r_sub_klass = noreg, result = noreg; - LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; - Label L_fallthrough; int label_nulls = 0; if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } @@ -5034,8 +5299,6 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, r_array_base = temp3, r_bitmap = noreg; - LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; - BLOCK_COMMENT("verify_secondary_supers_table {"); Label L_success, L_failure, L_check, L_done; diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index d508feed93c..618ec87da86 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -646,24 +646,66 @@ public: Label* L_success, Label* L_failure, bool set_cond_codes = false); - void hashed_check_klass_subtype_slow_path(Register sub_klass, + +#ifdef _LP64 + // The 64-bit version, which may do a hashed subclass lookup. 
+ void check_klass_subtype_slow_path(Register sub_klass, Register super_klass, Register temp_reg, Register temp2_reg, + Register temp3_reg, + Register temp4_reg, Label* L_success, - Label* L_failure, - bool set_cond_codes = false); + Label* L_failure); +#endif + + // Three parts of a hashed subclass lookup: a simple linear search, + // a table lookup, and a fallback that does linear probing in the + // event of a hash collision. + void check_klass_subtype_slow_path_linear(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + void check_klass_subtype_slow_path_table(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Register temp3_reg, + Register result_reg, + Label* L_success, + Label* L_failure); + void hashed_check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure); // As above, but with a constant super_klass. // The result is in Register result, not the condition codes. 
- void lookup_secondary_supers_table(Register sub_klass, - Register super_klass, - Register temp1, - Register temp2, - Register temp3, - Register temp4, - Register result, - u1 super_klass_slot); + void lookup_secondary_supers_table_const(Register sub_klass, + Register super_klass, + Register temp1, + Register temp2, + Register temp3, + Register temp4, + Register result, + u1 super_klass_slot); + +#ifdef _LP64 + using Assembler::salq; + void salq(Register dest, Register count); + using Assembler::rorq; + void rorq(Register dest, Register count); + void lookup_secondary_supers_table_var(Register sub_klass, + Register super_klass, + Register temp1, + Register temp2, + Register temp3, + Register temp4, + Register result); void lookup_secondary_supers_table_slow_path(Register r_super_klass, Register r_array_base, @@ -680,12 +722,20 @@ public: Register temp1, Register temp2, Register temp3); +#endif void repne_scanq(Register addr, Register value, Register count, Register limit, Label* L_success, Label* L_failure = nullptr); - // Simplified, combined version, good for typical uses. + // If r is valid, return r. + // If r is invalid, remove a register r2 from available_regs, add r2 + // to regs_to_push, then return r2. + Register allocate_if_noreg(const Register r, + RegSetIterator<Register> &available_regs, + RegSet &regs_to_push); + + // Simplified, combined version, good for typical uses. // Falls through on failure. 
void check_klass_subtype(Register sub_klass, Register super_klass, diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 93b1618024e..82d2fd1e73b 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -3831,10 +3831,10 @@ address StubGenerator::generate_lookup_secondary_supers_table_stub(u1 super_klas r_sub_klass = rsi, result = rdi; - __ lookup_secondary_supers_table(r_sub_klass, r_super_klass, - rdx, rcx, rbx, r11, // temps - result, - super_klass_index); + __ lookup_secondary_supers_table_const(r_sub_klass, r_super_klass, + rdx, rcx, rbx, r11, // temps + result, + super_klass_index); __ ret(0); return start; diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad index 7c9695571da..02c0f936208 100644 --- a/src/hotspot/cpu/x86/x86_32.ad +++ b/src/hotspot/cpu/x86/x86_32.ad @@ -1717,7 +1717,9 @@ encode %{ Register Resi = as_Register(ESI_enc); // sub class Label miss; - __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, + // NB: Callers may assume that, when $result is a valid register, + // check_klass_subtype_slow_path sets it to a nonzero value. 
+ __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, nullptr, &miss, /*set_cond_codes:*/ true); if ($primary) { diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index c3fa4c16e55..b4eb4c313d3 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -1833,24 +1833,6 @@ encode %{ __ bind(done); %} - enc_class enc_PartialSubtypeCheck() - %{ - Register Rrdi = as_Register(RDI_enc); // result register - Register Rrax = as_Register(RAX_enc); // super class - Register Rrcx = as_Register(RCX_enc); // killed - Register Rrsi = as_Register(RSI_enc); // sub class - Label miss; - const bool set_cond_codes = true; - - __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi, - nullptr, &miss, - /*set_cond_codes:*/ true); - if ($primary) { - __ xorptr(Rrdi, Rrdi); - } - __ bind(miss); - %} - enc_class clear_avx %{ debug_only(int off0 = __ offset()); if (generate_vzeroupper(Compile::current())) { @@ -12141,6 +12123,7 @@ instruct partialSubtypeCheck(rdi_RegP result, rFlagsReg cr) %{ match(Set result (PartialSubtypeCheck sub super)); + predicate(!UseSecondarySupersTable); effect(KILL rcx, KILL cr); ins_cost(1100); // slightly larger than the next version @@ -12153,8 +12136,46 @@ instruct partialSubtypeCheck(rdi_RegP result, "xorq $result, $result\t\t Hit: rdi zero\n\t" "miss:\t" %} - opcode(0x1); // Force a XOR of RDI - ins_encode(enc_PartialSubtypeCheck()); + ins_encode %{ + Label miss; + // NB: Callers may assume that, when $result is a valid register, + // check_klass_subtype_slow_path_linear sets it to a nonzero + // value. 
+ __ check_klass_subtype_slow_path_linear($sub$$Register, $super$$Register, + $rcx$$Register, $result$$Register, + nullptr, &miss, + /*set_cond_codes:*/ true); + __ xorptr($result$$Register, $result$$Register); + __ bind(miss); + %} + + ins_pipe(pipe_slow); +%} + +// ============================================================================ +// Two versions of hashtable-based partialSubtypeCheck, both used when +// we need to search for a super class in the secondary supers array. +// The first is used when we don't know _a priori_ the class being +// searched for. The second, far more common, is used when we do know: +// this is used for instanceof, checkcast, and any case where C2 can +// determine it by constant propagation. + +instruct partialSubtypeCheckVarSuper(rsi_RegP sub, rax_RegP super, rdi_RegP result, + rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4, + rFlagsReg cr) +%{ + match(Set result (PartialSubtypeCheck sub super)); + predicate(UseSecondarySupersTable); + effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4); + + ins_cost(1000); + format %{ "partialSubtypeCheck $result, $sub, $super" %} + + ins_encode %{ + __ lookup_secondary_supers_table_var($sub$$Register, $super$$Register, $temp1$$Register, $temp2$$Register, + $temp3$$Register, $temp4$$Register, $result$$Register); + %} + ins_pipe(pipe_slow); %} @@ -12172,7 +12193,7 @@ instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP su ins_encode %{ u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot(); if (InlineSecondarySupersTest) { - __ lookup_secondary_supers_table($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register, + __ lookup_secondary_supers_table_const($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register, $temp3$$Register, $temp4$$Register, $result$$Register, super_klass_slot); } else { @@ -12183,28 +12204,6 @@ instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP su 
ins_pipe(pipe_slow); %} -instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr, - rsi_RegP sub, rax_RegP super, rcx_RegI rcx, - immP0 zero, - rdi_RegP result) -%{ - match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); - effect(KILL rcx, KILL result); - - ins_cost(1000); - format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t" - "movl rcx, [rdi + Array::length_offset_in_bytes()]\t# length to scan\n\t" - "addq rdi, Array::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t" - "repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t" - "jne,s miss\t\t# Missed: flags nz\n\t" - "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t" - "miss:\t" %} - - opcode(0x0); // No need to XOR RDI - ins_encode(enc_PartialSubtypeCheck()); - ins_pipe(pipe_slow); -%} - // ============================================================================ // Branch Instructions -- short offset versions // diff --git a/src/hotspot/share/asm/register.hpp b/src/hotspot/share/asm/register.hpp index 6078edecb4a..72a27085a50 100644 --- a/src/hotspot/share/asm/register.hpp +++ b/src/hotspot/share/asm/register.hpp @@ -196,6 +196,12 @@ public: return *this; } + RegSetIterator operator++(int) { + RegSetIterator r = *this; + ++(*this); + return r; + } + RegSetIterator& operator=(const RegSetIterator& mit) { _regs= mit._regs; return *this; diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp index 33a81a81da0..223791cdd36 100644 --- a/src/hotspot/share/cds/filemap.cpp +++ b/src/hotspot/share/cds/filemap.cpp @@ -211,7 +211,6 @@ void FileMapHeader::populate(FileMapInfo *info, size_t core_region_alignment, } _compressed_oops = UseCompressedOops; _compressed_class_ptrs = UseCompressedClassPointers; - _use_secondary_supers_table = UseSecondarySupersTable; _max_heap_size = MaxHeapSize; _use_optimized_module_handling = CDSConfig::is_using_optimized_module_handling(); 
_has_full_module_graph = CDSConfig::is_dumping_full_module_graph(); @@ -274,7 +273,6 @@ void FileMapHeader::print(outputStream* st) { st->print_cr("- narrow_oop_mode: %d", _narrow_oop_mode); st->print_cr("- compressed_oops: %d", _compressed_oops); st->print_cr("- compressed_class_ptrs: %d", _compressed_class_ptrs); - st->print_cr("- use_secondary_supers_table: %d", _use_secondary_supers_table); st->print_cr("- cloned_vtables_offset: " SIZE_FORMAT_X, _cloned_vtables_offset); st->print_cr("- serialized_data_offset: " SIZE_FORMAT_X, _serialized_data_offset); st->print_cr("- jvm_ident: %s", _jvm_ident); @@ -2491,11 +2489,6 @@ bool FileMapHeader::validate() { return false; } - if (! _use_secondary_supers_table && UseSecondarySupersTable) { - log_warning(cds)("The shared archive was created without UseSecondarySupersTable."); - return false; - } - if (!_use_optimized_module_handling) { CDSConfig::stop_using_optimized_module_handling(); log_info(cds)("optimized module handling: disabled because archive was created without optimized module handling"); diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp index 6650f524408..f7c2e9c19de 100644 --- a/src/hotspot/share/cds/filemap.hpp +++ b/src/hotspot/share/cds/filemap.hpp @@ -192,7 +192,6 @@ private: CompressedOops::Mode _narrow_oop_mode; // compressed oop encoding mode bool _compressed_oops; // save the flag UseCompressedOops bool _compressed_class_ptrs; // save the flag UseCompressedClassPointers - bool _use_secondary_supers_table; // save the flag UseSecondarySupersTable size_t _cloned_vtables_offset; // The address of the first cloned vtable size_t _serialized_data_offset; // Data accessed using {ReadClosure,WriteClosure}::serialize() bool _has_non_jar_in_classpath; // non-jar file entry exists in classpath diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index 45ad1f577be..da50a524243 100644 --- 
a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -272,7 +272,7 @@ nonstatic_field(Klass, _modifier_flags, jint) \ nonstatic_field(Klass, _access_flags, AccessFlags) \ nonstatic_field(Klass, _class_loader_data, ClassLoaderData*) \ - nonstatic_field(Klass, _bitmap, uintx) \ + nonstatic_field(Klass, _secondary_supers_bitmap, uintx) \ nonstatic_field(Klass, _hash_slot, uint8_t) \ nonstatic_field(Klass, _misc_flags._flags, u1) \ \ diff --git a/src/hotspot/share/memory/universe.cpp b/src/hotspot/share/memory/universe.cpp index 4b6554173f9..4af9c8b2ef2 100644 --- a/src/hotspot/share/memory/universe.cpp +++ b/src/hotspot/share/memory/universe.cpp @@ -454,10 +454,8 @@ void Universe::genesis(TRAPS) { _the_array_interfaces_array->at_put(1, vmClasses::Serializable_klass()); } - if (UseSecondarySupersTable) { - Universe::_the_array_interfaces_bitmap = Klass::compute_secondary_supers_bitmap(_the_array_interfaces_array); - Universe::_the_empty_klass_bitmap = Klass::compute_secondary_supers_bitmap(_the_empty_klass_array); - } + _the_array_interfaces_bitmap = Klass::compute_secondary_supers_bitmap(_the_array_interfaces_array); + _the_empty_klass_bitmap = Klass::compute_secondary_supers_bitmap(_the_empty_klass_array); initialize_basic_type_klass(_fillerArrayKlass, CHECK); diff --git a/src/hotspot/share/oops/instanceKlass.cpp b/src/hotspot/share/oops/instanceKlass.cpp index b522a3948b5..e7cc0a00766 100644 --- a/src/hotspot/share/oops/instanceKlass.cpp +++ b/src/hotspot/share/oops/instanceKlass.cpp @@ -51,6 +51,7 @@ #include "jvm.h" #include "jvmtifiles/jvmti.h" #include "logging/log.hpp" +#include "klass.inline.hpp" #include "logging/logMessage.hpp" #include "logging/logStream.hpp" #include "memory/allocation.inline.hpp" @@ -652,7 +653,7 @@ void InstanceKlass::deallocate_contents(ClassLoaderData* loader_data) { !secondary_supers()->is_shared()) { MetadataFactory::free_array(loader_data, secondary_supers()); } - 
set_secondary_supers(nullptr); + set_secondary_supers(nullptr, SECONDARY_SUPERS_BITMAP_EMPTY); deallocate_interfaces(loader_data, super(), local_interfaces(), transitive_interfaces()); set_transitive_interfaces(nullptr); @@ -1413,21 +1414,12 @@ GrowableArray* InstanceKlass::compute_secondary_supers(int num_extra_slo // Must share this for correct bootstrapping! set_secondary_supers(Universe::the_empty_klass_array(), Universe::the_empty_klass_bitmap()); return nullptr; - } else if (num_extra_slots == 0) { - // The secondary super list is exactly the same as the transitive interfaces, so - // let's use it instead of making a copy. - // Redefine classes has to be careful not to delete this! - if (!UseSecondarySupersTable) { - set_secondary_supers(interfaces); - return nullptr; - } else if (num_extra_slots == 0 && interfaces->length() <= 1) { - // We will reuse the transitive interfaces list if we're certain - // it's in hash order. - uintx bitmap = compute_secondary_supers_bitmap(interfaces); - set_secondary_supers(interfaces, bitmap); - return nullptr; - } - // ... fall through if that didn't work. + } else if (num_extra_slots == 0 && interfaces->length() <= 1) { + // We will reuse the transitive interfaces list if we're certain + // it's in hash order. + uintx bitmap = compute_secondary_supers_bitmap(interfaces); + set_secondary_supers(interfaces, bitmap); + return nullptr; } // Copy transitive interfaces to a temporary growable array to be constructed // into the secondary super list with extra slots. @@ -3526,20 +3518,20 @@ void InstanceKlass::print_on(outputStream* st) const { st->print(BULLET"trans. 
interfaces: "); transitive_interfaces()->print_value_on(st); st->cr(); st->print(BULLET"secondary supers: "); secondary_supers()->print_value_on(st); st->cr(); - if (UseSecondarySupersTable) { - st->print(BULLET"hash_slot: %d", hash_slot()); st->cr(); - st->print(BULLET"bitmap: " UINTX_FORMAT_X_0, _bitmap); st->cr(); - } + + st->print(BULLET"hash_slot: %d", hash_slot()); st->cr(); + st->print(BULLET"secondary bitmap: " UINTX_FORMAT_X_0, _secondary_supers_bitmap); st->cr(); + if (secondary_supers() != nullptr) { if (Verbose) { - bool is_hashed = UseSecondarySupersTable && (_bitmap != SECONDARY_SUPERS_BITMAP_FULL); + bool is_hashed = (_secondary_supers_bitmap != SECONDARY_SUPERS_BITMAP_FULL); st->print_cr(BULLET"---- secondary supers (%d words):", _secondary_supers->length()); for (int i = 0; i < _secondary_supers->length(); i++) { ResourceMark rm; // for external_name() Klass* secondary_super = _secondary_supers->at(i); st->print(BULLET"%2d:", i); if (is_hashed) { - int home_slot = compute_home_slot(secondary_super, _bitmap); + int home_slot = compute_home_slot(secondary_super, _secondary_supers_bitmap); int distance = (i - home_slot) & SECONDARY_SUPERS_TABLE_MASK; st->print(" dist:%02d:", distance); } diff --git a/src/hotspot/share/oops/klass.cpp b/src/hotspot/share/oops/klass.cpp index 9e90b4846a6..b87f341ce48 100644 --- a/src/hotspot/share/oops/klass.cpp +++ b/src/hotspot/share/oops/klass.cpp @@ -128,7 +128,7 @@ void Klass::set_name(Symbol* n) { _name->increment_refcount(); } - if (UseSecondarySupersTable) { + { elapsedTimer selftime; selftime.start(); @@ -163,24 +163,51 @@ void Klass::release_C_heap_structures(bool release_constant_pool) { if (_name != nullptr) _name->decrement_refcount(); } -bool Klass::search_secondary_supers(Klass* k) const { - // Put some extra logic here out-of-line, before the search proper. - // This cuts down the size of the inline method. - - // This is necessary, since I am never in my own secondary_super list. 
- if (this == k) - return true; +bool Klass::linear_search_secondary_supers(const Klass* k) const { // Scan the array-of-objects for a match + // FIXME: We could do something smarter here, maybe a vectorized + // comparison or a binary search, but is that worth any added + // complexity? int cnt = secondary_supers()->length(); for (int i = 0; i < cnt; i++) { if (secondary_supers()->at(i) == k) { - ((Klass*)this)->set_secondary_super_cache(k); return true; } } return false; } +// Given a secondary superklass k, an initial array index, and an +// occupancy bitmap rotated such that Bit 1 is the next bit to test, +// search for k. +bool Klass::fallback_search_secondary_supers(const Klass* k, int index, uintx rotated_bitmap) const { + // Once the occupancy bitmap is almost full, it's faster to use a + // linear search. + if (secondary_supers()->length() > SECONDARY_SUPERS_TABLE_SIZE - 2) { + return linear_search_secondary_supers(k); + } + + // This is conventional linear probing, but instead of terminating + // when a null entry is found in the table, we maintain a bitmap + // in which a 0 indicates missing entries. + + precond((int)population_count(rotated_bitmap) == secondary_supers()->length()); + + // The check for secondary_supers()->length() <= SECONDARY_SUPERS_TABLE_SIZE - 2 + // at the start of this function guarantees there are 0s in the + // bitmap, so this loop eventually terminates. + while ((rotated_bitmap & 2) != 0) { + if (++index == secondary_supers()->length()) { + index = 0; + } + if (secondary_supers()->at(index) == k) { + return true; + } + rotated_bitmap = rotate_right(rotated_bitmap, 1); + } + return false; +} + // Return self, except for abstract classes with exactly 1 // implementor. Then return the 1 concrete implementation. Klass *Klass::up_cast_abstract() { @@ -256,7 +283,7 @@ Klass::Klass() : _kind(UnknownKlassKind) { // The constructor is also used from CppVtableCloner, // which doesn't zero out the memory before calling the constructor. 
Klass::Klass(KlassKind kind) : _kind(kind), - _shared_class_path_index(-1) { + _shared_class_path_index(-1) { CDS_ONLY(_shared_class_flags = 0;) CDS_JAVA_HEAP_ONLY(_archived_mirror_index = -1;) _primary_supers[0] = this; @@ -292,20 +319,15 @@ bool Klass::can_be_primary_super_slow() const { return true; } -void Klass::set_secondary_supers(Array* secondaries) { - assert(!UseSecondarySupersTable || secondaries == nullptr, ""); - set_secondary_supers(secondaries, SECONDARY_SUPERS_BITMAP_EMPTY); -} - void Klass::set_secondary_supers(Array* secondaries, uintx bitmap) { #ifdef ASSERT - if (UseSecondarySupersTable && secondaries != nullptr) { + if (secondaries != nullptr) { uintx real_bitmap = compute_secondary_supers_bitmap(secondaries); assert(bitmap == real_bitmap, "must be"); assert(secondaries->length() >= (int)population_count(bitmap), "must be"); } #endif - _bitmap = bitmap; + _secondary_supers_bitmap = bitmap; _secondary_supers = secondaries; if (secondaries != nullptr) { @@ -382,6 +404,7 @@ uintx Klass::hash_secondary_supers(Array* secondaries, bool rewrite) { } } assert(i == secondaries->length(), "mismatch"); + postcond((int)population_count(bitmap) == secondaries->length()); return bitmap; } @@ -442,11 +465,7 @@ Array* Klass::pack_secondary_supers(ClassLoaderData* loader_data, } #endif - if (UseSecondarySupersTable) { - bitmap = hash_secondary_supers(secondary_supers, /*rewrite=*/true); // rewrites freshly allocated array - } else { - bitmap = SECONDARY_SUPERS_BITMAP_EMPTY; - } + bitmap = hash_secondary_supers(secondary_supers, /*rewrite=*/true); // rewrites freshly allocated array return secondary_supers; } @@ -770,7 +789,7 @@ void Klass::remove_unshareable_info() { // FIXME: validation in Klass::hash_secondary_supers() may fail for shared klasses. // Even though the bitmaps always match, the canonical order of elements in the table // is not guaranteed to stay the same (see tie breaker during Robin Hood hashing in Klass::hash_insert). 
- //assert(compute_secondary_supers_bitmap(secondary_supers()) == _bitmap, "broken table"); + //assert(compute_secondary_supers_bitmap(secondary_supers()) == _secondary_supers_bitmap, "broken table"); } void Klass::remove_java_mirror() { @@ -786,7 +805,7 @@ void Klass::remove_java_mirror() { void Klass::restore_unshareable_info(ClassLoaderData* loader_data, Handle protection_domain, TRAPS) { assert(is_klass(), "ensure C++ vtable is restored"); assert(is_shared(), "must be set"); - assert(secondary_supers()->length() >= (int)population_count(_bitmap), "must be"); + assert(secondary_supers()->length() >= (int)population_count(_secondary_supers_bitmap), "must be"); JFR_ONLY(RESTORE_ID(this);) if (log_is_enabled(Trace, cds, unshareable)) { ResourceMark rm(THREAD); @@ -1253,14 +1272,13 @@ static void print_negative_lookup_stats(uintx bitmap, outputStream* st) { void Klass::print_secondary_supers_on(outputStream* st) const { if (secondary_supers() != nullptr) { - if (UseSecondarySupersTable) { - st->print(" - "); st->print("%d elements;", _secondary_supers->length()); - st->print_cr(" bitmap: " UINTX_FORMAT_X_0 ";", _bitmap); - if (_bitmap != SECONDARY_SUPERS_BITMAP_EMPTY && - _bitmap != SECONDARY_SUPERS_BITMAP_FULL) { - st->print(" - "); print_positive_lookup_stats(secondary_supers(), _bitmap, st); st->cr(); - st->print(" - "); print_negative_lookup_stats(_bitmap, st); st->cr(); - } + st->print(" - "); st->print("%d elements;", _secondary_supers->length()); + st->print_cr(" bitmap: " UINTX_FORMAT_X_0 ";", _secondary_supers_bitmap); + if (_secondary_supers_bitmap != SECONDARY_SUPERS_BITMAP_EMPTY && + _secondary_supers_bitmap != SECONDARY_SUPERS_BITMAP_FULL) { + st->print(" - "); print_positive_lookup_stats(secondary_supers(), + _secondary_supers_bitmap, st); st->cr(); + st->print(" - "); print_negative_lookup_stats(_secondary_supers_bitmap, st); st->cr(); } } else { st->print("null"); @@ -1271,7 +1289,6 @@ void Klass::on_secondary_supers_verification_failure(Klass* 
super, Klass* sub, b ResourceMark rm; super->print(); sub->print(); - fatal("%s: %s implements %s: is_subtype_of: %d; linear_search: %d; table_lookup: %d", - msg, sub->external_name(), super->external_name(), - sub->is_subtype_of(super), linear_result, table_result); + fatal("%s: %s implements %s: linear_search: %d; table_lookup: %d", + msg, sub->external_name(), super->external_name(), linear_result, table_result); } diff --git a/src/hotspot/share/oops/klass.hpp b/src/hotspot/share/oops/klass.hpp index b782799927b..4fc670d85f1 100644 --- a/src/hotspot/share/oops/klass.hpp +++ b/src/hotspot/share/oops/klass.hpp @@ -65,6 +65,7 @@ class PSPromotionManager; class vtableEntry; class Klass : public Metadata { + friend class VMStructs; friend class JVMCIVMStructs; public: @@ -160,6 +161,9 @@ class Klass : public Metadata { // Provide access the corresponding instance java.lang.ClassLoader. ClassLoaderData* _class_loader_data; + // Bitmap and hash code used by hashed secondary supers. + uintx _secondary_supers_bitmap; + uint8_t _hash_slot; int _vtable_len; // vtable length. This field may be read very often when we // have lots of itable dispatches (e.g., lambdas and streams). @@ -171,10 +175,6 @@ class Klass : public Metadata { JFR_ONLY(DEFINE_TRACE_ID_FIELD;) - // Bitmap and hash code used by hashed secondary supers. 
-  uintx _bitmap;
-  uint8_t _hash_slot;
-
 private:
   // This is an index into FileMapHeader::_shared_path_table[], to
   // associate this class with the JAR file where it's loaded from during
@@ -239,7 +239,6 @@ protected:
   void set_secondary_super_cache(Klass* k) { _secondary_super_cache = k; }
 
   Array<Klass*>* secondary_supers() const { return _secondary_supers; }
-  void set_secondary_supers(Array<Klass*>* k);
   void set_secondary_supers(Array<Klass*>* k, uintx bitmap);
 
   uint8_t hash_slot() const { return _hash_slot; }
@@ -398,6 +397,11 @@ protected:
   static void hash_insert(Klass* klass, GrowableArray<Klass*>* secondaries, uintx& bitmap);
   static uintx hash_secondary_supers(Array<Klass*>* secondaries, bool rewrite);
 
+  bool search_secondary_supers(Klass* k) const;
+  bool lookup_secondary_supers_table(Klass *k) const;
+  bool linear_search_secondary_supers(const Klass* k) const;
+  bool fallback_search_secondary_supers(const Klass* k, int index, uintx rotated_bitmap) const;
+
 public:
   // Secondary supers table support
   static Array<Klass*>* pack_secondary_supers(ClassLoaderData* loader_data,
@@ -409,7 +413,7 @@ protected:
   static uintx compute_secondary_supers_bitmap(Array<Klass*>* secondary_supers);
   static uint8_t compute_home_slot(Klass* k, uintx bitmap);
 
-  static constexpr int SECONDARY_SUPERS_TABLE_SIZE = sizeof(_bitmap) * 8;
+  static constexpr int SECONDARY_SUPERS_TABLE_SIZE = sizeof(_secondary_supers_bitmap) * 8;
   static constexpr int SECONDARY_SUPERS_TABLE_MASK = SECONDARY_SUPERS_TABLE_SIZE - 1;
 
   static constexpr uintx SECONDARY_SUPERS_BITMAP_EMPTY = 0;
@@ -430,7 +434,9 @@ protected:
   static ByteSize subklass_offset() { return byte_offset_of(Klass, _subklass); }
   static ByteSize next_sibling_offset() { return byte_offset_of(Klass, _next_sibling); }
 #endif
-  static ByteSize bitmap_offset() { return byte_offset_of(Klass, _bitmap); }
+  static ByteSize secondary_supers_bitmap_offset()
+    { return byte_offset_of(Klass, _secondary_supers_bitmap); }
+  static ByteSize hash_slot_offset() { return byte_offset_of(Klass, _hash_slot); }
   static ByteSize misc_flags_offset() { return byte_offset_of(Klass, _misc_flags._flags); }
 
   // Unpacking layout_helper:
@@ -531,22 +537,11 @@ protected:
 
   // subclass check
   bool is_subclass_of(const Klass* k) const;
+
   // subtype check: true if is_subclass_of, or if k is interface and receiver implements it
-  bool is_subtype_of(Klass* k) const {
-    juint off = k->super_check_offset();
-    Klass* sup = *(Klass**)( (address)this + off );
-    const juint secondary_offset = in_bytes(secondary_super_cache_offset());
-    if (sup == k) {
-      return true;
-    } else if (off != secondary_offset) {
-      return false;
-    } else {
-      return search_secondary_supers(k);
-    }
-  }
-
-  bool search_secondary_supers(Klass* k) const;
+  bool is_subtype_of(Klass* k) const;
 
+public:
   // Find LCA in class hierarchy
   Klass *LCA( Klass *k );
 
diff --git a/src/hotspot/share/oops/klass.inline.hpp b/src/hotspot/share/oops/klass.inline.hpp
index f549940ef95..ea9af6d6928 100644
--- a/src/hotspot/share/oops/klass.inline.hpp
+++ b/src/hotspot/share/oops/klass.inline.hpp
@@ -30,6 +30,7 @@
 #include "classfile/classLoaderData.inline.hpp"
 #include "oops/klassVtable.hpp"
 #include "oops/markWord.hpp"
+#include "utilities/rotate_bits.hpp"
 
 // This loads and keeps the klass's loader alive.
 inline oop Klass::klass_holder() const {
@@ -75,4 +76,72 @@ inline ByteSize Klass::vtable_start_offset() {
   return in_ByteSize(InstanceKlass::header_size() * wordSize);
 }
 
+// subtype check: true if is_subclass_of, or if k is interface and receiver implements it
+inline bool Klass::is_subtype_of(Klass* k) const {
+  assert(secondary_supers() != nullptr, "must be");
+  const juint off = k->super_check_offset();
+  const juint secondary_offset = in_bytes(secondary_super_cache_offset());
+  if (off == secondary_offset) {
+    return search_secondary_supers(k);
+  } else {
+    Klass* sup = *(Klass**)( (address)this + off );
+    return (sup == k);
+  }
+}
+
+// Hashed search for secondary super k.
+inline bool Klass::lookup_secondary_supers_table(Klass* k) const {
+  uintx bitmap = _secondary_supers_bitmap;
+
+  constexpr int highest_bit_number = SECONDARY_SUPERS_TABLE_SIZE - 1;
+  uint8_t slot = k->_hash_slot;
+  uintx shifted_bitmap = bitmap << (highest_bit_number - slot);
+
+  precond((int)population_count(bitmap) <= secondary_supers()->length());
+
+  // First check the bitmap to see if k might be present: the shift moves
+  // k's slot bit to the top. If that bit is zero, we are certain that k
+  // is not one of the secondary supers.
+  if (((shifted_bitmap >> highest_bit_number) & 1) == 0) {
+    return false;
+  }
+
+  // Calculate the initial hash probe: bits set at or below k's slot, less one, index secondary_supers().
+  int index = population_count(shifted_bitmap) - 1;
+  if (secondary_supers()->at(index) == k) {
+    // Yes! It worked the first time.
+    return true;
+  }
+
+  // Is there another entry to check? Consult the bitmap, rotated so that
+  // k's slot is Bit 0. If Bit 1, the next bit to test, is zero, we are
+  // certain that k is not one of the secondary supers.
+  bitmap = rotate_right(bitmap, slot);
+  if ((bitmap & 2) == 0) {
+    return false;
+  }
+
+  // Continue probing the hash table from index, passing the rotated bitmap.
+  return fallback_search_secondary_supers(k, index, bitmap);
+}
+
+inline bool Klass::search_secondary_supers(Klass *k) const {
+  // This is necessary because I am never in my own secondary_super list.
+  if (this == k)
+    return true;
+
+  bool result = lookup_secondary_supers_table(k);
+
+#ifndef PRODUCT
+  if (VerifySecondarySupers) {
+    bool linear_result = linear_search_secondary_supers(k);
+    if (linear_result != result) {
+      on_secondary_supers_verification_failure((Klass*)this, k, linear_result, result, "mismatch");
+    }
+  }
+#endif // PRODUCT
+
+  return result;
+}
+
 #endif // SHARE_OOPS_KLASS_INLINE_HPP