From f11a496de61d800a680517457eb43b078a633953 Mon Sep 17 00:00:00 2001 From: Andrew Haley Date: Tue, 16 Apr 2024 14:21:48 +0000 Subject: [PATCH] 8180450: secondary_super_cache does not scale well Co-authored-by: Vladimir Ivanov Reviewed-by: kvn, vlivanov, dlong --- src/hotspot/cpu/aarch64/aarch64.ad | 35 +- src/hotspot/cpu/aarch64/assembler_aarch64.hpp | 3 +- .../cpu/aarch64/macroAssembler_aarch64.cpp | 240 +++++++++++- .../cpu/aarch64/macroAssembler_aarch64.hpp | 28 +- .../cpu/aarch64/stubGenerator_aarch64.cpp | 58 ++- .../cpu/aarch64/vm_version_aarch64.hpp | 2 + src/hotspot/cpu/x86/assembler_x86.cpp | 9 + src/hotspot/cpu/x86/assembler_x86.hpp | 1 + src/hotspot/cpu/x86/macroAssembler_x86.cpp | 334 ++++++++++++++++ src/hotspot/cpu/x86/macroAssembler_x86.hpp | 42 +- src/hotspot/cpu/x86/stubGenerator_x86_64.cpp | 57 +++ src/hotspot/cpu/x86/stubGenerator_x86_64.hpp | 6 + src/hotspot/cpu/x86/vm_version_x86.hpp | 5 + src/hotspot/cpu/x86/x86_64.ad | 38 ++ src/hotspot/share/cds/filemap.cpp | 7 + src/hotspot/share/cds/filemap.hpp | 1 + src/hotspot/share/classfile/classLoader.cpp | 2 + src/hotspot/share/classfile/classLoader.hpp | 6 + src/hotspot/share/memory/universe.cpp | 8 + src/hotspot/share/memory/universe.hpp | 8 +- src/hotspot/share/oops/arrayKlass.cpp | 3 +- src/hotspot/share/oops/instanceKlass.cpp | 60 ++- src/hotspot/share/oops/klass.cpp | 360 ++++++++++++++++-- src/hotspot/share/oops/klass.hpp | 35 +- src/hotspot/share/oops/objArrayKlass.cpp | 3 +- src/hotspot/share/opto/c2_globals.hpp | 3 + src/hotspot/share/opto/matcher.cpp | 8 + src/hotspot/share/opto/memnode.cpp | 6 + .../share/runtime/abstract_vm_version.hpp | 3 + src/hotspot/share/runtime/arguments.cpp | 11 + src/hotspot/share/runtime/globals.hpp | 12 + src/hotspot/share/runtime/stubRoutines.cpp | 7 +- src/hotspot/share/runtime/stubRoutines.hpp | 14 + .../share/utilities/globalDefinitions.hpp | 5 + src/hotspot/share/utilities/rotate_bits.hpp | 57 +++ .../vm/compiler/SecondarySuperCacheHits.java | 108 ++++++ 
.../SecondarySuperCacheInterContention.java | 81 ++++ .../SecondarySuperCacheIntraContention.java | 72 ++++ .../bench/vm/lang/SecondarySupersLookup.java | 310 +++++++++++++++ .../openjdk/bench/vm/lang/TypePollution.java | 209 ++++++++++ 40 files changed, 2209 insertions(+), 48 deletions(-) create mode 100644 src/hotspot/share/utilities/rotate_bits.hpp create mode 100644 test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheHits.java create mode 100644 test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheInterContention.java create mode 100644 test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheIntraContention.java create mode 100644 test/micro/org/openjdk/bench/vm/lang/SecondarySupersLookup.java create mode 100644 test/micro/org/openjdk/bench/vm/lang/TypePollution.java diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 2d134475739..a310eae5eb9 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1,6 +1,6 @@ // // Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. -// Copyright (c) 2014, 2021, Red Hat, Inc. All rights reserved. +// Copyright (c) 2014, 2024, Red Hat, Inc. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
// // This code is free software; you can redistribute it and/or modify it @@ -16201,6 +16201,39 @@ instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_ ins_pipe(pipe_class_memory); %} +instruct partialSubtypeCheckConstSuper(iRegP_R4 sub, iRegP_R0 super_reg, immP super_con, vRegD_V0 vtemp, iRegP_R5 result, + iRegP_R1 tempR1, iRegP_R2 tempR2, iRegP_R3 tempR3, + rFlagsReg cr) +%{ + match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con))); + predicate(UseSecondarySupersTable); + effect(KILL cr, TEMP tempR1, TEMP tempR2, TEMP tempR3, TEMP vtemp); + + ins_cost(700); // smaller than the next version + format %{ "partialSubtypeCheck $result, $sub, super" %} + + ins_encode %{ + bool success = false; + u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot(); + if (InlineSecondarySupersTest) { + success = __ lookup_secondary_supers_table($sub$$Register, $super_reg$$Register, + $tempR1$$Register, $tempR2$$Register, $tempR3$$Register, + $vtemp$$FloatRegister, + $result$$Register, + super_klass_slot); + } else { + address call = __ trampoline_call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot))); + success = (call != nullptr); + } + if (!success) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + %} + + ins_pipe(pipe_class_memory); +%} + instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr) %{ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index 9c05c36706d..28a0cc2c7d9 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2024, Red Hat Inc. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2595,6 +2595,7 @@ template #undef INSN + // Advanced SIMD across lanes #define INSN(NAME, opc, opc2, accepted) \ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ guarantee(T != T1Q && T != T1D, "incorrect arrangement"); \ diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 0e807e8d83a..fd6db98c7b7 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1514,6 +1514,9 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, Label* L_success, Label* L_failure, bool set_cond_codes) { + // NB! Callers may assume that, when temp2_reg is a valid register, + // this code sets it to a nonzero value. + assert_different_registers(sub_klass, super_klass, temp_reg); if (temp2_reg != noreg) assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, rscratch1); @@ -1593,6 +1596,241 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, bind(L_fallthrough); } +// Ensure that the inline code and the stub are using the same registers. 
+#define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS \ +do { \ + assert(r_super_klass == r0 && \ + r_array_base == r1 && \ + r_array_length == r2 && \ + (r_array_index == r3 || r_array_index == noreg) && \ + (r_sub_klass == r4 || r_sub_klass == noreg) && \ + (r_bitmap == rscratch2 || r_bitmap == noreg) && \ + (result == r5 || result == noreg), "registers must match aarch64.ad"); \ +} while(0) + +// Return true: we succeeded in generating this code +bool MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + FloatRegister vtemp, + Register result, + u1 super_klass_slot, + bool stub_is_near) { + assert_different_registers(r_sub_klass, temp1, temp2, temp3, result, rscratch1, rscratch2); + + Label L_fallthrough; + + BLOCK_COMMENT("lookup_secondary_supers_table {"); + + const Register + r_array_base = temp1, // r1 + r_array_length = temp2, // r2 + r_array_index = temp3, // r3 + r_bitmap = rscratch2; + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + u1 bit = super_klass_slot; + + // Make sure that result is nonzero if the TBZ below misses. + mov(result, 1); + + // We're going to need the bitmap in a vector reg and in a core reg, + // so load both now. + ldr(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset())); + if (bit != 0) { + ldrd(vtemp, Address(r_sub_klass, Klass::bitmap_offset())); + } + // First check the bitmap to see if super_klass might be present. If + // the bit is zero, we are certain that super_klass is not one of + // the secondary supers. + tbz(r_bitmap, bit, L_fallthrough); + + // Get the first array index that can contain super_klass into r_array_index. + if (bit != 0) { + shld(vtemp, vtemp, Klass::SECONDARY_SUPERS_TABLE_MASK - bit); + cnt(vtemp, T8B, vtemp); + addv(vtemp, T8B, vtemp); + fmovd(r_array_index, vtemp); + } else { + mov(r_array_index, (u1)1); + } + // NB! r_array_index is off by 1. It is compensated by keeping r_array_base off by 1 word. 
+ + // We will consult the secondary-super array. + ldr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + + // The value i in r_array_index is >= 1, so even though r_array_base + // points to the length, we don't need to adjust it to point to the + // data. + assert(Array::base_offset_in_bytes() == wordSize, "Adjust this code"); + assert(Array::length_offset_in_bytes() == 0, "Adjust this code"); + + ldr(result, Address(r_array_base, r_array_index, Address::lsl(LogBytesPerWord))); + eor(result, result, r_super_klass); + cbz(result, L_fallthrough); // Found a match + + // Is there another entry to check? Consult the bitmap. + tbz(r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK, L_fallthrough); + + // Linear probe. + if (bit != 0) { + ror(r_bitmap, r_bitmap, bit); + } + + // The slot we just inspected is at secondary_supers[r_array_index - 1]. + // The next slot to be inspected, by the stub we're about to call, + // is secondary_supers[r_array_index]. Bits 0 and 1 in the bitmap + // have been checked. + Address stub = RuntimeAddress(StubRoutines::lookup_secondary_supers_table_slow_path_stub()); + if (stub_is_near) { + bl(stub); + } else { + address call = trampoline_call(stub); + if (call == nullptr) { + return false; // trampoline allocation failed + } + } + + BLOCK_COMMENT("} lookup_secondary_supers_table"); + + bind(L_fallthrough); + + if (VerifySecondarySupers) { + verify_secondary_supers_table(r_sub_klass, r_super_klass, // r4, r0 + temp1, temp2, result); // r1, r2, r5 + } + return true; +} + +// Called by code generated by lookup_secondary_supers_table +// above. This is called when there is a collision in the hashed +// lookup in the secondary supers array.
+void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass, + Register r_array_base, + Register r_array_index, + Register r_bitmap, + Register temp1, + Register result) { + assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, result, rscratch1); + + const Register + r_array_length = temp1, + r_sub_klass = noreg; // unused + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + Label L_fallthrough, L_huge; + + // Load the array length. + ldrw(r_array_length, Address(r_array_base, Array::length_offset_in_bytes())); + // And adjust the array base to point to the data. + // NB! Effectively increments current slot index by 1. + assert(Array::base_offset_in_bytes() == wordSize, ""); + add(r_array_base, r_array_base, Array::base_offset_in_bytes()); + + // The bitmap is full to bursting. + // Implicit invariant: BITMAP_FULL implies (length > 0) + assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), ""); + cmn(r_bitmap, (u1)1); + br(EQ, L_huge); + + // NB! Our caller has checked bits 0 and 1 in the bitmap. The + // current slot (at secondary_supers[r_array_index]) has not yet + // been inspected, and r_array_index may be out of bounds if we + // wrapped around the end of the array. + + { // This is conventional linear probing, but instead of terminating + // when a null entry is found in the table, we maintain a bitmap + // in which a 0 indicates missing entries. + // The check above guarantees there are 0s in the bitmap, so the loop + // eventually terminates. + Label L_loop; + bind(L_loop); + + // Check for wraparound. 
+ cmp(r_array_index, r_array_length); + csel(r_array_index, zr, r_array_index, GE); + + ldr(rscratch1, Address(r_array_base, r_array_index, Address::lsl(LogBytesPerWord))); + eor(result, rscratch1, r_super_klass); + cbz(result, L_fallthrough); + + tbz(r_bitmap, 2, L_fallthrough); // look-ahead check (Bit 2); result is non-zero + + ror(r_bitmap, r_bitmap, 1); + add(r_array_index, r_array_index, 1); + b(L_loop); + } + + { // Degenerate case: more than 64 secondary supers. + // FIXME: We could do something smarter here, maybe a vectorized + // comparison or a binary search, but is that worth any added + // complexity? + bind(L_huge); + cmp(sp, zr); // Clear Z flag; SP is never zero + repne_scan(r_array_base, r_super_klass, r_array_length, rscratch1); + cset(result, NE); // result == 0 iff we got a match. + } + + bind(L_fallthrough); +} + +// Make sure that the hashed lookup and a linear scan agree. +void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register result) { + assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, result, rscratch1); + + const Register + r_array_base = temp1, + r_array_length = temp2, + r_array_index = noreg, // unused + r_bitmap = noreg; // unused + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + BLOCK_COMMENT("verify_secondary_supers_table {"); + + // We will consult the secondary-super array. + ldr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + + // Load the array length. + ldrw(r_array_length, Address(r_array_base, Array::length_offset_in_bytes())); + // And adjust the array base to point to the data. + add(r_array_base, r_array_base, Array::base_offset_in_bytes()); + + cmp(sp, zr); // Clear Z flag; SP is never zero + // Scan R2 words at [R1] for an occurrence of R0. + // Set NZ/Z based on last compare.
+ repne_scan(/*addr*/r_array_base, /*value*/r_super_klass, /*count*/r_array_length, rscratch2); + // rscratch1 == 0 iff we got a match. + cset(rscratch1, NE); + + Label passed; + cmp(result, zr); + cset(result, NE); // normalize result to 0/1 for comparison + + cmp(rscratch1, result); + br(EQ, passed); + { + mov(r0, r_super_klass); // r0 <- r0 + mov(r1, r_sub_klass); // r1 <- r4 + mov(r2, /*expected*/rscratch1); // r2 <- r8 + mov(r3, result); // r3 <- r5 + mov(r4, (address)("mismatch")); // r4 <- const + rt_call(CAST_FROM_FN_PTR(address, Klass::on_secondary_supers_verification_failure), rscratch2); + should_not_reach_here(); + } + bind(passed); + + BLOCK_COMMENT("} verify_secondary_supers_table"); +} + void MacroAssembler::clinit_barrier(Register klass, Register scratch, Label* L_fast_path, Label* L_slow_path) { assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); assert_different_registers(klass, rthread, scratch); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index dad7ec4d497..c8f1ff81af0 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -999,6 +999,31 @@ public: Label* L_failure, bool set_cond_codes = false); + // As above, but with a constant super_klass. + // The result is in Register result, not the condition codes. 
+ bool lookup_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + FloatRegister vtemp, + Register result, + u1 super_klass_slot, + bool stub_is_near = false); + + void verify_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register result); + + void lookup_secondary_supers_table_slow_path(Register r_super_klass, + Register r_array_base, + Register r_array_index, + Register r_bitmap, + Register temp1, + Register result); + // Simplified, combined version, good for typical uses. // Falls through on failure. void check_klass_subtype(Register sub_klass, @@ -1213,6 +1238,7 @@ public: // - relocInfo::virtual_call_type // // Return: the call PC or null if CodeCache is full. + // Clobbers: rscratch1 address trampoline_call(Address entry); static bool far_branches() { diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index 46a7d796267..ece7f8a347a 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2022, Red Hat Inc. All rights reserved. + * Copyright (c) 2014, 2024, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -6772,6 +6772,52 @@ class StubGenerator: public StubCodeGenerator { return start; } + address generate_lookup_secondary_supers_table_stub(u1 super_klass_index) { + StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table"); + + address start = __ pc(); + const Register + r_super_klass = r0, + r_array_base = r1, + r_array_length = r2, + r_array_index = r3, + r_sub_klass = r4, + r_bitmap = rscratch2, + result = r5; + const FloatRegister + vtemp = v0; + + Label L_success; + __ enter(); + __ lookup_secondary_supers_table(r_sub_klass, r_super_klass, + r_array_base, r_array_length, r_array_index, + vtemp, result, super_klass_index, + /*stub_is_near*/true); + __ leave(); + __ ret(lr); + + return start; + } + + // Slow path implementation for UseSecondarySupersTable. + address generate_lookup_secondary_supers_table_slow_path_stub() { + StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table_slow_path"); + + address start = __ pc(); + const Register + r_super_klass = r0, // argument + r_array_base = r1, // argument + temp1 = r2, // temp + r_array_index = r3, // argument + r_bitmap = rscratch2, // argument + result = r5; // argument + + __ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, result); + __ ret(lr); + + return start; + } + #if defined (LINUX) && !defined (__ARM_FEATURE_ATOMICS) // ARMv8.1 LSE versions of the atomic stubs used by Atomic::PlatformXX. @@ -8422,6 +8468,16 @@ class StubGenerator: public StubCodeGenerator { #endif // LINUX + if (UseSecondarySupersTable) { + StubRoutines::_lookup_secondary_supers_table_slow_path_stub = generate_lookup_secondary_supers_table_slow_path_stub(); + if (! 
InlineSecondarySupersTest) { + for (int slot = 0; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) { + StubRoutines::_lookup_secondary_supers_table_stubs[slot] + = generate_lookup_secondary_supers_table_stub(slot); + } + } + } + StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler(); StubRoutines::aarch64::set_completed(); // Inidicate that arraycopy and zero_blocks stubs are generated diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index 6883dc0d93e..f6cac72804f 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -171,6 +171,8 @@ enum Ampere_CPU_Model { constexpr static bool supports_stack_watermark_barrier() { return true; } constexpr static bool supports_recursive_lightweight_locking() { return true; } + constexpr static bool supports_secondary_supers_table() { return true; } + static void get_compatible_board(char *buf, int buflen); static const SpinWait& spin_wait_desc() { return _spin_wait; } diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 0896fbd8bf5..dd3f9c64e20 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -13652,6 +13652,15 @@ void Assembler::notq(Register dst) { emit_int16((unsigned char)0xF7, (0xD0 | encode)); } +void Assembler::btq(Register src, int imm8) { + assert(isByte(imm8), "not a byte"); + InstructionMark im(this); + int encode = prefixq_and_encode(src->encoding()); + emit_int16(0x0f, 0xba); + emit_int8(0xe0|encode); + emit_int8(imm8); +} + void Assembler::btsq(Address dst, int imm8) { assert(isByte(imm8), "not a byte"); InstructionMark im(this); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 7c565b4f8e0..656b2a97c70 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -1734,6 +1734,7 @@ private: 
void btsq(Address dst, int imm8); void btrq(Address dst, int imm8); + void btq(Register src, int imm8); #endif void orw(Register dst, Register src); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index eb70c807e52..3d427fd0cde 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -4726,6 +4726,340 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, bind(L_fallthrough); } +#ifdef _LP64 + +// population_count variant for running without the POPCNT +// instruction, which was introduced with SSE4.2 in 2008. +void MacroAssembler::population_count(Register dst, Register src, + Register scratch1, Register scratch2) { + assert_different_registers(src, scratch1, scratch2); + if (UsePopCountInstruction) { + Assembler::popcntq(dst, src); + } else { + assert_different_registers(src, scratch1, scratch2); + assert_different_registers(dst, scratch1, scratch2); + Label loop, done; + + mov(scratch1, src); + // dst = 0; + // while(scratch1 != 0) { + // dst++; + // scratch1 &= (scratch1 - 1); + // } + xorl(dst, dst); + testq(scratch1, scratch1); + jccb(Assembler::equal, done); + { + bind(loop); + incq(dst); + movq(scratch2, scratch1); + decq(scratch2); + andq(scratch1, scratch2); + jccb(Assembler::notEqual, loop); + } + bind(done); + } +} + +// Ensure that the inline code and the stub are using the same registers. 
+#define LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS \ +do { \ + assert(r_super_klass == rax, "mismatch"); \ + assert(r_array_base == rbx, "mismatch"); \ + assert(r_array_length == rcx, "mismatch"); \ + assert(r_array_index == rdx, "mismatch"); \ + assert(r_sub_klass == rsi || r_sub_klass == noreg, "mismatch"); \ + assert(r_bitmap == r11 || r_bitmap == noreg, "mismatch"); \ + assert(result == rdi || result == noreg, "mismatch"); \ +} while(0) + +void MacroAssembler::lookup_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register temp1, + Register temp2, + Register temp3, + Register temp4, + Register result, + u1 super_klass_slot) { + assert_different_registers(r_sub_klass, r_super_klass, temp1, temp2, temp3, temp4, result); + + Label L_fallthrough, L_success, L_failure; + + BLOCK_COMMENT("lookup_secondary_supers_table {"); + + const Register + r_array_index = temp1, + r_array_length = temp2, + r_array_base = temp3, + r_bitmap = temp4; + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + xorq(result, result); // = 0 + + movq(r_bitmap, Address(r_sub_klass, Klass::bitmap_offset())); + movq(r_array_index, r_bitmap); + + // First check the bitmap to see if super_klass might be present. If + // the bit is zero, we are certain that super_klass is not one of + // the secondary supers. + u1 bit = super_klass_slot; + { + // NB: If the count in a x86 shift instruction is 0, the flags are + // not affected, so we do a testq instead. + int shift_count = Klass::SECONDARY_SUPERS_TABLE_MASK - bit; + if (shift_count != 0) { + salq(r_array_index, shift_count); + } else { + testq(r_array_index, r_array_index); + } + } + // We test the MSB of r_array_index, i.e. its sign bit + jcc(Assembler::positive, L_failure); + + // Get the first array index that can contain super_klass into r_array_index. + if (bit != 0) { + population_count(r_array_index, r_array_index, temp2, temp3); + } else { + movl(r_array_index, 1); + } + // NB! r_array_index is off by 1. 
It is compensated by keeping r_array_base off by 1 word. + + // We will consult the secondary-super array. + movptr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + + // We're asserting that the first word in an Array is the + // length, and the second word is the first word of the data. If + // that ever changes, r_array_base will have to be adjusted here. + assert(Array::base_offset_in_bytes() == wordSize, "Adjust this code"); + assert(Array::length_offset_in_bytes() == 0, "Adjust this code"); + + cmpq(r_super_klass, Address(r_array_base, r_array_index, Address::times_8)); + jccb(Assembler::equal, L_success); + + // Is there another entry to check? Consult the bitmap. + btq(r_bitmap, (bit + 1) & Klass::SECONDARY_SUPERS_TABLE_MASK); + jccb(Assembler::carryClear, L_failure); + + // Linear probe. Rotate the bitmap so that the bit just tested is + // in Bit 1; the stub's look-ahead check then starts at Bit 2. + if (bit != 0) { + rorq(r_bitmap, bit); + } + + // Calls into the stub generated by lookup_secondary_supers_table_slow_path. + // Arguments: r_super_klass, r_array_base, r_array_index, r_bitmap. + // Kills: r_array_length. + // Returns: result.
+ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_slow_path_stub())); + // Result (0/1) is in rdi + jmpb(L_fallthrough); + + bind(L_failure); + incq(result); // 0 => 1 + + bind(L_success); + // result = 0; + + bind(L_fallthrough); + BLOCK_COMMENT("} lookup_secondary_supers_table"); + + if (VerifySecondarySupers) { + verify_secondary_supers_table(r_sub_klass, r_super_klass, result, + temp1, temp2, temp3); + } +} + +void MacroAssembler::repne_scanq(Register addr, Register value, Register count, Register limit, + Label* L_success, Label* L_failure) { + Label L_loop, L_fallthrough; + { + int label_nulls = 0; + if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one null in the batch"); + } + bind(L_loop); + cmpq(value, Address(addr, count, Address::times_8)); + jcc(Assembler::equal, *L_success); + addl(count, 1); + cmpl(count, limit); + jcc(Assembler::less, L_loop); + + if (&L_fallthrough != L_failure) { + jmp(*L_failure); + } + bind(L_fallthrough); +} + +// Called by code generated by lookup_secondary_supers_table +// above. This is called when there is a collision in the hashed +// lookup in the secondary supers array.
+void MacroAssembler::lookup_secondary_supers_table_slow_path(Register r_super_klass, + Register r_array_base, + Register r_array_index, + Register r_bitmap, + Register temp1, + Register temp2, + Label* L_success, + Label* L_failure) { + assert_different_registers(r_super_klass, r_array_base, r_array_index, r_bitmap, temp1, temp2); + + const Register + r_array_length = temp1, + r_sub_klass = noreg, + result = noreg; + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == nullptr) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == nullptr) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one null in the batch"); + + // Load the array length. + movl(r_array_length, Address(r_array_base, Array::length_offset_in_bytes())); + // And adjust the array base to point to the data. + // NB! Effectively increments current slot index by 1. + assert(Array::base_offset_in_bytes() == wordSize, ""); + addptr(r_array_base, Array::base_offset_in_bytes()); + + // Linear probe + Label L_huge; + + // The bitmap is full to bursting. + // Implicit invariant: BITMAP_FULL implies (length > 0) + assert(Klass::SECONDARY_SUPERS_BITMAP_FULL == ~uintx(0), ""); + cmpq(r_bitmap, (int32_t)-1); // sign-extends immediate to 64-bit value + jcc(Assembler::equal, L_huge); + + // NB! Our caller has checked bits 0 and 1 in the bitmap. The + // current slot (at secondary_supers[r_array_index]) has not yet + // been inspected, and r_array_index may be out of bounds if we + // wrapped around the end of the array. + + { // This is conventional linear probing, but instead of terminating + // when a null entry is found in the table, we maintain a bitmap + // in which a 0 indicates missing entries. + // The check above guarantees there are 0s in the bitmap, so the loop + // eventually terminates. + + xorl(temp2, temp2); // = 0; + + Label L_again; + bind(L_again); + + // Check for array wraparound. 
+ cmpl(r_array_index, r_array_length); + cmovl(Assembler::greaterEqual, r_array_index, temp2); + + cmpq(r_super_klass, Address(r_array_base, r_array_index, Address::times_8)); + jcc(Assembler::equal, *L_success); + + // If the next bit in bitmap is zero, we're done. + btq(r_bitmap, 2); // look-ahead check (Bit 2); Bits 0 and 1 are tested by now + jcc(Assembler::carryClear, *L_failure); + + rorq(r_bitmap, 1); // Bits 1/2 => 0/1 + addl(r_array_index, 1); + + jmp(L_again); + } + + { // Degenerate case: more than 64 secondary supers. + // FIXME: We could do something smarter here, maybe a vectorized + // comparison or a binary search, but is that worth any added + // complexity? + bind(L_huge); + xorl(r_array_index, r_array_index); // = 0 + repne_scanq(r_array_base, r_super_klass, r_array_index, r_array_length, + L_success, + (&L_fallthrough != L_failure ? L_failure : nullptr)); + + bind(L_fallthrough); + } +} + +struct VerifyHelperArguments { + Klass* _super; + Klass* _sub; + intptr_t _linear_result; + intptr_t _table_result; +}; + +static void verify_secondary_supers_table_helper(const char* msg, VerifyHelperArguments* args) { + Klass::on_secondary_supers_verification_failure(args->_super, + args->_sub, + args->_linear_result, + args->_table_result, + msg); +} + +// Make sure that the hashed lookup and a linear scan agree. 
+void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register result, + Register temp1, + Register temp2, + Register temp3) { + const Register + r_array_index = temp1, + r_array_length = temp2, + r_array_base = temp3, + r_bitmap = noreg; + + LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS; + + BLOCK_COMMENT("verify_secondary_supers_table {"); + + Label L_success, L_failure, L_check, L_done; + + movptr(r_array_base, Address(r_sub_klass, in_bytes(Klass::secondary_supers_offset()))); + movl(r_array_length, Address(r_array_base, Array::length_offset_in_bytes())); + // And adjust the array base to point to the data. + addptr(r_array_base, Array::base_offset_in_bytes()); + + testl(r_array_length, r_array_length); // array_length == 0? + jcc(Assembler::zero, L_failure); + + movl(r_array_index, 0); + repne_scanq(r_array_base, r_super_klass, r_array_index, r_array_length, &L_success); + // fall through to L_failure + + const Register linear_result = r_array_index; // reuse temp1 + + bind(L_failure); // not present + movl(linear_result, 1); + jmp(L_check); + + bind(L_success); // present + movl(linear_result, 0); + + bind(L_check); + cmpl(linear_result, result); + jcc(Assembler::equal, L_done); + + { // To avoid calling convention issues, build a record on the stack + // and pass the pointer to that instead. 
+ push(result); + push(linear_result); + push(r_sub_klass); + push(r_super_klass); + movptr(c_rarg1, rsp); + movptr(c_rarg0, (uintptr_t) "mismatch"); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, verify_secondary_supers_table_helper))); + should_not_reach_here(); + } + bind(L_done); + + BLOCK_COMMENT("} verify_secondary_supers_table"); +} + +#undef LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS + +#endif // LP64 + void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index bafa3b92dea..738ce8a55c3 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -595,6 +595,8 @@ public: ); void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); + void population_count(Register dst, Register src, Register scratch1, Register scratch2); + // interface method calling void lookup_interface_method(Register recv_klass, Register intf_klass, @@ -645,8 +647,46 @@ public: Label* L_success, Label* L_failure, bool set_cond_codes = false); + void hashed_check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); - // Simplified, combined version, good for typical uses. + // As above, but with a constant super_klass. + // The result is in Register result, not the condition codes. 
+ void lookup_secondary_supers_table(Register sub_klass, + Register super_klass, + Register temp1, + Register temp2, + Register temp3, + Register temp4, + Register result, + u1 super_klass_slot); + + void lookup_secondary_supers_table_slow_path(Register r_super_klass, + Register r_array_base, + Register r_array_index, + Register r_bitmap, + Register temp1, + Register temp2, + Label* L_success, + Label* L_failure = nullptr); + + void verify_secondary_supers_table(Register r_sub_klass, + Register r_super_klass, + Register expected, + Register temp1, + Register temp2, + Register temp3); + + void repne_scanq(Register addr, Register value, Register count, Register limit, + Label* L_success, + Label* L_failure = nullptr); + + // Simplified, combined version, good for typical uses. // Falls through on failure. void check_klass_subtype(Register sub_klass, Register super_klass, diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp index 71aafdc1cd3..fecefcea0ee 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -3994,6 +3994,54 @@ address StubGenerator::generate_upcall_stub_exception_handler() { return start; } +address StubGenerator::generate_lookup_secondary_supers_table_stub(u1 super_klass_index) { + StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table"); + + address start = __ pc(); + + const Register + r_super_klass = rax, + r_sub_klass = rsi, + result = rdi; + + __ lookup_secondary_supers_table(r_sub_klass, r_super_klass, + rdx, rcx, rbx, r11, // temps + result, + super_klass_index); + __ ret(0); + + return start; +} + +// Slow path implementation for UseSecondarySupersTable. 
+address StubGenerator::generate_lookup_secondary_supers_table_slow_path_stub() { + StubCodeMark mark(this, "StubRoutines", "lookup_secondary_supers_table_slow_path"); + + address start = __ pc(); + + const Register + r_super_klass = rax, + r_array_base = rbx, + r_array_index = rdx, + r_sub_klass = rsi, + r_bitmap = r11, + result = rdi; + + Label L_success; + __ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base, r_array_index, r_bitmap, + rcx, rdi, // temps + &L_success); + // bind(L_failure); -- only L_success is passed, so a miss falls through to here + __ movl(result, 1); + __ ret(0); + + __ bind(L_success); + __ movl(result, 0); + __ ret(0); + + return start; +} + void StubGenerator::create_control_words() { // Round to nearest, 64-bit mode, exceptions masked, flags specialized StubRoutines::x86::_mxcsr_std = EnableX86ECoreOpts ? 0x1FBF : 0x1F80; @@ -4279,6 +4327,14 @@ void StubGenerator::generate_compiler_stubs() { StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); } + if (UseSecondarySupersTable) { + StubRoutines::_lookup_secondary_supers_table_slow_path_stub = generate_lookup_secondary_supers_table_slow_path_stub(); + if (! 
InlineSecondarySupersTest) { + for (int slot = 0; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) { + StubRoutines::_lookup_secondary_supers_table_stubs[slot] = generate_lookup_secondary_supers_table_stub(slot); + } + } + } if (UseMontgomeryMultiplyIntrinsic) { StubRoutines::_montgomeryMultiply = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); @@ -4372,6 +4428,7 @@ void StubGenerator::generate_compiler_stubs() { StubRoutines::_vector_d_math[VectorSupport::VEC_SIZE_256][op] = (address)os::dll_lookup(libjsvml, ebuf); } } + #endif // COMPILER2 #endif // COMPILER2_OR_JVMCI } diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp index 42ae25e410f..e573ea98c14 100644 --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp @@ -605,6 +605,12 @@ class StubGenerator: public StubCodeGenerator { // shared exception handler for FFM upcall stubs address generate_upcall_stub_exception_handler(); + // Specialized stub implementations for UseSecondarySupersTable. + address generate_lookup_secondary_supers_table_stub(u1 super_klass_index); + + // Slow path implementation for UseSecondarySupersTable. 
+ address generate_lookup_secondary_supers_table_slow_path_stub(); + void create_control_words(); // Initialization diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp index 845b8a7e24d..18b272ab58b 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp @@ -788,6 +788,11 @@ public: return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32 } + // x86_64 supports secondary supers table + constexpr static bool supports_secondary_supers_table() { + return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32 + } + constexpr static bool supports_stack_watermark_barrier() { return true; } diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 2e0cf770c22..288212d6d13 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -296,6 +296,9 @@ reg_class long_rcx_reg(RCX, RCX_H); // Singleton class for RDX long register reg_class long_rdx_reg(RDX, RDX_H); +// Singleton class for R11 long register +reg_class long_r11_reg(R11, R11_H); + // Singleton class for RAX int register reg_class int_rax_reg(RAX); @@ -2667,6 +2670,16 @@ operand rdx_RegL() interface(REG_INTER); %} +operand r11_RegL() +%{ + constraint(ALLOC_IN_RC(long_r11_reg)); + match(RegL); + match(rRegL); + + format %{ %} + interface(REG_INTER); +%} + operand no_rbp_r13_RegL() %{ constraint(ALLOC_IN_RC(long_no_rbp_r13_reg)); @@ -12072,6 +12085,31 @@ instruct partialSubtypeCheck(rdi_RegP result, ins_pipe(pipe_slow); %} +instruct partialSubtypeCheckConstSuper(rsi_RegP sub, rax_RegP super_reg, immP super_con, rdi_RegP result, + rdx_RegL temp1, rcx_RegL temp2, rbx_RegP temp3, r11_RegL temp4, + rFlagsReg cr) +%{ + match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con))); + predicate(UseSecondarySupersTable); + effect(KILL cr, TEMP temp1, TEMP temp2, TEMP temp3, TEMP temp4); + + ins_cost(700); // smaller than the next version + format %{ "partialSubtypeCheck 
$result, $sub, super" %} + + ins_encode %{ + u1 super_klass_slot = ((Klass*)$super_con$$constant)->hash_slot(); + if (InlineSecondarySupersTest) { + __ lookup_secondary_supers_table($sub$$Register, $super_reg$$Register, $temp1$$Register, $temp2$$Register, + $temp3$$Register, $temp4$$Register, $result$$Register, + super_klass_slot); + } else { + __ call(RuntimeAddress(StubRoutines::lookup_secondary_supers_table_stub(super_klass_slot))); + } + %} + + ins_pipe(pipe_slow); +%} + instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr, rsi_RegP sub, rax_RegP super, rcx_RegI rcx, immP0 zero, diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp index c81838ed2ef..707a1de6fd1 100644 --- a/src/hotspot/share/cds/filemap.cpp +++ b/src/hotspot/share/cds/filemap.cpp @@ -211,6 +211,7 @@ void FileMapHeader::populate(FileMapInfo *info, size_t core_region_alignment, } _compressed_oops = UseCompressedOops; _compressed_class_ptrs = UseCompressedClassPointers; + _use_secondary_supers_table = UseSecondarySupersTable; _max_heap_size = MaxHeapSize; _use_optimized_module_handling = CDSConfig::is_using_optimized_module_handling(); _has_full_module_graph = CDSConfig::is_dumping_full_module_graph(); @@ -274,6 +275,7 @@ void FileMapHeader::print(outputStream* st) { st->print_cr("- narrow_oop_mode: %d", _narrow_oop_mode); st->print_cr("- compressed_oops: %d", _compressed_oops); st->print_cr("- compressed_class_ptrs: %d", _compressed_class_ptrs); + st->print_cr("- use_secondary_supers_table: %d", _use_secondary_supers_table); st->print_cr("- cloned_vtables_offset: " SIZE_FORMAT_X, _cloned_vtables_offset); st->print_cr("- serialized_data_offset: " SIZE_FORMAT_X, _serialized_data_offset); st->print_cr("- jvm_ident: %s", _jvm_ident); @@ -2438,6 +2440,11 @@ bool FileMapHeader::validate() { return false; } + if (! 
_use_secondary_supers_table && UseSecondarySupersTable) { + log_warning(cds)("The shared archive was created without UseSecondarySupersTable."); + return false; + } + if (!_use_optimized_module_handling) { CDSConfig::stop_using_optimized_module_handling(); log_info(cds)("optimized module handling: disabled because archive was created without optimized module handling"); diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp index 6d106ef099d..a7f1c23d00a 100644 --- a/src/hotspot/share/cds/filemap.hpp +++ b/src/hotspot/share/cds/filemap.hpp @@ -191,6 +191,7 @@ private: CompressedOops::Mode _narrow_oop_mode; // compressed oop encoding mode bool _compressed_oops; // save the flag UseCompressedOops bool _compressed_class_ptrs; // save the flag UseCompressedClassPointers + bool _use_secondary_supers_table; // save the flag UseSecondarySupersTable size_t _cloned_vtables_offset; // The address of the first cloned vtable size_t _serialized_data_offset; // Data accessed using {ReadClosure,WriteClosure}::serialize() bool _has_non_jar_in_classpath; // non-jar file entry exists in classpath diff --git a/src/hotspot/share/classfile/classLoader.cpp b/src/hotspot/share/classfile/classLoader.cpp index 3fc9b5f7976..76084ff1685 100644 --- a/src/hotspot/share/classfile/classLoader.cpp +++ b/src/hotspot/share/classfile/classLoader.cpp @@ -119,6 +119,7 @@ PerfCounter* ClassLoader::_perf_define_appclass_selftime = nullptr; PerfCounter* ClassLoader::_perf_app_classfile_bytes_read = nullptr; PerfCounter* ClassLoader::_perf_sys_classfile_bytes_read = nullptr; PerfCounter* ClassLoader::_unsafe_defineClassCallCounter = nullptr; +PerfCounter* ClassLoader::_perf_secondary_hash_time = nullptr; GrowableArray* ClassLoader::_patch_mod_entries = nullptr; GrowableArray* ClassLoader::_exploded_entries = nullptr; @@ -1337,6 +1338,7 @@ void ClassLoader::initialize(TRAPS) { NEWPERFBYTECOUNTER(_perf_sys_classfile_bytes_read, SUN_CLS, "sysClassBytes"); 
NEWPERFEVENTCOUNTER(_unsafe_defineClassCallCounter, SUN_CLS, "unsafeDefineClassCalls"); + NEWPERFTICKCOUNTER(_perf_secondary_hash_time, SUN_CLS, "secondarySuperHashTime"); } // lookup java library entry points diff --git a/src/hotspot/share/classfile/classLoader.hpp b/src/hotspot/share/classfile/classLoader.hpp index c8ea47435dd..d3ca476ac95 100644 --- a/src/hotspot/share/classfile/classLoader.hpp +++ b/src/hotspot/share/classfile/classLoader.hpp @@ -169,6 +169,9 @@ class ClassLoader: AllStatic { static PerfCounter* _unsafe_defineClassCallCounter; + // Count the time taken to hash the secondary superclass arrays. + static PerfCounter* _perf_secondary_hash_time; + // The boot class path consists of 3 ordered pieces: // 1. the module/path pairs specified to --patch-module // --patch-module==()* @@ -269,6 +272,9 @@ class ClassLoader: AllStatic { static PerfCounter* perf_class_link_time() { return _perf_class_link_time; } static PerfCounter* perf_class_link_selftime() { return _perf_class_link_selftime; } static PerfCounter* perf_shared_classload_time() { return _perf_shared_classload_time; } + static PerfCounter* perf_secondary_hash_time() { + return _perf_secondary_hash_time; + } static PerfCounter* perf_sys_classload_time() { return _perf_sys_classload_time; } static PerfCounter* perf_app_classload_time() { return _perf_app_classload_time; } static PerfCounter* perf_app_classload_selftime() { return _perf_app_classload_selftime; } diff --git a/src/hotspot/share/memory/universe.cpp b/src/hotspot/share/memory/universe.cpp index 3769285d08c..237eac5017c 100644 --- a/src/hotspot/share/memory/universe.cpp +++ b/src/hotspot/share/memory/universe.cpp @@ -161,6 +161,9 @@ Array* Universe::_the_empty_klass_array = nullptr; Array* Universe::_the_empty_instance_klass_array = nullptr; Array* Universe::_the_empty_method_array = nullptr; +uintx Universe::_the_array_interfaces_bitmap = 0; +uintx Universe::_the_empty_klass_bitmap = 0; + // These variables are guarded by 
FullGCALot_lock. debug_only(OopHandle Universe::_fullgc_alot_dummy_array;) debug_only(int Universe::_fullgc_alot_dummy_next = 0;) @@ -437,6 +440,11 @@ void Universe::genesis(TRAPS) { _the_array_interfaces_array->at_put(1, vmClasses::Serializable_klass()); } + if (UseSecondarySupersTable) { + Universe::_the_array_interfaces_bitmap = Klass::compute_secondary_supers_bitmap(_the_array_interfaces_array); + Universe::_the_empty_klass_bitmap = Klass::compute_secondary_supers_bitmap(_the_empty_klass_array); + } + initialize_basic_type_klass(_fillerArrayKlass, CHECK); initialize_basic_type_klass(boolArrayKlass(), CHECK); diff --git a/src/hotspot/share/memory/universe.hpp b/src/hotspot/share/memory/universe.hpp index c5e366f507f..09e00bb24a0 100644 --- a/src/hotspot/share/memory/universe.hpp +++ b/src/hotspot/share/memory/universe.hpp @@ -96,6 +96,9 @@ class Universe: AllStatic { static Array* _the_array_interfaces_array; + static uintx _the_array_interfaces_bitmap; + static uintx _the_empty_klass_bitmap; + // array of preallocated error objects with backtrace static OopHandle _preallocated_out_of_memory_error_array; @@ -229,7 +232,8 @@ class Universe: AllStatic { static oop virtual_machine_error_instance(); static oop vm_exception() { return virtual_machine_error_instance(); } - static Array* the_array_interfaces_array() { return _the_array_interfaces_array; } + static Array* the_array_interfaces_array() { return _the_array_interfaces_array; } + static uintx the_array_interfaces_bitmap() { return _the_array_interfaces_bitmap; } static Method* finalizer_register_method(); static Method* loader_addClass_method(); @@ -262,6 +266,8 @@ class Universe: AllStatic { static Array* the_empty_klass_array() { return _the_empty_klass_array; } static Array* the_empty_instance_klass_array() { return _the_empty_instance_klass_array; } + static uintx the_empty_klass_bitmap() { return _the_empty_klass_bitmap; } + // OutOfMemoryError support. Returns an error with the required message. 
The returned error // may or may not have a backtrace. If error has a backtrace then the stack trace is already // filled in. diff --git a/src/hotspot/share/oops/arrayKlass.cpp b/src/hotspot/share/oops/arrayKlass.cpp index 3b97b25da6c..5685bbb6bb5 100644 --- a/src/hotspot/share/oops/arrayKlass.cpp +++ b/src/hotspot/share/oops/arrayKlass.cpp @@ -179,7 +179,8 @@ GrowableArray* ArrayKlass::compute_secondary_supers(int num_extra_slots, assert(num_extra_slots == 0, "sanity of primitive array type"); assert(transitive_interfaces == nullptr, "sanity"); // Must share this for correct bootstrapping! - set_secondary_supers(Universe::the_array_interfaces_array()); + set_secondary_supers(Universe::the_array_interfaces_array(), + Universe::the_array_interfaces_bitmap()); return nullptr; } diff --git a/src/hotspot/share/oops/instanceKlass.cpp b/src/hotspot/share/oops/instanceKlass.cpp index ec92a9e8227..43bec86d8e5 100644 --- a/src/hotspot/share/oops/instanceKlass.cpp +++ b/src/hotspot/share/oops/instanceKlass.cpp @@ -1427,29 +1427,37 @@ bool InstanceKlass::can_be_primary_super_slow() const { GrowableArray* InstanceKlass::compute_secondary_supers(int num_extra_slots, Array* transitive_interfaces) { // The secondaries are the implemented interfaces. - Array* interfaces = transitive_interfaces; + // We need the cast because Array is NOT a supertype of Array, + // (but it's safe to do here because we won't write into _secondary_supers from this point on). + Array* interfaces = (Array*)(address)transitive_interfaces; int num_secondaries = num_extra_slots + interfaces->length(); if (num_secondaries == 0) { // Must share this for correct bootstrapping! - set_secondary_supers(Universe::the_empty_klass_array()); + set_secondary_supers(Universe::the_empty_klass_array(), Universe::the_empty_klass_bitmap()); return nullptr; } else if (num_extra_slots == 0) { // The secondary super list is exactly the same as the transitive interfaces, so // let's use it instead of making a copy. 
// Redefine classes has to be careful not to delete this! - // We need the cast because Array is NOT a supertype of Array, - // (but it's safe to do here because we won't write into _secondary_supers from this point on). - set_secondary_supers((Array*)(address)interfaces); - return nullptr; - } else { - // Copy transitive interfaces to a temporary growable array to be constructed - // into the secondary super list with extra slots. - GrowableArray* secondaries = new GrowableArray(interfaces->length()); - for (int i = 0; i < interfaces->length(); i++) { - secondaries->push(interfaces->at(i)); + if (!UseSecondarySupersTable) { + set_secondary_supers(interfaces); + return nullptr; + } else if (num_extra_slots == 0 && interfaces->length() <= 1) { + // We will reuse the transitive interfaces list if we're certain + // it's in hash order. + uintx bitmap = compute_secondary_supers_bitmap(interfaces); + set_secondary_supers(interfaces, bitmap); + return nullptr; } - return secondaries; + // ... fall through if that didn't work. } + // Copy transitive interfaces to a temporary growable array to be constructed + // into the secondary super list with extra slots. 
+ GrowableArray* secondaries = new GrowableArray(interfaces->length()); + for (int i = 0; i < interfaces->length(); i++) { + secondaries->push(interfaces->at(i)); + } + return secondaries; } bool InstanceKlass::implements_interface(Klass* k) const { @@ -3537,7 +3545,7 @@ void InstanceKlass::print_on(outputStream* st) const { } st->print(BULLET"arrays: "); Metadata::print_value_on_maybe_null(st, array_klasses()); st->cr(); - st->print(BULLET"methods: "); methods()->print_value_on(st); st->cr(); + st->print(BULLET"methods: "); methods()->print_value_on(st); st->cr(); if (Verbose || WizardMode) { Array* method_array = methods(); for (int i = 0; i < method_array->length(); i++) { @@ -3557,6 +3565,29 @@ void InstanceKlass::print_on(outputStream* st) const { } st->print(BULLET"local interfaces: "); local_interfaces()->print_value_on(st); st->cr(); st->print(BULLET"trans. interfaces: "); transitive_interfaces()->print_value_on(st); st->cr(); + + st->print(BULLET"secondary supers: "); secondary_supers()->print_value_on(st); st->cr(); + if (UseSecondarySupersTable) { + st->print(BULLET"hash_slot: %d", hash_slot()); st->cr(); + st->print(BULLET"bitmap: " UINTX_FORMAT_X_0, _bitmap); st->cr(); + } + if (secondary_supers() != nullptr) { + if (Verbose) { + bool is_hashed = UseSecondarySupersTable && (_bitmap != SECONDARY_SUPERS_BITMAP_FULL); + st->print_cr(BULLET"---- secondary supers (%d words):", _secondary_supers->length()); + for (int i = 0; i < _secondary_supers->length(); i++) { + ResourceMark rm; // for external_name() + Klass* secondary_super = _secondary_supers->at(i); + st->print(BULLET"%2d:", i); + if (is_hashed) { + int home_slot = compute_home_slot(secondary_super, _bitmap); + int distance = (i - home_slot) & SECONDARY_SUPERS_TABLE_MASK; + st->print(" dist:%02d:", distance); + } + st->print_cr(" %p %s", secondary_super, secondary_super->external_name()); + } + } + } st->print(BULLET"constants: "); constants()->print_value_on(st); st->cr(); if (class_loader_data() != 
nullptr) { st->print(BULLET"class loader data: "); @@ -3614,6 +3645,7 @@ void InstanceKlass::print_on(outputStream* st) const { st->print(BULLET"itable length %d (start addr: " PTR_FORMAT ")", itable_length(), p2i(start_of_itable())); st->cr(); if (itable_length() > 0 && (Verbose || WizardMode)) print_vtable(start_of_itable(), itable_length(), st); st->print_cr(BULLET"---- static fields (%d words):", static_field_size()); + FieldPrinter print_static_field(st); ((InstanceKlass*)this)->do_local_static_fields(&print_static_field); st->print_cr(BULLET"---- non-static fields (%d words):", nonstatic_field_size()); diff --git a/src/hotspot/share/oops/klass.cpp b/src/hotspot/share/oops/klass.cpp index 8eb54696f6b..5b1919a5c4c 100644 --- a/src/hotspot/share/oops/klass.cpp +++ b/src/hotspot/share/oops/klass.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,6 +26,7 @@ #include "cds/archiveHeapLoader.hpp" #include "cds/cdsConfig.hpp" #include "cds/heapShared.hpp" +#include "classfile/classLoader.hpp" #include "classfile/classLoaderData.inline.hpp" #include "classfile/classLoaderDataGraph.inline.hpp" #include "classfile/javaClasses.inline.hpp" @@ -51,8 +52,10 @@ #include "prims/jvmtiExport.hpp" #include "runtime/atomic.hpp" #include "runtime/handles.inline.hpp" +#include "runtime/perfData.hpp" #include "utilities/macros.hpp" #include "utilities/powerOfTwo.hpp" +#include "utilities/rotate_bits.hpp" #include "utilities/stack.inline.hpp" void Klass::set_java_mirror(Handle m) { @@ -77,9 +80,66 @@ void Klass::set_is_cloneable() { } } +uint8_t Klass::compute_hash_slot(Symbol* n) { + uint hash_code; + // Special cases for the two superclasses of all Array instances. 
+ // Code elsewhere assumes, for all instances of ArrayKlass, that + // these two interfaces will be in this order. + + // We ensure there are some empty slots in the hash table between + // these two very common interfaces because if they were adjacent + // (e.g. Slots 0 and 1), then any other class which hashed to 0 or 1 + // would result in a probe length of 3. + if (n == vmSymbols::java_lang_Cloneable()) { + hash_code = 0; + } else if (n == vmSymbols::java_io_Serializable()) { + hash_code = SECONDARY_SUPERS_TABLE_SIZE / 2; + } else { + auto s = (const jbyte*) n->bytes(); + hash_code = java_lang_String::hash_code(s, n->utf8_length()); + // We use String::hash_code here (rather than e.g. + // Symbol::identity_hash()) in order to have a hash code that + // does not change from run to run. We want that because the + // hash value for a secondary superclass appears in generated + // code as a constant. + + // This constant is magic: see Knuth, "Fibonacci Hashing". + constexpr uint multiplier + = 2654435769; // (uint)(((u8)1 << 32) / ((1 + sqrt(5)) / 2 )) + constexpr uint hash_shift = sizeof(hash_code) * 8 - 6; + // The leading bits of the least significant half of the product. + hash_code = (hash_code * multiplier) >> hash_shift; + + if (StressSecondarySupers) { + // Generate many hash collisions in order to stress-test the + // linear search fallback. 
+ hash_code = hash_code % 3; + hash_code = hash_code * (SECONDARY_SUPERS_TABLE_SIZE / 3); + } + } + + return (hash_code & SECONDARY_SUPERS_TABLE_MASK); +} + void Klass::set_name(Symbol* n) { _name = n; - if (_name != nullptr) _name->increment_refcount(); + + if (_name != nullptr) { + _name->increment_refcount(); + } + + if (UseSecondarySupersTable) { + elapsedTimer selftime; + selftime.start(); + + _hash_slot = compute_hash_slot(n); + assert(_hash_slot < SECONDARY_SUPERS_TABLE_SIZE, "required"); + + selftime.stop(); + if (UsePerfData) { + ClassLoader::perf_secondary_hash_time()->inc(selftime.ticks()); + } + } if (CDSConfig::is_dumping_archive() && is_instance_klass()) { SystemDictionaryShared::init_dumptime_info(InstanceKlass::cast(this)); @@ -236,6 +296,175 @@ bool Klass::can_be_primary_super_slow() const { return true; } +void Klass::set_secondary_supers(Array* secondaries) { + assert(!UseSecondarySupersTable || secondaries == nullptr, ""); + set_secondary_supers(secondaries, SECONDARY_SUPERS_BITMAP_EMPTY); +} + +void Klass::set_secondary_supers(Array* secondaries, uintx bitmap) { +#ifdef ASSERT + if (UseSecondarySupersTable && secondaries != nullptr) { + uintx real_bitmap = compute_secondary_supers_bitmap(secondaries); + assert(bitmap == real_bitmap, "must be"); + } +#endif + _bitmap = bitmap; + _secondary_supers = secondaries; + + if (secondaries != nullptr) { + LogMessage(class, load) msg; + NonInterleavingLogStream log {LogLevel::Debug, msg}; + if (log.is_enabled()) { + ResourceMark rm; + log.print_cr("set_secondary_supers: hash_slot: %d; klass: %s", hash_slot(), external_name()); + print_secondary_supers_on(&log); + } + } +} + +// Hashed secondary superclasses +// +// We use a compressed 64-entry hash table with linear probing. We +// start by creating a hash table in the usual way, followed by a pass +// that removes all the null entries. 
To indicate which entries would +// have been null we use a bitmap that contains a 1 in each position +// where an entry is present, 0 otherwise. This bitmap also serves as +// a kind of Bloom filter, which in many cases allows us quickly to +// eliminate the possibility that something is a member of a set of +// secondaries. +uintx Klass::hash_secondary_supers(Array* secondaries, bool rewrite) { + const int length = secondaries->length(); + + if (length == 0) { + return SECONDARY_SUPERS_BITMAP_EMPTY; + } + + if (length == 1) { + int hash_slot = secondaries->at(0)->hash_slot(); + return uintx(1) << hash_slot; + } + + // For performance reasons we don't use a hashed table unless there + // are at least two empty slots in it. If there were only one empty + // slot it'd take a long time to create the table and the resulting + // search would be no faster than linear probing. + if (length > SECONDARY_SUPERS_TABLE_SIZE - 2) { + return SECONDARY_SUPERS_BITMAP_FULL; + } + + { + PerfTraceTime ptt(ClassLoader::perf_secondary_hash_time()); + + ResourceMark rm; + uintx bitmap = SECONDARY_SUPERS_BITMAP_EMPTY; + auto hashed_secondaries = new GrowableArray(SECONDARY_SUPERS_TABLE_SIZE, + SECONDARY_SUPERS_TABLE_SIZE, nullptr); + + for (int j = 0; j < length; j++) { + Klass* k = secondaries->at(j); + hash_insert(k, hashed_secondaries, bitmap); + } + + // Pack the hashed secondaries array by copying it into the + // secondaries array, sans nulls, if modification is allowed. + // Otherwise, validate the order. 
+ int i = 0; + for (int slot = 0; slot < SECONDARY_SUPERS_TABLE_SIZE; slot++) { + bool has_element = ((bitmap >> slot) & 1) != 0; + assert(has_element == (hashed_secondaries->at(slot) != nullptr), ""); + if (has_element) { + Klass* k = hashed_secondaries->at(slot); + if (rewrite) { + secondaries->at_put(i, k); + } else if (secondaries->at(i) != k) { + assert(false, "broken secondary supers hash table"); + return SECONDARY_SUPERS_BITMAP_FULL; + } + i++; + } + } + assert(i == secondaries->length(), "mismatch"); + + return bitmap; + } +} + +void Klass::hash_insert(Klass* klass, GrowableArray* secondaries, uintx& bitmap) { + assert(bitmap != SECONDARY_SUPERS_BITMAP_FULL, ""); + + int dist = 0; + for (int slot = klass->hash_slot(); true; slot = (slot + 1) & SECONDARY_SUPERS_TABLE_MASK) { + Klass* existing = secondaries->at(slot); + assert(((bitmap >> slot) & 1) == (existing != nullptr), "mismatch"); + if (existing == nullptr) { // no conflict + secondaries->at_put(slot, klass); + bitmap |= uintx(1) << slot; + assert(bitmap != SECONDARY_SUPERS_BITMAP_FULL, ""); + return; + } else { + // Use Robin Hood hashing to minimize the worst case search. + // Also, every permutation of the insertion sequence produces + // the same final Robin Hood hash table, provided that a + // consistent tie breaker is used. + int existing_dist = (slot - existing->hash_slot()) & SECONDARY_SUPERS_TABLE_MASK; + if (existing_dist < dist + // This tie breaker ensures that the hash order is maintained. 
+ || ((existing_dist == dist) + && (uintptr_t(existing) < uintptr_t(klass)))) { + Klass* tmp = secondaries->at(slot); + secondaries->at_put(slot, klass); + klass = tmp; + dist = existing_dist; + } + ++dist; + } + } +} + +Array* Klass::pack_secondary_supers(ClassLoaderData* loader_data, + GrowableArray* primaries, + GrowableArray* secondaries, + uintx& bitmap, TRAPS) { + int new_length = primaries->length() + secondaries->length(); + Array* secondary_supers = MetadataFactory::new_array(loader_data, new_length, CHECK_NULL); + + // Combine the two arrays into a metadata object to pack the array. + // The primaries are added in the reverse order, then the secondaries. + int fill_p = primaries->length(); + for (int j = 0; j < fill_p; j++) { + secondary_supers->at_put(j, primaries->pop()); // add primaries in reverse order. + } + for( int j = 0; j < secondaries->length(); j++ ) { + secondary_supers->at_put(j+fill_p, secondaries->at(j)); // add secondaries on the end. + } +#ifdef ASSERT + // We must not copy any null placeholders left over from bootstrap. 
+ for (int j = 0; j < secondary_supers->length(); j++) { + assert(secondary_supers->at(j) != nullptr, "correct bootstrapping order"); + } +#endif + + if (UseSecondarySupersTable) { + bitmap = hash_secondary_supers(secondary_supers, /*rewrite=*/true); // rewrites freshly allocated array + } else { + bitmap = SECONDARY_SUPERS_BITMAP_EMPTY; + } + return secondary_supers; +} + +uintx Klass::compute_secondary_supers_bitmap(Array* secondary_supers) { + return hash_secondary_supers(secondary_supers, /*rewrite=*/false); // no rewrites allowed +} + +uint8_t Klass::compute_home_slot(Klass* k, uintx bitmap) { + uint8_t hash = k->hash_slot(); + if (hash > 0) { + return population_count(bitmap << (SECONDARY_SUPERS_TABLE_SIZE - hash)); + } + return 0; +} + + void Klass::initialize_supers(Klass* k, Array* transitive_interfaces, TRAPS) { if (k == nullptr) { set_super(nullptr); @@ -326,26 +555,9 @@ void Klass::initialize_supers(Klass* k, Array* transitive_interf primaries->push(p); } // Combine the two arrays into a metadata object to pack the array. - // The primaries are added in the reverse order, then the secondaries. - int new_length = primaries->length() + secondaries->length(); - Array* s2 = MetadataFactory::new_array( - class_loader_data(), new_length, CHECK); - int fill_p = primaries->length(); - for (int j = 0; j < fill_p; j++) { - s2->at_put(j, primaries->pop()); // add primaries in reverse order. - } - for( int j = 0; j < secondaries->length(); j++ ) { - s2->at_put(j+fill_p, secondaries->at(j)); // add secondaries on the end. - } - - #ifdef ASSERT - // We must not copy any null placeholders left over from bootstrap. 
- for (int j = 0; j < s2->length(); j++) { - assert(s2->at(j) != nullptr, "correct bootstrapping order"); - } - #endif - - set_secondary_supers(s2); + uintx bitmap = 0; + Array* s2 = pack_secondary_supers(class_loader_data(), primaries, secondaries, bitmap, CHECK); + set_secondary_supers(s2, bitmap); } } @@ -353,7 +565,7 @@ GrowableArray* Klass::compute_secondary_supers(int num_extra_slots, Array* transitive_interfaces) { assert(num_extra_slots == 0, "override for complex klasses"); assert(transitive_interfaces == nullptr, "sanity"); - set_secondary_supers(Universe::the_empty_klass_array()); + set_secondary_supers(Universe::the_empty_klass_array(), Universe::the_empty_klass_bitmap()); return nullptr; } @@ -552,6 +764,11 @@ void Klass::remove_unshareable_info() { // Null out class_loader_data because we don't share that yet. set_class_loader_data(nullptr); set_is_shared(); + + // FIXME: validation in Klass::hash_secondary_supers() may fail for shared klasses. + // Even though the bitmaps always match, the canonical order of elements in the table + // is not guaranteed to stay the same (see tie breaker during Robin Hood hashing in Klass::hash_insert). 
+ //assert(compute_secondary_supers_bitmap(secondary_supers()) == _bitmap, "broken table"); } void Klass::remove_java_mirror() { @@ -957,3 +1174,100 @@ const char* Klass::class_in_module_of_loader(bool use_are, bool include_parent_l return class_description; } + +class LookupStats : StackObj { + private: + uint _no_of_samples; + uint _worst; + uint _worst_count; + uint _average; + uint _best; + uint _best_count; + public: + LookupStats() : _no_of_samples(0), _worst(0), _worst_count(0), _average(0), _best(INT_MAX), _best_count(0) {} + + ~LookupStats() { + assert(_best <= _worst || _no_of_samples == 0, "sanity"); + } + + void sample(uint value) { + ++_no_of_samples; + _average += value; + + if (_worst < value) { + _worst = value; + _worst_count = 1; + } else if (_worst == value) { + ++_worst_count; + } + + if (_best > value) { + _best = value; + _best_count = 1; + } else if (_best == value) { + ++_best_count; + } + } + + void print_on(outputStream* st) const { + st->print("best: %2d (%4.1f%%)", _best, (100.0 * _best_count) / _no_of_samples); + if (_best_count < _no_of_samples) { + st->print("; average: %4.1f; worst: %2d (%4.1f%%)", + (1.0 * _average) / _no_of_samples, + _worst, (100.0 * _worst_count) / _no_of_samples); + } + } +}; + +static void print_positive_lookup_stats(Array* secondary_supers, uintx bitmap, outputStream* st) { + int num_of_supers = secondary_supers->length(); + + LookupStats s; + for (int i = 0; i < num_of_supers; i++) { + Klass* secondary_super = secondary_supers->at(i); + int home_slot = Klass::compute_home_slot(secondary_super, bitmap); + uint score = 1 + ((i - home_slot) & Klass::SECONDARY_SUPERS_TABLE_MASK); + s.sample(score); + } + st->print("positive_lookup: "); s.print_on(st); +} + +static uint compute_distance_to_nearest_zero(int slot, uintx bitmap) { + assert(~bitmap != 0, "no zeroes"); + uintx start = rotate_right(bitmap, slot); + return count_trailing_zeros(~start); +} + +static void print_negative_lookup_stats(uintx bitmap, 
outputStream* st) { + LookupStats s; + for (int slot = 0; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) { + uint score = compute_distance_to_nearest_zero(slot, bitmap); + s.sample(score); + } + st->print("negative_lookup: "); s.print_on(st); +} + +void Klass::print_secondary_supers_on(outputStream* st) const { + if (secondary_supers() != nullptr) { + if (UseSecondarySupersTable) { + st->print(" - "); st->print("%d elements;", _secondary_supers->length()); + st->print_cr(" bitmap: " UINTX_FORMAT_X_0 ";", _bitmap); + if (_bitmap != SECONDARY_SUPERS_BITMAP_EMPTY && + _bitmap != SECONDARY_SUPERS_BITMAP_FULL) { + st->print(" - "); print_positive_lookup_stats(secondary_supers(), _bitmap, st); st->cr(); + st->print(" - "); print_negative_lookup_stats(_bitmap, st); st->cr(); + } + } + } else { + st->print("null"); + } +} + +void Klass::on_secondary_supers_verification_failure(Klass* super, Klass* sub, bool linear_result, bool table_result, const char* msg) { + ResourceMark rm; + super->print(); + sub->print(); + fatal("%s: %s implements %s: is_subtype_of: %d; linear_search: %d; table_lookup: %d", + msg, sub->external_name(), super->external_name(), + sub->is_subtype_of(super), linear_result, table_result); +} diff --git a/src/hotspot/share/oops/klass.hpp b/src/hotspot/share/oops/klass.hpp index 0c9c05bed32..2923235e2f3 100644 --- a/src/hotspot/share/oops/klass.hpp +++ b/src/hotspot/share/oops/klass.hpp @@ -159,6 +159,12 @@ class Klass : public Metadata { // Provide access the corresponding instance java.lang.ClassLoader. ClassLoaderData* _class_loader_data; + // Bitmap and hash code used by hashed secondary supers. + uintx _bitmap; + uint8_t _hash_slot; + + static uint8_t compute_hash_slot(Symbol* s); + int _vtable_len; // vtable length. This field may be read very often when we // have lots of itable dispatches (e.g., lambdas and streams). 
// Keep it away from the beginning of a Klass to avoid cacheline @@ -231,7 +237,10 @@ protected: void set_secondary_super_cache(Klass* k) { _secondary_super_cache = k; } Array* secondary_supers() const { return _secondary_supers; } - void set_secondary_supers(Array* k) { _secondary_supers = k; } + void set_secondary_supers(Array* k); + void set_secondary_supers(Array* k, uintx bitmap); + + uint8_t hash_slot() const { return _hash_slot; } // Return the element of the _super chain of the given depth. // If there is no such element, return either null or this. @@ -382,7 +391,26 @@ protected: void set_subklass(Klass* s); void set_next_sibling(Klass* s); + private: + static void hash_insert(Klass* klass, GrowableArray* secondaries, uintx& bitmap); + static uintx hash_secondary_supers(Array* secondaries, bool rewrite); + public: + // Secondary supers table support + static Array* pack_secondary_supers(ClassLoaderData* loader_data, + GrowableArray* primaries, + GrowableArray* secondaries, + uintx& bitmap, + TRAPS); + + static uintx compute_secondary_supers_bitmap(Array* secondary_supers); + static uint8_t compute_home_slot(Klass* k, uintx bitmap); + + static constexpr int SECONDARY_SUPERS_TABLE_SIZE = sizeof(_bitmap) * 8; + static constexpr int SECONDARY_SUPERS_TABLE_MASK = SECONDARY_SUPERS_TABLE_SIZE - 1; + + static constexpr uintx SECONDARY_SUPERS_BITMAP_EMPTY = 0; + static constexpr uintx SECONDARY_SUPERS_BITMAP_FULL = ~(uintx)0; // Compiler support static ByteSize super_offset() { return byte_offset_of(Klass, _super); } @@ -399,6 +427,7 @@ protected: static ByteSize subklass_offset() { return byte_offset_of(Klass, _subklass); } static ByteSize next_sibling_offset() { return byte_offset_of(Klass, _next_sibling); } #endif + static ByteSize bitmap_offset() { return byte_offset_of(Klass, _bitmap); } // Unpacking layout_helper: static const int _lh_neutral_value = 0; // neutral non-array non-instance value @@ -711,6 +740,8 @@ protected: virtual void oop_print_value_on(oop 
obj, outputStream* st); virtual void oop_print_on (oop obj, outputStream* st); + void print_secondary_supers_on(outputStream* st) const; + virtual const char* internal_name() const = 0; // Verification @@ -725,6 +756,8 @@ protected: // for error reporting static bool is_valid(Klass* k); + + static void on_secondary_supers_verification_failure(Klass* super, Klass* sub, bool linear_result, bool table_result, const char* msg); }; #endif // SHARE_OOPS_KLASS_HPP diff --git a/src/hotspot/share/oops/objArrayKlass.cpp b/src/hotspot/share/oops/objArrayKlass.cpp index b477948917e..94d3c6b6b35 100644 --- a/src/hotspot/share/oops/objArrayKlass.cpp +++ b/src/hotspot/share/oops/objArrayKlass.cpp @@ -308,7 +308,8 @@ GrowableArray* ObjArrayKlass::compute_secondary_supers(int num_extra_slo int num_secondaries = num_extra_slots + 2 + num_elem_supers; if (num_secondaries == 2) { // Must share this for correct bootstrapping! - set_secondary_supers(Universe::the_array_interfaces_array()); + set_secondary_supers(Universe::the_array_interfaces_array(), + Universe::the_array_interfaces_bitmap()); return nullptr; } else { GrowableArray* secondaries = new GrowableArray(num_elem_supers+2); diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index bfb8146a650..62489f426c6 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -788,6 +788,9 @@ \ develop(bool, StressPrunedExceptionHandlers, false, \ "Always prune exception handlers") \ + \ + product(bool, InlineSecondarySupersTest, true, DIAGNOSTIC, \ + "Inline the secondary supers hash lookup.") \ // end of C2_FLAGS diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp index 692521dc246..386d3224889 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -2500,6 +2500,14 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) { n->del_req(3); break; } + case Op_PartialSubtypeCheck: { + if 
(UseSecondarySupersTable && n->in(2)->is_Con()) { + // PartialSubtypeCheck uses both constant and register operands for superclass input. + n->set_req(2, new BinaryNode(n->in(2), n->in(2))); + break; + } + break; + } default: break; } diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index fa57801d327..f7fdc167cb8 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -2454,6 +2454,12 @@ const Type* LoadNode::klass_value_common(PhaseGVN* phase) const { } } + if (tkls != nullptr && !UseSecondarySupersCache + && tkls->offset() == in_bytes(Klass::secondary_super_cache_offset())) { + // Treat Klass::_secondary_super_cache as a constant when the cache is disabled. + return TypePtr::NULL_PTR; + } + // Bailout case return LoadNode::Value(phase); } diff --git a/src/hotspot/share/runtime/abstract_vm_version.hpp b/src/hotspot/share/runtime/abstract_vm_version.hpp index 6fca13ece85..491d8a49dae 100644 --- a/src/hotspot/share/runtime/abstract_vm_version.hpp +++ b/src/hotspot/share/runtime/abstract_vm_version.hpp @@ -190,6 +190,9 @@ class Abstract_VM_Version: AllStatic { // Is recursive lightweight locking implemented for this platform? constexpr static bool supports_recursive_lightweight_locking() { return false; } + // Does platform support secondary supers table lookup? + constexpr static bool supports_secondary_supers_table() { return false; } + // Does platform support float16 instructions? 
static bool supports_float16() { return false; } diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index 8ccd466d109..3b0916f46fc 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -3685,6 +3685,17 @@ jint Arguments::apply_ergo() { return code; } + if (FLAG_IS_DEFAULT(UseSecondarySupersTable)) { + FLAG_SET_DEFAULT(UseSecondarySupersTable, VM_Version::supports_secondary_supers_table()); + } else if (UseSecondarySupersTable && !VM_Version::supports_secondary_supers_table()) { + warning("UseSecondarySupersTable is not supported"); + FLAG_SET_DEFAULT(UseSecondarySupersTable, false); + } + if (!UseSecondarySupersTable) { + FLAG_SET_DEFAULT(StressSecondarySupers, false); + FLAG_SET_DEFAULT(VerifySecondarySupers, false); + } + #ifdef ZERO // Clear flags not supported on zero. FLAG_SET_DEFAULT(ProfileInterpreter, false); diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index f74930b62e1..5da2c0b80c5 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -1998,6 +1998,18 @@ const int ObjectAlignmentInBytes = 8; "Unconditionally record nmethod dependencies on class " \ "rewriting/transformation independently of the JVMTI " \ "can_{retransform/redefine}_classes capabilities.") \ + \ + product(bool, UseSecondarySupersCache, true, DIAGNOSTIC, \ + "Use secondary supers cache during subtype checks.") \ + \ + product(bool, UseSecondarySupersTable, false, DIAGNOSTIC, \ + "Use hash table to lookup secondary supers.") \ + \ + product(bool, VerifySecondarySupers, false, DIAGNOSTIC, \ + "Check that linear and hashed secondary lookups return the same result.") \ + \ + product(bool, StressSecondarySupers, false, DIAGNOSTIC, \ + "Use a terrible hash function in order to generate many collisions.") \ // end of RUNTIME_FLAGS diff --git a/src/hotspot/share/runtime/stubRoutines.cpp 
b/src/hotspot/share/runtime/stubRoutines.cpp index ff32240905d..4e5cd7f0389 100644 --- a/src/hotspot/share/runtime/stubRoutines.cpp +++ b/src/hotspot/share/runtime/stubRoutines.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #include "asm/macroAssembler.inline.hpp" #include "memory/resourceArea.hpp" #include "oops/access.inline.hpp" +#include "oops/klass.hpp" #include "oops/oop.inline.hpp" #include "prims/vectorSupport.hpp" #include "runtime/continuation.hpp" @@ -191,6 +192,10 @@ JFR_ONLY(address StubRoutines::_jfr_return_lease = nullptr;) address StubRoutines::_upcall_stub_exception_handler = nullptr; +address StubRoutines::_lookup_secondary_supers_table_slow_path_stub = nullptr; +address StubRoutines::_lookup_secondary_supers_table_stubs[Klass::SECONDARY_SUPERS_TABLE_SIZE] = { nullptr }; + + // Initialization // // Note: to break cycle with universe initialization, stubs are generated in two phases. 
diff --git a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp index 03756f92d1c..d7adf320131 100644 --- a/src/hotspot/share/runtime/stubRoutines.hpp +++ b/src/hotspot/share/runtime/stubRoutines.hpp @@ -274,6 +274,9 @@ class StubRoutines: AllStatic { static address _upcall_stub_exception_handler; + static address _lookup_secondary_supers_table_stubs[]; + static address _lookup_secondary_supers_table_slow_path_stub; + public: // Initialization/Testing static void initialize_initial_stubs(); // must happen before universe::genesis @@ -479,6 +482,17 @@ class StubRoutines: AllStatic { return _upcall_stub_exception_handler; } + static address lookup_secondary_supers_table_stub(u1 slot) { + assert(slot < Klass::SECONDARY_SUPERS_TABLE_SIZE, "out of bounds"); + assert(_lookup_secondary_supers_table_stubs[slot] != nullptr, "not implemented"); + return _lookup_secondary_supers_table_stubs[slot]; + } + + static address lookup_secondary_supers_table_slow_path_stub() { + assert(_lookup_secondary_supers_table_slow_path_stub != nullptr, "not implemented"); + return _lookup_secondary_supers_table_slow_path_stub; + } + static address select_fill_function(BasicType t, bool aligned, const char* &name); // diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp index ec47f3c62fb..87af33d97e7 100644 --- a/src/hotspot/share/utilities/globalDefinitions.hpp +++ b/src/hotspot/share/utilities/globalDefinitions.hpp @@ -150,6 +150,11 @@ class oopDesc; #define INTX_FORMAT_W(width) "%" #width PRIdPTR #define UINTX_FORMAT "%" PRIuPTR #define UINTX_FORMAT_X "0x%" PRIxPTR +#ifdef _LP64 +#define UINTX_FORMAT_X_0 "0x%016" PRIxPTR +#else +#define UINTX_FORMAT_X_0 "0x%08" PRIxPTR +#endif #define UINTX_FORMAT_W(width) "%" #width PRIuPTR // Format jlong, if necessary diff --git a/src/hotspot/share/utilities/rotate_bits.hpp b/src/hotspot/share/utilities/rotate_bits.hpp new file mode 100644 index 
00000000000..7228cca108c --- /dev/null +++ b/src/hotspot/share/utilities/rotate_bits.hpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_UTILITIES_ROTATE_BITS_HPP +#define SHARE_UTILITIES_ROTATE_BITS_HPP + +#include "utilities/globalDefinitions.hpp" + +inline uint32_t rotate_right_32(uint32_t x, int distance) { + distance = distance & 0x1F; + if (distance > 0) { + return (x >> distance) | (x << (32 - distance)); + } else { + return x; + } +} + +inline uint64_t rotate_right_64(uint64_t x, int distance) { + distance = distance & 0x3F; + if (distance > 0) { + return (x >> distance) | (x << (64 - distance)); + } else { + return x; + } +} + +template<typename T, +ENABLE_IF(std::is_integral<T>::value), +ENABLE_IF(sizeof(T) <= sizeof(uint64_t))> +inline T rotate_right(T x, int dist) { + return (sizeof(x) <= sizeof(uint32_t)) ? 
+ rotate_right_32(static_cast<uint32_t>(x), dist) : + rotate_right_64(static_cast<uint64_t>(x), dist); +} + +#endif // SHARE_UTILITIES_ROTATE_BITS_HPP diff --git a/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheHits.java b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheHits.java new file mode 100644 index 00000000000..aaaf0edb258 --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheHits.java @@ -0,0 +1,108 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package org.openjdk.bench.vm.compiler; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +@Warmup(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Fork(value = 3, jvmArgsAppend = {"-XX:+TieredCompilation", "-XX:TieredStopAtLevel=1"}) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Threads(1) +@State(Scope.Benchmark) +public class SecondarySuperCacheHits { + + // This test targets C1 specifically, to enter the interesting code path + // without heavily optimizing compiler like C2 optimizing based on profiles, + // or folding the instanceof checks. + + // The test verifies what happens on a happy path, when we can actually cache + // the last super and use it effectively. + + interface I01 {} + interface I02 {} + interface I03 {} + interface I04 {} + interface I05 {} + interface I06 {} + interface I07 {} + interface I08 {} + interface I09 {} + interface I10 {} + interface I11 {} + interface I12 {} + interface I13 {} + interface I14 {} + interface I15 {} + interface I16 {} + interface I17 {} + interface I18 {} + interface I19 {} + interface I20 {} + + class B {} + class C1 extends B implements I01, I02, I03, I04, I05, I06, I07, I08, I09, I10, I11, I12, I13, I14, I15, I16, I17, I18, I19, I20 {} + + volatile B o; + + @Setup + public void setup() { + o = new C1(); + } + + static final int ITERS = 10000; + + @Benchmark + @OperationsPerInvocation(20*ITERS) + public void test(Blackhole bh) { + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I01); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I02); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I03); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I04); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I05); + + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I06); + for 
(int c = 0; c < ITERS; c++) bh.consume(o instanceof I07); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I08); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I09); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I10); + + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I11); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I12); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I13); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I14); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I15); + + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I16); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I17); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I18); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I19); + for (int c = 0; c < ITERS; c++) bh.consume(o instanceof I20); + } + +} diff --git a/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheInterContention.java b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheInterContention.java new file mode 100644 index 00000000000..3cafa582c09 --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheInterContention.java @@ -0,0 +1,81 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package org.openjdk.bench.vm.compiler; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +@Warmup(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Fork(value = 3, jvmArgsAppend = {"-XX:+TieredCompilation", "-XX:TieredStopAtLevel=1"}) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Threads(Threads.MAX) +@State(Scope.Benchmark) +public class SecondarySuperCacheInterContention { + + // This test targets C1 specifically, to enter the interesting code path + // without heavily optimizing compiler like C2 optimizing based on profiles, + // or folding the instanceof checks. + + // The test verifies what happens on unhappy path, when we contend a lot over + // the secondary super cache, where different threads want to update the cache + // with different value. In this test, every thread comes with its own stable + // cached value. Meaning, this tests the INTER-thread contention. 
+ + interface IA {} + interface IB {} + class B {} + class C1 extends B implements IA, IB {} + class C2 extends B implements IA, IB {} + + volatile B o1, o2; + + @Setup + public void setup() { + o1 = new C1(); + o2 = new C2(); + } + + @Benchmark + @OperationsPerInvocation(2) + @Group("test") + @GroupThreads(1) + public void t1(Blackhole bh) { + bh.consume(o1 instanceof IA); + bh.consume(o2 instanceof IA); + } + + @Benchmark + @OperationsPerInvocation(2) + @Group("test") + @GroupThreads(1) + public void t2(Blackhole bh) { + bh.consume(o1 instanceof IB); + bh.consume(o2 instanceof IB); + } + +} diff --git a/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheIntraContention.java b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheIntraContention.java new file mode 100644 index 00000000000..b97d49e2e60 --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/compiler/SecondarySuperCacheIntraContention.java @@ -0,0 +1,72 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package org.openjdk.bench.vm.compiler; + +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +@Warmup(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 5, time = 300, timeUnit = TimeUnit.MILLISECONDS) +@Fork(value = 3, jvmArgsAppend = {"-XX:+TieredCompilation", "-XX:TieredStopAtLevel=1"}) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Threads(Threads.MAX) +@State(Scope.Benchmark) +public class SecondarySuperCacheIntraContention { + + // This test targets C1 specifically, to enter the interesting code path + // without heavily optimizing compiler like C2 optimizing based on profiles, + // or folding the instanceof checks. + + // The test verifies what happens on unhappy path, when we contend a lot over + // the secondary super cache, where different threads want to update the cache + // with different value. In this test, every thread comes with its own contending + // value. Meaning, this tests the INTRA-thread contention. 
+ + interface IA {} + interface IB {} + class B {} + class C1 extends B implements IA, IB {} + class C2 extends B implements IA, IB {} + + volatile B o1, o2; + + @Setup + public void setup() { + o1 = new C1(); + o2 = new C2(); + } + + @Benchmark + @OperationsPerInvocation(4) + public void test(Blackhole bh) { + bh.consume(o1 instanceof IA); + bh.consume(o2 instanceof IA); + bh.consume(o1 instanceof IB); + bh.consume(o2 instanceof IB); + } + +} diff --git a/test/micro/org/openjdk/bench/vm/lang/SecondarySupersLookup.java b/test/micro/org/openjdk/bench/vm/lang/SecondarySupersLookup.java new file mode 100644 index 00000000000..54e5b081672 --- /dev/null +++ b/test/micro/org/openjdk/bench/vm/lang/SecondarySupersLookup.java @@ -0,0 +1,310 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */
+package org.openjdk.bench.vm.lang;
+
+import org.openjdk.jmh.annotations.*;
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+@Warmup(iterations = 1, time = 1)
+@Measurement(iterations = 3, time = 1)
+@Fork(value = 5)
+public class SecondarySupersLookup { // JMH benchmark: average time of Class.isInstance() against interface chains of different lengths
+    interface J {} // implemented by none of the test objects below; the target of every testNegativeNN lookup
+    interface I01 {} // root of the chain: each INN below extends I(NN-1), so INN has NN-1 super-interfaces
+    interface I02 extends I01 {}
+    interface I03 extends I02 {}
+    interface I04 extends I03 {}
+    interface I05 extends I04 {}
+    interface I06 extends I05 {}
+    interface I07 extends I06 {}
+    interface I08 extends I07 {}
+    interface I09 extends I08 {}
+    interface I10 extends I09 {}
+    interface I11 extends I10 {}
+    interface I12 extends I11 {}
+    interface I13 extends I12 {}
+    interface I14 extends I13 {}
+    interface I15 extends I14 {}
+    interface I16 extends I15 {}
+    interface I17 extends I16 {}
+    interface I18 extends I17 {}
+    interface I19 extends I18 {}
+    interface I20 extends I19 {}
+    interface I21 extends I20 {}
+    interface I22 extends I21 {}
+    interface I23 extends I22 {}
+    interface I24 extends I23 {}
+    interface I25 extends I24 {}
+    interface I26 extends I25 {}
+    interface I27 extends I26 {}
+    interface I28 extends I27 {}
+    interface I29 extends I28 {}
+    interface I30 extends I29 {}
+    interface I31 extends I30 {}
+    interface I32 extends I31 {}
+    interface I33 extends I32 {}
+    interface I34 extends I33 {}
+    interface I35 extends I34 {}
+    interface I36 extends I35 {}
+    interface I37 extends I36 {}
+    interface I38 extends I37 {}
+    interface I39 extends I38 {}
+    interface I40 extends I39 {}
+    interface I41 extends I40 {}
+    interface I42 extends I41 {}
+    interface I43 extends I42 {}
+    interface I44 extends I43 {}
+    interface I45 extends I44 {}
+    interface I46 extends I45 {}
+    interface I47 extends I46 {}
+    interface I48 extends I47 {}
+    interface I49 extends I48 {}
+    interface I50 extends I49 {}
+    interface I51 extends I50 {}
+    interface I52 extends I51 {}
+    interface I53 extends I52 {}
+    interface I54 extends I53 {}
+    interface I55 extends I54 {}
+    interface I56 extends I55 {}
+    interface I57 extends I56 {}
+    interface I58 extends I57 {}
+    interface I59 extends I58 {}
+    interface I60 extends I59 {}
+    interface I61 extends I60 {}
+    interface I62 extends I61 {}
+    interface I63 extends I62 {}
+    interface I64 extends I63 {}
+
+    final Object obj00 = new Object(); // plain Object: implements none of the interfaces above
+    final Object obj01 = new I01() {}; // objNN is an instance of an anonymous class that implements INN (and so I01..INN)
+    final Object obj02 = new I02() {};
+    final Object obj03 = new I03() {};
+    final Object obj04 = new I04() {};
+    final Object obj05 = new I05() {};
+    final Object obj06 = new I06() {};
+    final Object obj07 = new I07() {};
+    final Object obj08 = new I08() {};
+    final Object obj09 = new I09() {};
+    final Object obj10 = new I10() {};
+    final Object obj16 = new I16() {};
+    final Object obj20 = new I20() {};
+    final Object obj30 = new I30() {};
+    final Object obj32 = new I32() {};
+    final Object obj40 = new I40() {};
+    final Object obj50 = new I50() {};
+    final Object obj55 = new I55() {};
+    final Object obj56 = new I56() {};
+    final Object obj57 = new I57() {};
+    final Object obj58 = new I58() {};
+    final Object obj59 = new I59() {};
+    final Object obj60 = new I60() {};
+    final Object obj61 = new I61() {};
+    final Object obj62 = new I62() {};
+    final Object obj63 = new I63() {};
+    final Object obj64 = new I64() {};
+
+    static Class getSuper(int idx) { // maps idx to one of I01.class..I10.class, selected by |idx| % 10
+        int i = Math.abs(idx) % 10; // NOTE(review): Math.abs(Integer.MIN_VALUE) stays negative; the only visible caller passes 0..19_999
+        switch (i) {
+            case 0: return I01.class;
+            case 1: return I02.class;
+            case 2: return I03.class;
+            case 3: return I04.class;
+            case 4: return I05.class;
+            case 5: return I06.class;
+            case 6: return I07.class;
+            case 7: return I08.class;
+            case 8: return I09.class;
+            case 9: return I10.class;
+        }
+        throw new InternalError("" + i); // only reachable when i is outside 0..9
+    }
+
+    @Setup
+    public void warmup() { // runs test() 20_000 x 9 times with rotating target classes before measurement; presumably to warm up test() with mixed types - confirm
+        for (int i = 0; i < 20_000; i++) {
+            Class s = getSuper(i); // cycles through I01..I10 as i increases
+            test(obj01, s, s.isInstance(obj01)); // expected value comes from isInstance itself, so these calls never throw
+            test(obj02, s, s.isInstance(obj02));
+            test(obj03, s, s.isInstance(obj03));
+            test(obj04, s, s.isInstance(obj04));
+            test(obj05, s, s.isInstance(obj05));
+            test(obj06, s, s.isInstance(obj06));
+            test(obj07, s, s.isInstance(obj07));
+            test(obj08, s, s.isInstance(obj08));
+            test(obj09, s, s.isInstance(obj09));
+        }
+    }
+
+    private static void test(Object obj, Class cls, boolean expected) { // the measured operation: one cls.isInstance(obj) check, verified against expected
+        if (cls.isInstance(obj) != expected) {
+            throw new InternalError(obj.getClass() + " " + cls + " " + expected); // message: object class, target class, expected result
+        }
+    }
+    @Benchmark
+    public void testPositive01() { // testPositiveNN: objNN checked against INN, always a match
+        test(obj01, I01.class, true);
+    }
+    @Benchmark public void testPositive02() {
+        test(obj02, I02.class, true);
+    }
+    @Benchmark public void testPositive03() {
+        test(obj03, I03.class, true);
+    }
+    @Benchmark public void testPositive04() {
+        test(obj04, I04.class, true);
+    }
+    @Benchmark public void testPositive05() {
+        test(obj05, I05.class, true);
+    }
+    @Benchmark public void testPositive06() {
+        test(obj06, I06.class, true);
+    }
+    @Benchmark public void testPositive07() {
+        test(obj07, I07.class, true);
+    }
+    @Benchmark public void testPositive08() {
+        test(obj08, I08.class, true);
+    }
+    @Benchmark public void testPositive09() {
+        test(obj09, I09.class, true);
+    }
+    @Benchmark public void testPositive10() {
+        test(obj10, I10.class, true);
+    }
+    @Benchmark public void testPositive16() {
+        test(obj16, I16.class, true);
+    }
+    @Benchmark public void testPositive20() {
+        test(obj20, I20.class, true);
+    }
+    @Benchmark public void testPositive30() {
+        test(obj30, I30.class, true);
+    }
+    @Benchmark public void testPositive32() {
+        test(obj32, I32.class, true);
+    }
+    @Benchmark public void testPositive40() {
+        test(obj40, I40.class, true);
+    }
+    @Benchmark public void testPositive50() {
+        test(obj50, I50.class, true);
+    }
+    @Benchmark public void testPositive60() {
+        test(obj60, I60.class, true);
+    }
+    @Benchmark public void testPositive63() {
+        test(obj63, I63.class, true);
+    }
+    @Benchmark public void testPositive64() {
+        test(obj64, I64.class, true);
+    }
+
+    @Benchmark public void testNegative00() { // testNegativeNN: objNN checked against the unrelated interface J, always a miss
+        test(obj00, J.class, false);
+    }
+    @Benchmark public void testNegative01() {
+        test(obj01, J.class, false);
+    }
+    @Benchmark public void testNegative02() {
+        test(obj02, J.class, false);
+    }
+    @Benchmark public void testNegative03() {
+        test(obj03, J.class, false);
+    }
+    @Benchmark public void testNegative04() {
+        test(obj04, J.class, false);
+    }
+    @Benchmark public void testNegative05() {
+        test(obj05, J.class, false);
+    }
+    @Benchmark public void testNegative06() {
+        test(obj06, J.class, false);
+    }
+    @Benchmark public void testNegative07() {
+        test(obj07, J.class, false);
+    }
+    @Benchmark public void testNegative08() {
+        test(obj08, J.class, false);
+    }
+    @Benchmark public void testNegative09() {
+        test(obj09, J.class, false);
+    }
+    @Benchmark public void testNegative10() {
+        test(obj10, J.class, false);
+    }
+    @Benchmark public void testNegative16() {
+        test(obj16, J.class, false);
+    }
+    @Benchmark public void testNegative20() {
+        test(obj20, J.class, false);
+    }
+    @Benchmark public void testNegative30() {
+        test(obj30, J.class, false);
+    }
+    @Benchmark public void testNegative32() {
+        test(obj32, J.class, false);
+    }
+    @Benchmark public void testNegative40() {
+        test(obj40, J.class, false);
+    }
+    @Benchmark public void testNegative50() {
+        test(obj50, J.class, false);
+    }
+    @Benchmark public void testNegative55() {
+        test(obj55, J.class, false);
+    }
+    @Benchmark public void testNegative56() {
+        test(obj56, J.class, false);
+    }
+    @Benchmark public void testNegative57() {
+        test(obj57, J.class, false);
+    }
+    @Benchmark public void testNegative58() {
+        test(obj58, J.class, false);
+    }
+    @Benchmark public void testNegative59() {
+        test(obj59, J.class, false);
+    }
+    @Benchmark public void testNegative60() {
+        test(obj60, J.class, false);
+    }
+    @Benchmark public void testNegative61() {
+        test(obj61, J.class, false);
+    }
+    @Benchmark public void testNegative62() {
+        test(obj62, J.class, false);
+    }
+    @Benchmark public void testNegative63() {
+        test(obj63, J.class, false);
+    }
+
+    @Benchmark public void testNegative64() {
+        test(obj64, J.class, false);
+    }
+}
diff --git a/test/micro/org/openjdk/bench/vm/lang/TypePollution.java b/test/micro/org/openjdk/bench/vm/lang/TypePollution.java
new file mode 100644
index 00000000000..8109e205d15
--- /dev/null
+++ b/test/micro/org/openjdk/bench/vm/lang/TypePollution.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2024, Red Hat, Inc.. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.vm.lang;
+
+import org.openjdk.jmh.annotations.*;
+
+import java.io.Serializable;
+import java.lang.reflect.*;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.concurrent.TimeUnit;
+import java.util.function.*;
+
+/*
+ * A test to demonstrate type pollution.
+ */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Warmup(iterations = 4, time = 2) +@Measurement(iterations = 4, time = 2) +@Fork(value = 3) +public class TypePollution { + + static class DynamicInvocationHandler implements InvocationHandler { + + @Override + public Object invoke(Object proxy, Method method, Object[] args) { + return null; + } + } + + interface I01 {} + interface I02 {} + interface I03 {} + interface I04 {} + interface I05 {} + interface I06 {} + interface I07 {} + interface I08 {} + interface I09 {} + interface I10 {} + interface I11 {} + interface I12 {} + interface I13 {} + interface I14 {} + interface I15 {} + interface I16 {} + interface I17 {} + interface I18 {} + interface I19 {} + interface I20 {} + + static Class[] classes; + + static { + classes = new Class[] { I01.class, I02.class, I03.class, I04.class, I05.class, + I06.class, I07.class, I08.class, I09.class, I10.class, + I11.class, I12.class, I13.class, I14.class, I15.class, + I16.class, I17.class, I18.class, I19.class, I20.class }; + } + + private static final int NOOFOBJECTS = 100; + + public Object[] objectArray; + + public Random rand = new Random(0); + + @Setup(Level.Trial) + public void setup() { + objectArray = new Object[1000]; + var loader = getClass().getClassLoader(); + Class[] someInterfaces = new Class[0]; + for (int i = 0; i < objectArray.length; i++) { + Set> aSet = new HashSet>(); + for (int j = 0; j < 6; j++) { + aSet.add(classes[rand.nextInt(classes.length)]); + } + Class[] interfaceArray = new Class[aSet.size()]; + interfaceArray = aSet.toArray(interfaceArray); + objectArray[i] = Proxy.newProxyInstance(loader, interfaceArray, new DynamicInvocationHandler()); + } + } + + int probe = 99; + + @Benchmark + @Fork(jvmArgsAppend={"-XX:+UnlockDiagnosticVMOptions", "-XX:-UseSecondarySupersTable", "-XX:-UseSecondarySuperCache"}) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public long 
parallelInstanceOfInterfaceSwitchLinearNoSCC() { + return parallelInstanceOfInterfaceSwitch(); + } + + @Benchmark + @Fork(jvmArgsAppend={"-XX:+UnlockDiagnosticVMOptions", "-XX:-UseSecondarySupersTable", "-XX:+UseSecondarySuperCache"}) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public long parallelInstanceOfInterfaceSwitchLinearSCC() { + return parallelInstanceOfInterfaceSwitch(); + } + + @Benchmark + @Fork(jvmArgsAppend={"-XX:+UnlockDiagnosticVMOptions", "-XX:+UseSecondarySupersTable", "-XX:-UseSecondarySuperCache"}) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public long parallelInstanceOfInterfaceSwitchTableNoSCC() { + return parallelInstanceOfInterfaceSwitch(); + } + + @Benchmark + @Fork(jvmArgsAppend={"-XX:+UnlockDiagnosticVMOptions", "-XX:+UseSecondarySupersTable", "-XX:+UseSecondarySuperCache"}) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public long parallelInstanceOfInterfaceSwitchTableSCC() { + return parallelInstanceOfInterfaceSwitch(); + } + + long parallelInstanceOfInterfaceSwitch() { + Supplier s = () -> { + long sum = 0; + for (int i = 0; i < 10000; i++) { + sum += instanceOfInterfaceSwitch(); + } + return sum; + }; + try { + CompletableFuture future = CompletableFuture.supplyAsync(s); + return s.get() + future.get(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Benchmark + @Fork(jvmArgsAppend={"-XX:+UnlockDiagnosticVMOptions", "-XX:-UseSecondarySupersTable", "-XX:-UseSecondarySuperCache"}) + public int instanceOfInterfaceSwitchLinearNoSCC() { + return instanceOfInterfaceSwitch(); + } + + @Benchmark + @Fork(jvmArgsAppend={"-XX:+UnlockDiagnosticVMOptions", "-XX:-UseSecondarySupersTable", "-XX:+UseSecondarySuperCache"}) + public int instanceOfInterfaceSwitchLinearSCC() { + return instanceOfInterfaceSwitch(); + } + + @Benchmark + @Fork(jvmArgsAppend={"-XX:+UnlockDiagnosticVMOptions", "-XX:+UseSecondarySupersTable", "-XX:-UseSecondarySuperCache"}) + public int instanceOfInterfaceSwitchTableNoSCC() { + return 
instanceOfInterfaceSwitch(); + } + + @Benchmark + @Fork(jvmArgsAppend={"-XX:+UnlockDiagnosticVMOptions", "-XX:+UseSecondarySupersTable", "-XX:+UseSecondarySuperCache"}) + public int instanceOfInterfaceSwitchTableSCC() { + return instanceOfInterfaceSwitch(); + } + + int instanceOfInterfaceSwitch() { + int dummy = 0; + for (int i = 0; i < 100; i++) { + probe ^= probe << 13; // xorshift + probe ^= probe >>> 17; + probe ^= probe << 5; + dummy += switch(objectArray[Math.abs(probe) % objectArray.length]) { + case I01 inst -> 1; + case I02 inst -> 2; + case I03 inst -> 3; + case I04 inst -> 4; + case I05 inst -> 5; + case I06 inst -> 6; + case I07 inst -> 7; + case I08 inst -> 8; + default -> 10; + }; + probe ^= probe << 13; // xorshift + probe ^= probe >>> 17; + probe ^= probe << 5; + dummy += switch(objectArray[Math.abs(probe) % objectArray.length]) { + case I18 inst -> 8; + case I17 inst -> 7; + case I16 inst -> 6; + case I15 inst -> 5; + case I14 inst -> 4; + case I13 inst -> 3; + case I12 inst -> 2; + case I11 inst -> 1; + default -> 0; + }; + } + return dummy; + } +}