From 033cced6e11bbe7862d9cdd279264b3098d294ba Mon Sep 17 00:00:00 2001 From: Thomas Stuefe Date: Wed, 29 Nov 2023 13:16:38 +0000 Subject: [PATCH] 8320368: Per-CPU optimization of Klass range reservation Reviewed-by: rkennke, rehn --- .../cpu/aarch64/compressedKlass_aarch64.cpp | 133 ++++++++++++++++++ src/hotspot/cpu/ppc/compressedKlass_ppc.cpp | 48 +++++++ .../cpu/riscv/compressedKlass_riscv.cpp | 76 ++++++++++ src/hotspot/cpu/s390/compressedKlass_s390.cpp | 52 +++++++ src/hotspot/cpu/x86/compressedKlass_x86.cpp | 49 +++++++ src/hotspot/os/posix/os_posix.cpp | 5 +- src/hotspot/os/windows/os_windows.cpp | 7 +- src/hotspot/share/cds/metaspaceShared.cpp | 6 +- src/hotspot/share/memory/metaspace.cpp | 52 ++----- src/hotspot/share/memory/metaspace.hpp | 2 +- src/hotspot/share/oops/compressedKlass.cpp | 104 ++++++++------ src/hotspot/share/oops/compressedKlass.hpp | 17 ++- src/hotspot/share/runtime/globals.hpp | 4 + src/hotspot/share/runtime/os.cpp | 11 +- ...essedCPUSpecificClassSpaceReservation.java | 132 +++++++++++++++++ 15 files changed, 590 insertions(+), 108 deletions(-) create mode 100644 src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp create mode 100644 src/hotspot/cpu/ppc/compressedKlass_ppc.cpp create mode 100644 src/hotspot/cpu/riscv/compressedKlass_riscv.cpp create mode 100644 src/hotspot/cpu/s390/compressedKlass_s390.cpp create mode 100644 src/hotspot/cpu/x86/compressedKlass_x86.cpp create mode 100644 test/hotspot/jtreg/runtime/CompressedOops/CompressedCPUSpecificClassSpaceReservation.java diff --git a/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp b/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp new file mode 100644 index 00000000000..d035ab21093 --- /dev/null +++ b/src/hotspot/cpu/aarch64/compressedKlass_aarch64.cpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2023, Red Hat, Inc. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "logging/log.hpp" +#include "oops/compressedKlass.hpp" +#include "memory/metaspace.hpp" +#include "runtime/os.hpp" +#include "utilities/globalDefinitions.hpp" + +// Helper function; reserve at an address that is compatible with EOR +static char* reserve_at_eor_compatible_address(size_t size, bool aslr) { + char* result = nullptr; + + log_debug(metaspace, map)("Trying to reserve at an EOR-compatible address"); + + // We need immediates that are 32-bit aligned, since they should not intersect nKlass + // bits. They should not be larger than the addressable space either, but we still + // lack a good abstraction for that (see JDK-8320584), therefore we assume and hard-code + // 2^48 as a reasonable higher ceiling. 
+ static const uint16_t immediates[] = { + 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0007, 0x0008, 0x000c, 0x000e, + 0x000f, 0x0010, 0x0018, 0x001c, 0x001e, 0x001f, 0x0020, 0x0030, 0x0038, + 0x003c, 0x003e, 0x003f, 0x0040, 0x0060, 0x0070, 0x0078, 0x007c, 0x007e, + 0x007f, 0x0080, 0x00c0, 0x00e0, 0x00f0, 0x00f8, 0x00fc, 0x00fe, 0x00ff, + 0x0100, 0x0180, 0x01c0, 0x01e0, 0x01f0, 0x01f8, 0x01fc, 0x01fe, 0x01ff, + 0x0200, 0x0300, 0x0380, 0x03c0, 0x03e0, 0x03f0, 0x03f8, 0x03fc, 0x03fe, + 0x03ff, 0x0400, 0x0600, 0x0700, 0x0780, 0x07c0, 0x07e0, 0x07f0, 0x07f8, + 0x07fc, 0x07fe, 0x07ff, 0x0800, 0x0c00, 0x0e00, 0x0f00, 0x0f80, 0x0fc0, + 0x0fe0, 0x0ff0, 0x0ff8, 0x0ffc, 0x0ffe, 0x0fff, 0x1000, 0x1800, 0x1c00, + 0x1e00, 0x1f00, 0x1f80, 0x1fc0, 0x1fe0, 0x1ff0, 0x1ff8, 0x1ffc, 0x1ffe, + 0x1fff, 0x2000, 0x3000, 0x3800, 0x3c00, 0x3e00, 0x3f00, 0x3f80, 0x3fc0, + 0x3fe0, 0x3ff0, 0x3ff8, 0x3ffc, 0x3ffe, 0x3fff, 0x4000, 0x6000, 0x7000, + 0x7800, 0x7c00, 0x7e00, 0x7f00, 0x7f80, 0x7fc0, 0x7fe0, 0x7ff0, 0x7ff8, + 0x7ffc, 0x7ffe, 0x7fff + }; + static constexpr int num_immediates = sizeof(immediates) / sizeof(immediates[0]); + const int start_index = aslr ? os::random() : 0; + constexpr int max_tries = 64; + for (int ntry = 0; result == nullptr && ntry < max_tries; ntry ++) { + // As in os::attempt_reserve_memory_between, we alternate between higher and lower + // addresses; this maximizes the chance of early success if part of the address space + // is not accessible (e.g. 39-bit address space). + const int alt_index = (ntry & 1) ? 
0 : num_immediates / 2; + const int index = (start_index + ntry + alt_index) % num_immediates; + const uint64_t immediate = ((uint64_t)immediates[index]) << 32; + assert(immediate > 0 && Assembler::operand_valid_for_logical_immediate(/*is32*/false, immediate), + "Invalid immediate %d " UINT64_FORMAT, index, immediate); + result = os::attempt_reserve_memory_at((char*)immediate, size, false); + if (result == nullptr) { + log_trace(metaspace, map)("Failed to attach at " UINT64_FORMAT_X, immediate); + } + } + if (result == nullptr) { + log_debug(metaspace, map)("Failed to reserve at any EOR-compatible address"); + } + return result; +} +char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base) { + + char* result = nullptr; + + // Optimize for base=0 shift=0 + if (optimize_for_zero_base) { + result = reserve_address_space_for_unscaled_encoding(size, aslr); + } + + // If this fails, we don't bother aiming for zero-based encoding (base=0 shift>0), since it has no + // advantages over EOR or movk mode. + + // EOR-compatible reservation + if (result == nullptr) { + result = reserve_at_eor_compatible_address(size, aslr); + } + + // Movk-compatible reservation via probing. + if (result == nullptr) { + result = reserve_address_space_for_16bit_move(size, aslr); + } + + // Movk-compatible reservation via overallocation. + // If that failed, attempt to allocate at any 4G-aligned address. Let the system decide where. For ASLR, + // we now rely on the system. + // Compared with the probing done above, this has two disadvantages: + // - on a kernel with 52-bit address space we may get an address that has bits set between [48, 52). + // In that case, we may need two movk moves (not yet implemented). + // - this technique leads to temporary over-reservation of address space; it will spike the vsize of + // the process. Therefore it may fail if a vsize limit is in place (e.g. ulimit -v). 
+ if (result == nullptr) { + constexpr size_t alignment = nth_bit(32); + log_debug(metaspace, map)("Trying to reserve at a 32-bit-aligned address"); + result = os::reserve_memory_aligned(size, alignment, false); + } + + return result; +} + +void CompressedKlassPointers::initialize(address addr, size_t len) { + constexpr uintptr_t unscaled_max = nth_bit(32); + assert(len <= unscaled_max, "Klass range larger than 32 bits?"); + + // Shift is always 0 on aarch64. + _shift = 0; + + // On aarch64, we don't bother with zero-based encoding (base=0 shift>0). + address const end = addr + len; + _base = (end <= (address)unscaled_max) ? nullptr : addr; + + _range = end - _base; +} diff --git a/src/hotspot/cpu/ppc/compressedKlass_ppc.cpp b/src/hotspot/cpu/ppc/compressedKlass_ppc.cpp new file mode 100644 index 00000000000..51012eef865 --- /dev/null +++ b/src/hotspot/cpu/ppc/compressedKlass_ppc.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2023, Red Hat, Inc. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "oops/compressedKlass.hpp" +#include "utilities/globalDefinitions.hpp" + +char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base) { + + char* result = nullptr; + + // Optimize for base=0 shift=0; failing that, for base=0 shift>0 + if (optimize_for_zero_base) { + result = reserve_address_space_for_unscaled_encoding(size, aslr); + if (result == nullptr) { + result = reserve_address_space_for_zerobased_encoding(size, aslr); + } + } + + // Optimize for a single 16-bit move: a base that has only bits set in its third quadrant [32..48). + if (result == nullptr) { + result = reserve_address_space_for_16bit_move(size, aslr); + } + + return result; +} diff --git a/src/hotspot/cpu/riscv/compressedKlass_riscv.cpp b/src/hotspot/cpu/riscv/compressedKlass_riscv.cpp new file mode 100644 index 00000000000..cffadb4189b --- /dev/null +++ b/src/hotspot/cpu/riscv/compressedKlass_riscv.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2023, Red Hat, Inc. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "oops/compressedKlass.hpp" +#include "utilities/globalDefinitions.hpp" + +char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base) { + + char* result = nullptr; + + // RiscV loads a 64-bit immediate in up to four separate steps, splitting it into four different sections + // (two 32-bit sections, each split into two subsections of 20/12 bits). + // + // 63 ....... 44 43 ... 32 31 ....... 12 11 ... 0 + // D C B A + // + // A "good" base is, in this order: + // 1) only bits in A; this would be an address < 4KB, which is unrealistic on normal Linux boxes since + // the typical default for vm.mmap_min_address is 64KB. We ignore that. + // 2) only bits in B: a 12-bit-aligned address below 4GB. 12 bit = 4KB, but since mmap reserves at + // page boundaries, we can ignore the alignment. + // 3) only bits in C: a 4GB-aligned address that is lower than 16TB. + // 4) only bits in D: a 16TB-aligned address. + + // First, attempt to allocate < 4GB. 
We do this unconditionally: + // - if can_optimize_for_zero_base, a <4GB mapping start would allow us to run unscaled (base = 0, shift = 0) + // - if !can_optimize_for_zero_base, a <4GB mapping start is still good, the resulting immediate can be encoded + // with one instruction (2) + result = reserve_address_space_for_unscaled_encoding(size, aslr); + + // Failing that, attempt to reserve for base=zero shift>0 + if (result == nullptr && optimize_for_zero_base) { + result = reserve_address_space_for_zerobased_encoding(size, aslr); + } + + // Failing that, optimize for case (3) - a base with only bits set between [33-44) + if (result == nullptr) { + const uintptr_t from = nth_bit(32 + (optimize_for_zero_base ? LogKlassAlignmentInBytes : 0)); + constexpr uintptr_t to = nth_bit(44); + constexpr size_t alignment = nth_bit(32); + result = reserve_address_space_X(from, to, size, alignment, aslr); + } + + // Failing that, optimize for case (4) - a base with only bits set between [44-64) + if (result == nullptr) { + constexpr uintptr_t from = nth_bit(44); + constexpr uintptr_t to = UINT64_MAX; + constexpr size_t alignment = nth_bit(44); + result = reserve_address_space_X(from, to, size, alignment, aslr); + } + + return result; +} diff --git a/src/hotspot/cpu/s390/compressedKlass_s390.cpp b/src/hotspot/cpu/s390/compressedKlass_s390.cpp new file mode 100644 index 00000000000..868df0f02d7 --- /dev/null +++ b/src/hotspot/cpu/s390/compressedKlass_s390.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2023, Red Hat, Inc. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "oops/compressedKlass.hpp" +#include "utilities/globalDefinitions.hpp" + +char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base) { + + char* result = nullptr; + + uintptr_t tried_below = 0; + + // First, attempt to allocate < 4GB. We do this unconditionally: + // - if optimize_for_zero_base, a <4GB mapping start allows us to use base=0 shift=0 + // - if !optimize_for_zero_base, a <4GB mapping start allows us to use algfi + result = reserve_address_space_for_unscaled_encoding(size, aslr); + + // Failing that, try optimized for base=0 shift>0 + if (result == nullptr && optimize_for_zero_base) { + result = reserve_address_space_for_zerobased_encoding(size, aslr); + } + + // Failing that, aim for a base that is 4G-aligned; such a base can be set with aih. 
+ if (result == nullptr) { + result = reserve_address_space_for_16bit_move(size, aslr); + } + + return result; +} diff --git a/src/hotspot/cpu/x86/compressedKlass_x86.cpp b/src/hotspot/cpu/x86/compressedKlass_x86.cpp new file mode 100644 index 00000000000..5b5a405bcef --- /dev/null +++ b/src/hotspot/cpu/x86/compressedKlass_x86.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023, Red Hat, Inc. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" + +#ifdef _LP64 + +#include "oops/compressedKlass.hpp" +#include "utilities/globalDefinitions.hpp" + +char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base) { + + char* result = nullptr; + + // Optimize for unscaled encoding; failing that, for zero-based encoding: + if (optimize_for_zero_base) { + result = reserve_address_space_for_unscaled_encoding(size, aslr); + if (result == nullptr) { + result = reserve_address_space_for_zerobased_encoding(size, aslr); + } + } // end: low-address reservation + + // Nothing more to optimize for on x64. If base != 0, we will always emit the full 64-bit immediate. + return result; +} + +#endif // _LP64 diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp index 1b75693b138..960fb465590 100644 --- a/src/hotspot/os/posix/os_posix.cpp +++ b/src/hotspot/os/posix/os_posix.cpp @@ -343,9 +343,10 @@ char* os::replace_existing_mapping_with_file_mapping(char* base, size_t size, in } static size_t calculate_aligned_extra_size(size_t size, size_t alignment) { - assert((alignment & (os::vm_allocation_granularity() - 1)) == 0, + assert(is_aligned(alignment, os::vm_allocation_granularity()), "Alignment must be a multiple of allocation granularity (page size)"); - assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned"); + assert(is_aligned(size, os::vm_allocation_granularity()), + "Size must be a multiple of allocation granularity (page size)"); size_t extra_size = size + alignment; assert(extra_size >= size, "overflow, size is too large to allow alignment"); diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp index 56301fe2e87..223bc0bd135 100644 --- a/src/hotspot/os/windows/os_windows.cpp +++ b/src/hotspot/os/windows/os_windows.cpp @@ -3331,9 +3331,10 @@ char* os::replace_existing_mapping_with_file_mapping(char* base, size_t size, in // virtual space 
to get requested alignment, like posix-like os's. // Windows prevents multiple thread from remapping over each other so this loop is thread-safe. static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int file_desc) { - assert((alignment & (os::vm_allocation_granularity() - 1)) == 0, - "Alignment must be a multiple of allocation granularity (page size)"); - assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned"); + assert(is_aligned(alignment, os::vm_allocation_granularity()), + "Alignment must be a multiple of allocation granularity (page size)"); + assert(is_aligned(size, os::vm_allocation_granularity()), + "Size must be a multiple of allocation granularity (page size)"); size_t extra_size = size + alignment; assert(extra_size >= size, "overflow, size is too large to allow alignment"); diff --git a/src/hotspot/share/cds/metaspaceShared.cpp b/src/hotspot/share/cds/metaspaceShared.cpp index b73048a9bf5..d85db121db3 100644 --- a/src/hotspot/share/cds/metaspaceShared.cpp +++ b/src/hotspot/share/cds/metaspaceShared.cpp @@ -1331,11 +1331,11 @@ char* MetaspaceShared::reserve_address_space_for_archives(FileMapInfo* static_ma os::vm_page_size(), (char*) base_address); } else { // We did not manage to reserve at the preferred address, or were instructed to relocate. In that - // case we reserve whereever possible, but the start address needs to be encodable as narrow Klass - // encoding base since the archived heap objects contain nKlass IDs precalculated toward the start + // case we reserve wherever possible, but the start address needs to be encodable as narrow Klass + // encoding base since the archived heap objects contain nKlass IDs pre-calculated toward the start // of the shared Metaspace. That prevents us from using zero-based encoding and therefore we won't // try allocating in low-address regions. 
- total_space_rs = Metaspace::reserve_address_space_for_compressed_classes(total_range_size, false /* try_in_low_address_ranges */); + total_space_rs = Metaspace::reserve_address_space_for_compressed_classes(total_range_size, false /* optimize_for_zero_base */); } if (!total_space_rs.is_reserved()) { diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp index 1aa0923ff9c..573df165d0d 100644 --- a/src/hotspot/share/memory/metaspace.cpp +++ b/src/hotspot/share/memory/metaspace.cpp @@ -1,6 +1,7 @@ /* * Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2017, 2021 SAP SE. All rights reserved. + * Copyright (c) 2023, Red Hat, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -578,54 +579,15 @@ bool Metaspace::class_space_is_initialized() { // Reserve a range of memory that is to contain narrow Klass IDs. If "try_in_low_address_ranges" // is true, we will attempt to reserve memory suitable for zero-based encoding. -ReservedSpace Metaspace::reserve_address_space_for_compressed_classes(size_t size, bool try_in_low_address_ranges) { - +ReservedSpace Metaspace::reserve_address_space_for_compressed_classes(size_t size, bool optimize_for_zero_base) { char* result = nullptr; - const bool randomize = RandomizeClassSpaceLocation; - // First try to reserve in low address ranges. 
- if (try_in_low_address_ranges) { - constexpr uintptr_t unscaled_max = ((uintptr_t)UINT_MAX + 1); - log_debug(metaspace, map)("Trying below " SIZE_FORMAT_X " for unscaled narrow Klass encoding", unscaled_max); - result = os::attempt_reserve_memory_between(nullptr, (char*)unscaled_max, - size, Metaspace::reserve_alignment(), randomize); - if (result == nullptr) { - constexpr uintptr_t zerobased_max = unscaled_max << LogKlassAlignmentInBytes; - log_debug(metaspace, map)("Trying below " SIZE_FORMAT_X " for zero-based narrow Klass encoding", zerobased_max); - result = os::attempt_reserve_memory_between((char*)unscaled_max, (char*)zerobased_max, - size, Metaspace::reserve_alignment(), randomize); - } - } // end: low-address reservation - -#if defined(AARCH64) || defined(PPC64) || defined(S390) - if (result == nullptr) { - // Failing zero-based allocation, or in strict_base mode, try to come up with - // an optimized start address that is amenable to JITs that use 16-bit moves to - // load the encoding base as a short immediate. - // Therefore we try here for an address that when right-shifted by - // LogKlassAlignmentInBytes has only 1s in the third 16-bit quadrant. - // - // Example: for shift=3, the address space searched would be - // [0x0080_0000_0000 - 0xFFF8_0000_0000]. 
- - // Number of least significant bits that should be zero - constexpr int lo_zero_bits = 32 + LogKlassAlignmentInBytes; - // Number of most significant bits that should be zero - constexpr int hi_zero_bits = 16; - - constexpr size_t alignment = nth_bit(lo_zero_bits); - assert(alignment >= Metaspace::reserve_alignment(), "Sanity"); - constexpr uint64_t min = alignment; - constexpr uint64_t max = nth_bit(64 - hi_zero_bits); - - log_debug(metaspace, map)("Trying between " UINT64_FORMAT_X " and " UINT64_FORMAT_X - " with " SIZE_FORMAT_X " alignment", min, max, alignment); - result = os::attempt_reserve_memory_between((char*)min, (char*)max, size, alignment, randomize); - } -#endif // defined(AARCH64) || defined(PPC64) || defined(S390) + NOT_ZERO(result = + (char*) CompressedKlassPointers::reserve_address_space_for_compressed_classes(size, RandomizeClassSpaceLocation, + optimize_for_zero_base)); if (result == nullptr) { - // Fallback: reserve anywhere and hope the resulting block is usable. 
+ // Fallback: reserve anywhere log_debug(metaspace, map)("Trying anywhere..."); result = os::reserve_memory_aligned(size, Metaspace::reserve_alignment(), false); } @@ -633,10 +595,12 @@ ReservedSpace Metaspace::reserve_address_space_for_compressed_classes(size_t siz // Wrap resulting range in ReservedSpace ReservedSpace rs; if (result != nullptr) { + log_debug(metaspace, map)("Mapped at " PTR_FORMAT, p2i(result)); assert(is_aligned(result, Metaspace::reserve_alignment()), "Alignment too small for metaspace"); rs = ReservedSpace::space_for_range(result, size, Metaspace::reserve_alignment(), os::vm_page_size(), false, false); } else { + log_debug(metaspace, map)("Failed to map."); rs = ReservedSpace(); } return rs; diff --git a/src/hotspot/share/memory/metaspace.hpp b/src/hotspot/share/memory/metaspace.hpp index 652a2be35d9..a90ff775647 100644 --- a/src/hotspot/share/memory/metaspace.hpp +++ b/src/hotspot/share/memory/metaspace.hpp @@ -76,7 +76,7 @@ public: // Reserve a range of memory that is to contain narrow Klass IDs. If "try_in_low_address_ranges" // is true, we will attempt to reserve memory suitable for zero-based encoding. - static ReservedSpace reserve_address_space_for_compressed_classes(size_t size, bool try_in_low_address_ranges); + static ReservedSpace reserve_address_space_for_compressed_classes(size_t size, bool optimize_for_zero_base); // Given a prereserved space, use that to set up the compressed class space list. 
static void initialize_class_space(ReservedSpace rs); diff --git a/src/hotspot/share/oops/compressedKlass.cpp b/src/hotspot/share/oops/compressedKlass.cpp index 40b4ae17011..f9d0466dda4 100644 --- a/src/hotspot/share/oops/compressedKlass.cpp +++ b/src/hotspot/share/oops/compressedKlass.cpp @@ -23,8 +23,11 @@ */ #include "precompiled.hpp" +#include "logging/log.hpp" +#include "memory/metaspace.hpp" #include "oops/compressedKlass.hpp" #include "runtime/globals.hpp" +#include "runtime/os.hpp" #include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" #include "utilities/ostream.hpp" @@ -35,6 +38,13 @@ size_t CompressedKlassPointers::_range = 0; #ifdef _LP64 +#ifdef ASSERT +void CompressedKlassPointers::assert_is_valid_encoding(address addr, size_t len, address base, int shift) { + assert(base + nth_bit(32 + shift) >= addr + len, "Encoding (base=" PTR_FORMAT ", shift=%d) does not " + "fully cover the class range " PTR_FORMAT "-" PTR_FORMAT, p2i(base), shift, p2i(addr), p2i(addr + len)); +} +#endif + // Given a klass range [addr, addr+len) and a given encoding scheme, assert that this scheme covers the range, then // set this encoding scheme. Used by CDS at runtime to re-instate the scheme used to pre-compute klass ids for // archived heap objects. @@ -50,45 +60,62 @@ void CompressedKlassPointers::initialize_for_given_encoding(address addr, size_t assert(requested_base == addr, "Invalid requested base"); assert(encoding_range_end >= end, "Encoding does not cover the full Klass range"); - set_base(requested_base); - set_shift(requested_shift); - set_range(encoding_range_size); + _base = requested_base; + _shift = requested_shift; + _range = encoding_range_size; + + DEBUG_ONLY(assert_is_valid_encoding(addr, len, _base, _shift);) } -// Given an address range [addr, addr+len) which the encoding is supposed to -// cover, choose base, shift and range. 
-// The address range is the expected range of uncompressed Klass pointers we -// will encounter (and the implicit promise that there will be no Klass -// structures outside this range). +char* CompressedKlassPointers::reserve_address_space_X(uintptr_t from, uintptr_t to, size_t size, size_t alignment, bool aslr) { + alignment = MAX2(Metaspace::reserve_alignment(), alignment); + return os::attempt_reserve_memory_between((char*)from, (char*)to, size, alignment, aslr); +} + +char* CompressedKlassPointers::reserve_address_space_for_unscaled_encoding(size_t size, bool aslr) { + return reserve_address_space_X(0, nth_bit(32), size, Metaspace::reserve_alignment(), aslr); +} + +char* CompressedKlassPointers::reserve_address_space_for_zerobased_encoding(size_t size, bool aslr) { + return reserve_address_space_X(nth_bit(32), nth_bit(32 + LogKlassAlignmentInBytes), size, Metaspace::reserve_alignment(), aslr); +} + +char* CompressedKlassPointers::reserve_address_space_for_16bit_move(size_t size, bool aslr) { + return reserve_address_space_X(nth_bit(32), nth_bit(48), size, nth_bit(32), aslr); +} + +#ifndef AARCH64 +// On aarch64 we have an own version; all other platforms use the default version void CompressedKlassPointers::initialize(address addr, size_t len) { + // The default version of this code tries, in order of preference: + // -unscaled (base=0 shift=0) + // -zero-based (base=0 shift>0) + // -nonzero-base (base>0 shift=0) + // Note that base>0 shift>0 should never be needed, since the klass range will + // never exceed 4GB. 
+ constexpr uintptr_t unscaled_max = nth_bit(32); + assert(len <= unscaled_max, "Klass range larger than 32 bits?"); + + constexpr uintptr_t zerobased_max = nth_bit(32 + LogKlassAlignmentInBytes); + address const end = addr + len; - - address base; - int shift; - size_t range; - - // Attempt to run with encoding base == zero - if (end <= (address)KlassEncodingMetaspaceMax) { - base = 0; + if (end <= (address)unscaled_max) { + _base = nullptr; + _shift = 0; } else { - base = addr; + if (end <= (address)zerobased_max) { + _base = nullptr; + _shift = LogKlassAlignmentInBytes; + } else { + _base = addr; + _shift = 0; + } } + _range = end - _base; - // Highest offset a Klass* can ever have in relation to base. - range = end - base; - - // We may not even need a shift if the range fits into 32bit: - const uint64_t UnscaledClassSpaceMax = (uint64_t(max_juint) + 1); - if (range <= UnscaledClassSpaceMax) { - shift = 0; - } else { - shift = LogKlassAlignmentInBytes; - } - - set_base(base); - set_shift(shift); - set_range(range); + DEBUG_ONLY(assert_is_valid_encoding(addr, len, _base, _shift);) } +#endif // !AARCH64 void CompressedKlassPointers::print_mode(outputStream* st) { st->print_cr("Narrow klass base: " PTR_FORMAT ", Narrow klass shift: %d, " @@ -96,19 +123,4 @@ void CompressedKlassPointers::print_mode(outputStream* st) { range()); } -void CompressedKlassPointers::set_base(address base) { - assert(UseCompressedClassPointers, "no compressed klass ptrs?"); - _base = base; -} - -void CompressedKlassPointers::set_shift(int shift) { - assert(shift == 0 || shift == LogKlassAlignmentInBytes, "invalid shift for klass ptrs"); - _shift = shift; -} - -void CompressedKlassPointers::set_range(size_t range) { - assert(UseCompressedClassPointers, "no compressed klass ptrs?"); - _range = range; -} - #endif // _LP64 diff --git a/src/hotspot/share/oops/compressedKlass.hpp b/src/hotspot/share/oops/compressedKlass.hpp index ac0bad9ea82..e871fe5bdcc 100644 --- 
a/src/hotspot/share/oops/compressedKlass.hpp +++ b/src/hotspot/share/oops/compressedKlass.hpp @@ -56,15 +56,24 @@ class CompressedKlassPointers : public AllStatic { // could use this info to optimize encoding. static size_t _range; - static void set_base(address base); - static void set_range(size_t range); - static void set_shift(int shift); + // Helper function for common cases. + static char* reserve_address_space_X(uintptr_t from, uintptr_t to, size_t size, size_t alignment, bool aslr); + static char* reserve_address_space_for_unscaled_encoding(size_t size, bool aslr); + static char* reserve_address_space_for_zerobased_encoding(size_t size, bool aslr); + static char* reserve_address_space_for_16bit_move(size_t size, bool aslr); + + DEBUG_ONLY(static void assert_is_valid_encoding(address addr, size_t len, address base, int shift);) public: + // Reserve a range of memory that is to contain Klass structures which are referenced by narrow Klass IDs. + // If optimize_for_zero_base is true, the implementation will attempt to reserve optimized for zero-based encoding. + static char* reserve_address_space_for_compressed_classes(size_t size, bool aslr, bool optimize_for_zero_base); + + // Given a klass range [addr, addr+len) and a given encoding scheme, assert that this scheme covers the range, then // set this encoding scheme. Used by CDS at runtime to re-instate the scheme used to pre-compute klass ids for - // archived heap objects. + // archived heap objects. In this case, we don't have the freedom to choose base and shift; they are handed to + // us from CDS. 
static void initialize_for_given_encoding(address addr, size_t len, address requested_base, int requested_shift); // Given an address range [addr, addr+len) which the encoding is supposed to diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index 99d2488a593..dce90c78eb4 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -1999,6 +1999,10 @@ const int ObjectAlignmentInBytes = 8; "(default) disables native heap trimming.") \ range(0, UINT_MAX) \ \ + develop(bool, SimulateFullAddressSpace, false, \ + "Simulates a very populated, fragmented address space; no " \ + "targeted reservations will succeed.") \ + \ product(bool, ProfileExceptionHandlers, true, \ "Profile exception handlers") \ diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp index cafc9b10cbb..0eb4015a246 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -1810,7 +1810,7 @@ char* os::reserve_memory(size_t bytes, bool executable, MEMFLAGS flags) { } char* os::attempt_reserve_memory_at(char* addr, size_t bytes, bool executable) { - char* result = pd_attempt_reserve_memory_at(addr, bytes, executable); + char* result = SimulateFullAddressSpace ? nullptr : pd_attempt_reserve_memory_at(addr, bytes, executable); if (result != nullptr) { MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC); log_debug(os)("Reserved memory at " INTPTR_FORMAT " for " SIZE_FORMAT " bytes.", p2i(addr), bytes); @@ -1818,7 +1818,6 @@ char* os::attempt_reserve_memory_at(char* addr, size_t bytes, bool executable) { log_debug(os)("Attempt to reserve memory at " INTPTR_FORMAT " for " SIZE_FORMAT " bytes failed, errno %d", p2i(addr), bytes, get_last_error()); } - return result; } @@ -1874,10 +1873,10 @@ char* os::attempt_reserve_memory_between(char* min, char* max, size_t bytes, siz // we attempt to minimize fragmentation. 
constexpr unsigned total_shuffle_threshold = 1024; -#define ARGSFMT " range [" PTR_FORMAT "-" PTR_FORMAT "), size " SIZE_FORMAT_X ", alignment " SIZE_FORMAT_X ", randomize: %d" +#define ARGSFMT "range [" PTR_FORMAT "-" PTR_FORMAT "), size " SIZE_FORMAT_X ", alignment " SIZE_FORMAT_X ", randomize: %d" #define ARGSFMTARGS p2i(min), p2i(max), bytes, alignment, randomize - log_trace(os, map) ("reserve_between (" ARGSFMT ")", ARGSFMTARGS); + log_debug(os, map) ("reserve_between (" ARGSFMT ")", ARGSFMTARGS); assert(is_power_of_2(alignment), "alignment invalid (" ARGSFMT ")", ARGSFMTARGS); assert(alignment < SIZE_MAX / 2, "alignment too large (" ARGSFMT ")", ARGSFMTARGS); @@ -1987,7 +1986,7 @@ char* os::attempt_reserve_memory_between(char* min, char* max, size_t bytes, siz const unsigned candidate_offset = points[i]; char* const candidate = lo_att + candidate_offset * alignment_adjusted; assert(candidate <= hi_att, "Invalid offset %u (" ARGSFMT ")", candidate_offset, ARGSFMTARGS); - result = os::pd_attempt_reserve_memory_at(candidate, bytes, false); + result = SimulateFullAddressSpace ? 
nullptr : os::pd_attempt_reserve_memory_at(candidate, bytes, false); if (!result) { log_trace(os, map)("Failed to attach at " PTR_FORMAT, p2i(candidate)); } @@ -2005,6 +2004,8 @@ char* os::attempt_reserve_memory_between(char* min, char* max, size_t bytes, siz log_trace(os, map)(ERRFMT, ERRFMTARGS); log_debug(os, map)("successfully attached at " PTR_FORMAT, p2i(result)); MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC); + } else { + log_debug(os, map)("failed to attach anywhere in [" PTR_FORMAT "-" PTR_FORMAT ")", p2i(min), p2i(max)); } return result; #undef ARGSFMT diff --git a/test/hotspot/jtreg/runtime/CompressedOops/CompressedCPUSpecificClassSpaceReservation.java b/test/hotspot/jtreg/runtime/CompressedOops/CompressedCPUSpecificClassSpaceReservation.java new file mode 100644 index 00000000000..8a25b1eff88 --- /dev/null +++ b/test/hotspot/jtreg/runtime/CompressedOops/CompressedCPUSpecificClassSpaceReservation.java @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2013, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @summary Test the various CPU-specific reservation schemes + * @requires vm.bits == 64 & !vm.graal.enabled & vm.debug == true + * @requires vm.flagless + * @requires (os.family != "windows") & (os.family != "aix") + * @library /test/lib + * @modules java.base/jdk.internal.misc + * java.management + * @run driver CompressedCPUSpecificClassSpaceReservation + */ + +import jdk.test.lib.Platform; +import jdk.test.lib.process.OutputAnalyzer; +import jdk.test.lib.process.ProcessTools; +import jtreg.SkippedException; + +import java.io.IOException; + +public class CompressedCPUSpecificClassSpaceReservation { + // Note: windows: On windows, we currently have the issue that os::reserve_memory_aligned relies on + // os::attempt_reserve_memory_at because VirtualAlloc cannot be unmapped in parts; this precludes use of + // +SimulateFullAddressSpace (VM won't be able to reserve heap). Therefore we exclude the test for windows + // for now. + + private static void do_test(boolean CDS) throws IOException { + // We start the VM with -XX:+SimulateFullAddressSpace, which means the JVM will go through all motions + // of reserving the cds+class space, but never succeed. That means we see every single allocation attempt. + // We start with -Xlog options enabled. The expected output goes like this: + // [0.017s][debug][os,map] reserve_between (range [0x0000000000000000-0x0000000100000000), size 0x41000000, alignment 0x1000000, randomize: 1) + ProcessBuilder pb = ProcessTools.createLimitedTestJavaProcessBuilder( + "-Xshare:" + (CDS ? 
"on" : "off"), + "-Xmx128m", + "-XX:CompressedClassSpaceSize=128m", + "-Xlog:metaspace*", "-Xlog:metaspace+map=trace", "-Xlog:os+map=trace", + "-XX:+SimulateFullAddressSpace", // So that no resevation attempt will succeed + "-version"); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + + final String tryReserveForUnscaled = "reserve_between (range [0x0000000000000000-0x0000000100000000)"; + final String tryReserveForZeroBased = "reserve_between (range [0x0000000100000000-0x0000000800000000)"; + final String tryReserveFor16bitMoveIntoQ3 = "reserve_between (range [0x0000000100000000-0x0001000000000000)"; + if (Platform.isAArch64()) { + if (CDS) { + output.shouldNotContain(tryReserveForUnscaled); + } else { + output.shouldContain(tryReserveForUnscaled); + } + output.shouldContain("Trying to reserve at an EOR-compatible address"); + output.shouldNotContain(tryReserveForZeroBased); + output.shouldContain(tryReserveFor16bitMoveIntoQ3); + } else if (Platform.isPPC()) { + if (CDS) { + output.shouldNotContain(tryReserveForUnscaled); + output.shouldNotContain(tryReserveForZeroBased); + } else { + output.shouldContain(tryReserveForUnscaled); + output.shouldContain(tryReserveForZeroBased); + } + output.shouldContain(tryReserveFor16bitMoveIntoQ3); + } else if (Platform.isRISCV64()) { + output.shouldContain(tryReserveForUnscaled); // unconditionally + if (CDS) { + output.shouldNotContain(tryReserveForZeroBased); + // bits 32..44 + output.shouldContain("reserve_between (range [0x0000000100000000-0x0000100000000000)"); + } else { + output.shouldContain(tryReserveForZeroBased); + // bits 32..44, but not lower than zero-based limit + output.shouldContain("reserve_between (range [0x0000000800000000-0x0000100000000000)"); + } + // bits 44..64 + output.shouldContain("reserve_between (range [0x0000100000000000-0xffffffffffffffff)"); + } else if (Platform.isS390x()) { + output.shouldContain(tryReserveForUnscaled); // unconditionally + if (CDS) { + 
output.shouldNotContain(tryReserveForZeroBased); + } else { + output.shouldContain(tryReserveForZeroBased); + } + output.shouldContain(tryReserveFor16bitMoveIntoQ3); + } else if (Platform.isX64()) { + if (CDS) { + output.shouldNotContain(tryReserveForUnscaled); + output.shouldNotContain(tryReserveForZeroBased); + } else { + output.shouldContain(tryReserveForUnscaled); + output.shouldContain(tryReserveForZeroBased); + } + } else { + throw new RuntimeException("Unexpected platform"); + } + + // In all cases we should have managed to map successfully eventually + if (CDS) { + output.shouldContain("CDS archive(s) mapped at:"); + } else { + output.shouldContain("CDS archive(s) not mapped"); + } + output.shouldContain("Compressed class space mapped at:"); + } + + public static void main(String[] args) throws Exception { + System.out.println("Test with CDS"); + do_test(true); + System.out.println("Test without CDS"); + do_test(false); + } +}