8213827: NUMA heap allocation does not respect process membind/interleave settings
Optionally use libnuma v2 API to query for and support NUMA membind/interleave process configuration. Reviewed-by: tschatzl, sangheki
This commit is contained in:
parent
a5f592e5b1
commit
8bd5f49675
@ -33,6 +33,7 @@
|
||||
#include "compiler/disassembler.hpp"
|
||||
#include "interpreter/interpreter.hpp"
|
||||
#include "logging/log.hpp"
|
||||
#include "logging/logStream.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
#include "memory/filemap.hpp"
|
||||
#include "oops/oop.inline.hpp"
|
||||
@ -2780,7 +2781,7 @@ int os::Linux::get_existing_num_nodes() {
|
||||
|
||||
// Get the total number of nodes in the system including nodes without memory.
|
||||
for (node = 0; node <= highest_node_number; node++) {
|
||||
if (isnode_in_existing_nodes(node)) {
|
||||
if (is_node_in_existing_nodes(node)) {
|
||||
num_nodes++;
|
||||
}
|
||||
}
|
||||
@ -2796,7 +2797,7 @@ size_t os::numa_get_leaf_groups(int *ids, size_t size) {
|
||||
// node number. If the nodes have been bound explicitly using numactl membind,
|
||||
// then allocate memory from those nodes only.
|
||||
for (int node = 0; node <= highest_node_number; node++) {
|
||||
if (Linux::isnode_in_bound_nodes((unsigned int)node)) {
|
||||
if (Linux::is_node_in_bound_nodes((unsigned int)node)) {
|
||||
ids[i++] = node;
|
||||
}
|
||||
}
|
||||
@ -2899,11 +2900,15 @@ bool os::Linux::libnuma_init() {
|
||||
libnuma_dlsym(handle, "numa_distance")));
|
||||
set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t,
|
||||
libnuma_v2_dlsym(handle, "numa_get_membind")));
|
||||
set_numa_get_interleave_mask(CAST_TO_FN_PTR(numa_get_interleave_mask_func_t,
|
||||
libnuma_v2_dlsym(handle, "numa_get_interleave_mask")));
|
||||
|
||||
if (numa_available() != -1) {
|
||||
set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes"));
|
||||
set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr"));
|
||||
set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr"));
|
||||
set_numa_interleave_bitmask(_numa_get_interleave_mask());
|
||||
set_numa_membind_bitmask(_numa_get_membind());
|
||||
// Create an index -> node mapping, since nodes are not always consecutive
|
||||
_nindex_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true);
|
||||
rebuild_nindex_to_node_map();
|
||||
@ -2929,7 +2934,7 @@ void os::Linux::rebuild_nindex_to_node_map() {
|
||||
|
||||
nindex_to_node()->clear();
|
||||
for (int node = 0; node <= highest_node_number; node++) {
|
||||
if (Linux::isnode_in_existing_nodes(node)) {
|
||||
if (Linux::is_node_in_existing_nodes(node)) {
|
||||
nindex_to_node()->append(node);
|
||||
}
|
||||
}
|
||||
@ -2966,16 +2971,16 @@ void os::Linux::rebuild_cpu_to_node_map() {
|
||||
// the closest configured node. Check also if node is bound, i.e. it's allowed
|
||||
// to allocate memory from the node. If it's not allowed, map cpus in that node
|
||||
// to the closest node from which memory allocation is allowed.
|
||||
if (!isnode_in_configured_nodes(nindex_to_node()->at(i)) ||
|
||||
!isnode_in_bound_nodes(nindex_to_node()->at(i))) {
|
||||
if (!is_node_in_configured_nodes(nindex_to_node()->at(i)) ||
|
||||
!is_node_in_bound_nodes(nindex_to_node()->at(i))) {
|
||||
closest_distance = INT_MAX;
|
||||
// Check distance from all remaining nodes in the system. Ignore distance
|
||||
// from itself, from another non-configured node, and from another non-bound
|
||||
// node.
|
||||
for (size_t m = 0; m < node_num; m++) {
|
||||
if (m != i &&
|
||||
isnode_in_configured_nodes(nindex_to_node()->at(m)) &&
|
||||
isnode_in_bound_nodes(nindex_to_node()->at(m))) {
|
||||
is_node_in_configured_nodes(nindex_to_node()->at(m)) &&
|
||||
is_node_in_bound_nodes(nindex_to_node()->at(m))) {
|
||||
distance = numa_distance(nindex_to_node()->at(i), nindex_to_node()->at(m));
|
||||
// If a closest node is found, update. There is always at least one
|
||||
// configured and bound node in the system so there is always at least
|
||||
@ -3030,9 +3035,13 @@ os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
|
||||
os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
|
||||
os::Linux::numa_distance_func_t os::Linux::_numa_distance;
|
||||
os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
|
||||
os::Linux::numa_get_interleave_mask_func_t os::Linux::_numa_get_interleave_mask;
|
||||
os::Linux::NumaAllocationPolicy os::Linux::_current_numa_policy;
|
||||
unsigned long* os::Linux::_numa_all_nodes;
|
||||
struct bitmask* os::Linux::_numa_all_nodes_ptr;
|
||||
struct bitmask* os::Linux::_numa_nodes_ptr;
|
||||
struct bitmask* os::Linux::_numa_interleave_bitmask;
|
||||
struct bitmask* os::Linux::_numa_membind_bitmask;
|
||||
|
||||
bool os::pd_uncommit_memory(char* addr, size_t size) {
|
||||
uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE,
|
||||
@ -4936,6 +4945,74 @@ void os::pd_init_container_support() {
|
||||
OSContainer::init();
|
||||
}
|
||||
|
||||
void os::Linux::numa_init() {
|
||||
|
||||
// Java can be invoked as
|
||||
// 1. Without numactl and heap will be allocated/configured on all nodes as
|
||||
// per the system policy.
|
||||
// 2. With numactl --interleave:
|
||||
// Use numa_get_interleave_mask(v2) API to get nodes bitmask. The same
|
||||
// API for membind case bitmask is reset.
|
||||
// Interleave is only hint and Kernel can fallback to other nodes if
|
||||
// no memory is available on the target nodes.
|
||||
// 3. With numactl --membind:
|
||||
// Use numa_get_membind(v2) API to get nodes bitmask. The same API for
|
||||
// interleave case returns bitmask of all nodes.
|
||||
// numa_all_nodes_ptr holds bitmask of all nodes.
|
||||
// numa_get_interleave_mask(v2) and numa_get_membind(v2) APIs returns correct
|
||||
// bitmask when externally configured to run on all or fewer nodes.
|
||||
|
||||
if (!Linux::libnuma_init()) {
|
||||
UseNUMA = false;
|
||||
} else {
|
||||
if ((Linux::numa_max_node() < 1) || Linux::is_bound_to_single_node()) {
|
||||
// If there's only one node (they start from 0) or if the process
|
||||
// is bound explicitly to a single node using membind, disable NUMA.
|
||||
UseNUMA = false;
|
||||
} else {
|
||||
|
||||
LogTarget(Info,os) log;
|
||||
LogStream ls(log);
|
||||
|
||||
Linux::set_configured_numa_policy(Linux::identify_numa_policy());
|
||||
|
||||
struct bitmask* bmp = Linux::_numa_membind_bitmask;
|
||||
const char* numa_mode = "membind";
|
||||
|
||||
if (Linux::is_running_in_interleave_mode()) {
|
||||
bmp = Linux::_numa_interleave_bitmask;
|
||||
numa_mode = "interleave";
|
||||
}
|
||||
|
||||
ls.print("UseNUMA is enabled and invoked in '%s' mode."
|
||||
" Heap will be configured using NUMA memory nodes:", numa_mode);
|
||||
|
||||
for (int node = 0; node <= Linux::numa_max_node(); node++) {
|
||||
if (Linux::_numa_bitmask_isbitset(bmp, node)) {
|
||||
ls.print(" %d", node);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (UseParallelGC && UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
|
||||
// With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
|
||||
// we can make the adaptive lgrp chunk resizing work. If the user specified both
|
||||
// UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn
|
||||
// and disable adaptive resizing.
|
||||
if (UseAdaptiveSizePolicy || UseAdaptiveNUMAChunkSizing) {
|
||||
warning("UseNUMA is not fully compatible with SHM/HugeTLBFS large pages, "
|
||||
"disabling adaptive resizing (-XX:-UseAdaptiveSizePolicy -XX:-UseAdaptiveNUMAChunkSizing)");
|
||||
UseAdaptiveSizePolicy = false;
|
||||
UseAdaptiveNUMAChunkSizing = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!UseNUMA && ForceNUMA) {
|
||||
UseNUMA = true;
|
||||
}
|
||||
}
|
||||
|
||||
// this is called _after_ the global arguments have been parsed
|
||||
jint os::init_2(void) {
|
||||
|
||||
@ -4980,32 +5057,7 @@ jint os::init_2(void) {
|
||||
Linux::glibc_version(), Linux::libpthread_version());
|
||||
|
||||
if (UseNUMA) {
|
||||
if (!Linux::libnuma_init()) {
|
||||
UseNUMA = false;
|
||||
} else {
|
||||
if ((Linux::numa_max_node() < 1) || Linux::isbound_to_single_node()) {
|
||||
// If there's only one node (they start from 0) or if the process
|
||||
// is bound explicitly to a single node using membind, disable NUMA.
|
||||
UseNUMA = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (UseParallelGC && UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
|
||||
// With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
|
||||
// we can make the adaptive lgrp chunk resizing work. If the user specified both
|
||||
// UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn
|
||||
// and disable adaptive resizing.
|
||||
if (UseAdaptiveSizePolicy || UseAdaptiveNUMAChunkSizing) {
|
||||
warning("UseNUMA is not fully compatible with SHM/HugeTLBFS large pages, "
|
||||
"disabling adaptive resizing (-XX:-UseAdaptiveSizePolicy -XX:-UseAdaptiveNUMAChunkSizing)");
|
||||
UseAdaptiveSizePolicy = false;
|
||||
UseAdaptiveNUMAChunkSizing = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!UseNUMA && ForceNUMA) {
|
||||
UseNUMA = true;
|
||||
}
|
||||
Linux::numa_init();
|
||||
}
|
||||
|
||||
if (MaxFDLimit) {
|
||||
|
@ -211,6 +211,7 @@ class Linux {
|
||||
// none present
|
||||
|
||||
private:
|
||||
static void numa_init();
|
||||
static void expand_stack_to(address bottom);
|
||||
|
||||
typedef int (*sched_getcpu_func_t)(void);
|
||||
@ -222,6 +223,7 @@ class Linux {
|
||||
typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
|
||||
typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t size, struct bitmask* mask);
|
||||
typedef struct bitmask* (*numa_get_membind_func_t)(void);
|
||||
typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void);
|
||||
|
||||
typedef void (*numa_set_bind_policy_func_t)(int policy);
|
||||
typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
|
||||
@ -239,9 +241,12 @@ class Linux {
|
||||
static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
|
||||
static numa_distance_func_t _numa_distance;
|
||||
static numa_get_membind_func_t _numa_get_membind;
|
||||
static numa_get_interleave_mask_func_t _numa_get_interleave_mask;
|
||||
static unsigned long* _numa_all_nodes;
|
||||
static struct bitmask* _numa_all_nodes_ptr;
|
||||
static struct bitmask* _numa_nodes_ptr;
|
||||
static struct bitmask* _numa_interleave_bitmask;
|
||||
static struct bitmask* _numa_membind_bitmask;
|
||||
|
||||
// Record the function pointer that sched_getcpu() dispatches through.
static void set_sched_getcpu(sched_getcpu_func_t func) {
  _sched_getcpu = func;
}
|
||||
// Record the numa_node_to_cpus function pointer.
static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) {
  _numa_node_to_cpus = func;
}
|
||||
@ -255,10 +260,21 @@ class Linux {
|
||||
// Record the numa_bitmask_isbitset function pointer used by the node
// membership checks in this class.
static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) {
  _numa_bitmask_isbitset = func;
}
|
||||
// Record the numa_distance function pointer.
static void set_numa_distance(numa_distance_func_t func) {
  _numa_distance = func;
}
|
||||
// Record the numa_get_membind function pointer.
static void set_numa_get_membind(numa_get_membind_func_t func) {
  _numa_get_membind = func;
}
|
||||
// Record the numa_get_interleave_mask function pointer.
static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) {
  _numa_get_interleave_mask = func;
}
|
||||
// Record the node mask passed to the v1 numa_interleave_memory entry point.
static void set_numa_all_nodes(unsigned long* ptr) {
  _numa_all_nodes = ptr;
}
|
||||
// Store the bitmask that ptr points at; the parameter carries one extra
// level of indirection. A NULL ptr clears the stored bitmask.
static void set_numa_all_nodes_ptr(struct bitmask **ptr) {
  if (ptr == NULL) {
    _numa_all_nodes_ptr = NULL;
  } else {
    _numa_all_nodes_ptr = *ptr;
  }
}
|
||||
// Store the bitmask that ptr points at; the parameter carries one extra
// level of indirection. A NULL ptr clears the stored bitmask.
static void set_numa_nodes_ptr(struct bitmask **ptr) {
  if (ptr == NULL) {
    _numa_nodes_ptr = NULL;
  } else {
    _numa_nodes_ptr = *ptr;
  }
}
|
||||
// Record the interleave node bitmask consulted when running in interleave mode.
static void set_numa_interleave_bitmask(struct bitmask* ptr) {
  _numa_interleave_bitmask = ptr;
}
|
||||
// Record the membind node bitmask consulted when not running in interleave mode.
static void set_numa_membind_bitmask(struct bitmask* ptr) {
  _numa_membind_bitmask = ptr;
}
|
||||
static int sched_getcpu_syscall(void);
|
||||
|
||||
// NUMA memory policy the process is treated as running under.
enum NumaAllocationPolicy {
  // First enumerator, hence the value of a zero-initialized
  // _current_numa_policy.
  NotInitialized,
  // Reported by identify_numa_policy() when no interleave bit is set.
  Membind,
  // Reported by identify_numa_policy() when any interleave bit is set.
  Interleave
};

// Policy most recently stored through set_configured_numa_policy().
static NumaAllocationPolicy _current_numa_policy;
|
||||
|
||||
public:
|
||||
// Call through the stored sched_getcpu function pointer; yields -1 when no
// pointer has been stored.
static int sched_getcpu() {
  if (_sched_getcpu == NULL) {
    return -1;
  }
  return _sched_getcpu();
}
|
||||
static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
|
||||
@ -272,11 +288,33 @@ class Linux {
|
||||
static int numa_tonode_memory(void *start, size_t size, int node) {
|
||||
return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
|
||||
}
|
||||
|
||||
static bool is_running_in_interleave_mode() {
|
||||
return _current_numa_policy == Interleave;
|
||||
}
|
||||
|
||||
// Record numa_policy as the current NUMA policy; this is the value that
// is_running_in_interleave_mode() inspects.
static void set_configured_numa_policy(NumaAllocationPolicy numa_policy)
{
  _current_numa_policy = numa_policy;
}
|
||||
|
||||
// Classify the NUMA policy from the stored interleave bitmask.
//
// Returns Interleave when any node in [0, numa_max_node()] is set in
// _numa_interleave_bitmask, otherwise Membind. Membind is also returned
// when the bitmask or the numa_bitmask_isbitset function pointer is NULL,
// matching the NULL checks done by the other node predicates in this class.
static NumaAllocationPolicy identify_numa_policy() {
  if (_numa_bitmask_isbitset == NULL || _numa_interleave_bitmask == NULL) {
    return Membind;
  }

  for (int node = 0; node <= Linux::numa_max_node(); node++) {
    if (_numa_bitmask_isbitset(_numa_interleave_bitmask, node)) {
      return Interleave;
    }
  }
  return Membind;
}
|
||||
|
||||
static void numa_interleave_memory(void *start, size_t size) {
|
||||
// Use v2 api if available
|
||||
if (_numa_interleave_memory_v2 != NULL && _numa_all_nodes_ptr != NULL) {
|
||||
_numa_interleave_memory_v2(start, size, _numa_all_nodes_ptr);
|
||||
} else if (_numa_interleave_memory != NULL && _numa_all_nodes != NULL) {
|
||||
// Prefer v2 API
|
||||
if (_numa_interleave_memory_v2 != NULL) {
|
||||
if (is_running_in_interleave_mode()) {
|
||||
_numa_interleave_memory_v2(start, size, _numa_interleave_bitmask);
|
||||
} else if (_numa_membind_bitmask != NULL) {
|
||||
_numa_interleave_memory_v2(start, size, _numa_membind_bitmask);
|
||||
}
|
||||
} else if (_numa_interleave_memory != NULL) {
|
||||
_numa_interleave_memory(start, size, _numa_all_nodes);
|
||||
}
|
||||
}
|
||||
@ -291,14 +329,14 @@ class Linux {
|
||||
static int get_node_by_cpu(int cpu_id);
|
||||
static int get_existing_num_nodes();
|
||||
// Check if numa node is configured (non-zero memory node).
|
||||
static bool isnode_in_configured_nodes(unsigned int n) {
|
||||
static bool is_node_in_configured_nodes(unsigned int n) {
|
||||
if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
|
||||
return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
// Check if numa node exists in the system (including zero memory nodes).
|
||||
static bool isnode_in_existing_nodes(unsigned int n) {
|
||||
static bool is_node_in_existing_nodes(unsigned int n) {
|
||||
if (_numa_bitmask_isbitset != NULL && _numa_nodes_ptr != NULL) {
|
||||
return _numa_bitmask_isbitset(_numa_nodes_ptr, n);
|
||||
} else if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
|
||||
@ -317,16 +355,19 @@ class Linux {
|
||||
return false;
|
||||
}
|
||||
// Check if node is in bound node set.
|
||||
static bool isnode_in_bound_nodes(int node) {
|
||||
if (_numa_get_membind != NULL && _numa_bitmask_isbitset != NULL) {
|
||||
return _numa_bitmask_isbitset(_numa_get_membind(), node);
|
||||
} else {
|
||||
return false;
|
||||
static bool is_node_in_bound_nodes(int node) {
|
||||
if (_numa_bitmask_isbitset != NULL) {
|
||||
if (is_running_in_interleave_mode()) {
|
||||
return _numa_bitmask_isbitset(_numa_interleave_bitmask, node);
|
||||
} else {
|
||||
return _numa_membind_bitmask != NULL ? _numa_bitmask_isbitset(_numa_membind_bitmask, node) : false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Check if bound to only one numa node.
|
||||
// Returns true if bound to a single numa node, otherwise returns false.
|
||||
static bool isbound_to_single_node() {
|
||||
static bool is_bound_to_single_node() {
|
||||
int nodes = 0;
|
||||
struct bitmask* bmp = NULL;
|
||||
unsigned int node = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user