8269851: OperatingSystemMXBean getProcessCpuLoad reports incorrect process cpu usage in containers
Co-authored-by: Severin Gehwolf <sgehwolf@openjdk.org>
Reviewed-by: sgehwolf
This commit is contained in:
parent
41b4c19086
commit
25f00d787c
@ -25,11 +25,14 @@
|
|||||||
|
|
||||||
package com.sun.management.internal;
|
package com.sun.management.internal;
|
||||||
|
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.function.DoubleSupplier;
|
||||||
|
import java.util.function.LongSupplier;
|
||||||
|
import java.util.function.ToDoubleFunction;
|
||||||
|
|
||||||
import jdk.internal.platform.Metrics;
|
import jdk.internal.platform.Metrics;
|
||||||
import sun.management.BaseOperatingSystemImpl;
|
import sun.management.BaseOperatingSystemImpl;
|
||||||
import sun.management.VMManagement;
|
import sun.management.VMManagement;
|
||||||
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
/**
|
/**
|
||||||
* Implementation class for the operating system.
|
* Implementation class for the operating system.
|
||||||
* Standard and committed hotspot-specific metrics if any.
|
* Standard and committed hotspot-specific metrics if any.
|
||||||
@ -42,8 +45,137 @@ class OperatingSystemImpl extends BaseOperatingSystemImpl
|
|||||||
|
|
||||||
private static final int MAX_ATTEMPTS_NUMBER = 10;
|
private static final int MAX_ATTEMPTS_NUMBER = 10;
|
||||||
private final Metrics containerMetrics;
|
private final Metrics containerMetrics;
|
||||||
private long usageTicks = 0; // used for cpu load calculation
|
private ContainerCpuTicks systemLoadTicks = new SystemCpuTicks();
|
||||||
private long totalTicks = 0; // used for cpu load calculation
|
private ContainerCpuTicks processLoadTicks = new ProcessCpuTicks();
|
||||||
|
|
||||||
|
private abstract class ContainerCpuTicks {
|
||||||
|
private long usageTicks = 0;
|
||||||
|
private long totalTicks = 0;
|
||||||
|
|
||||||
|
private double getUsageDividesTotal(long usageTicks, long totalTicks) {
|
||||||
|
// If cpu quota or cpu shares are in effect. Calculate the cpu load
|
||||||
|
// based on the following formula (similar to how
|
||||||
|
// getCpuLoad0() is being calculated):
|
||||||
|
//
|
||||||
|
// | usageTicks - usageTicks' |
|
||||||
|
// ------------------------------
|
||||||
|
// | totalTicks - totalTicks' |
|
||||||
|
//
|
||||||
|
// where usageTicks' and totalTicks' are historical values
|
||||||
|
// retrieved via an earlier call of this method.
|
||||||
|
if (usageTicks < 0 || totalTicks <= 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
long distance = usageTicks - this.usageTicks;
|
||||||
|
this.usageTicks = usageTicks;
|
||||||
|
long totalDistance = totalTicks - this.totalTicks;
|
||||||
|
this.totalTicks = totalTicks;
|
||||||
|
double systemLoad = 0.0;
|
||||||
|
if (distance > 0 && totalDistance > 0) {
|
||||||
|
systemLoad = ((double)distance) / totalDistance;
|
||||||
|
}
|
||||||
|
// Ensure the return value is in the range 0.0 -> 1.0
|
||||||
|
systemLoad = Math.max(0.0, systemLoad);
|
||||||
|
systemLoad = Math.min(1.0, systemLoad);
|
||||||
|
return systemLoad;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getContainerCpuLoad() {
|
||||||
|
assert(containerMetrics != null);
|
||||||
|
long quota = containerMetrics.getCpuQuota();
|
||||||
|
long share = containerMetrics.getCpuShares();
|
||||||
|
if (quota > 0) {
|
||||||
|
long numPeriods = containerMetrics.getCpuNumPeriods();
|
||||||
|
long quotaNanos = TimeUnit.MICROSECONDS.toNanos(quota * numPeriods);
|
||||||
|
return getUsageDividesTotal(cpuUsageSupplier().getAsLong(), quotaNanos);
|
||||||
|
} else if (share > 0) {
|
||||||
|
long hostTicks = getHostTotalCpuTicks0();
|
||||||
|
int totalCPUs = getHostOnlineCpuCount0();
|
||||||
|
int containerCPUs = getAvailableProcessors();
|
||||||
|
// scale the total host load to the actual container cpus
|
||||||
|
hostTicks = hostTicks * containerCPUs / totalCPUs;
|
||||||
|
return getUsageDividesTotal(cpuUsageSupplier().getAsLong(), hostTicks);
|
||||||
|
} else {
|
||||||
|
// If CPU quotas and shares are not active then find the average load for
|
||||||
|
// all online CPUs that are allowed to run this container.
|
||||||
|
|
||||||
|
// If the cpuset is the same as the host's one there is no need to iterate over each CPU
|
||||||
|
if (isCpuSetSameAsHostCpuSet()) {
|
||||||
|
return defaultCpuLoadSupplier().getAsDouble();
|
||||||
|
} else {
|
||||||
|
int[] cpuSet = containerMetrics.getEffectiveCpuSetCpus();
|
||||||
|
// in case the effectiveCPUSetCpus are not available, attempt to use just cpusets.cpus
|
||||||
|
if (cpuSet == null || cpuSet.length <= 0) {
|
||||||
|
cpuSet = containerMetrics.getCpuSetCpus();
|
||||||
|
}
|
||||||
|
if (cpuSet == null) {
|
||||||
|
// cgroups is mounted, but CPU resource is not limited.
|
||||||
|
// We can assume the VM is run on the host CPUs.
|
||||||
|
return defaultCpuLoadSupplier().getAsDouble();
|
||||||
|
} else if (cpuSet.length > 0) {
|
||||||
|
return cpuSetCalc().applyAsDouble(cpuSet);
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected abstract DoubleSupplier defaultCpuLoadSupplier();
|
||||||
|
protected abstract ToDoubleFunction<int[]> cpuSetCalc();
|
||||||
|
protected abstract LongSupplier cpuUsageSupplier();
|
||||||
|
}
|
||||||
|
|
||||||
|
private class ProcessCpuTicks extends ContainerCpuTicks {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DoubleSupplier defaultCpuLoadSupplier() {
|
||||||
|
return () -> getProcessCpuLoad0();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ToDoubleFunction<int[]> cpuSetCalc() {
|
||||||
|
return (int[] cpuSet) -> {
|
||||||
|
int totalCPUs = getHostOnlineCpuCount0();
|
||||||
|
int containerCPUs = getAvailableProcessors();
|
||||||
|
return Math.min(1.0, getProcessCpuLoad0() * totalCPUs / containerCPUs);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected LongSupplier cpuUsageSupplier() {
|
||||||
|
return () -> getProcessCpuTime();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private class SystemCpuTicks extends ContainerCpuTicks {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DoubleSupplier defaultCpuLoadSupplier() {
|
||||||
|
return () -> getCpuLoad0();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected ToDoubleFunction<int[]> cpuSetCalc() {
|
||||||
|
return (int[] cpuSet) -> {
|
||||||
|
double systemLoad = 0.0;
|
||||||
|
for (int cpu : cpuSet) {
|
||||||
|
double cpuLoad = getSingleCpuLoad0(cpu);
|
||||||
|
if (cpuLoad < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
systemLoad += cpuLoad;
|
||||||
|
}
|
||||||
|
return systemLoad / cpuSet.length;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected LongSupplier cpuUsageSupplier() {
|
||||||
|
return () -> containerMetrics.getCpuUsage();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
OperatingSystemImpl(VMManagement vm) {
|
OperatingSystemImpl(VMManagement vm) {
|
||||||
super(vm);
|
super(vm);
|
||||||
@ -134,90 +266,17 @@ class OperatingSystemImpl extends BaseOperatingSystemImpl
|
|||||||
return getMaxFileDescriptorCount0();
|
return getMaxFileDescriptorCount0();
|
||||||
}
|
}
|
||||||
|
|
||||||
private double getUsageDividesTotal(long usageTicks, long totalTicks) {
|
|
||||||
// If cpu quota or cpu shares are in effect calculate the cpu load
|
|
||||||
// based on the following formula (similar to how
|
|
||||||
// getCpuLoad0() is being calculated):
|
|
||||||
//
|
|
||||||
// | usageTicks - usageTicks' |
|
|
||||||
// ------------------------------
|
|
||||||
// | totalTicks - totalTicks' |
|
|
||||||
//
|
|
||||||
// where usageTicks' and totalTicks' are historical values
|
|
||||||
// retrieved via an earlier call of this method.
|
|
||||||
//
|
|
||||||
// Total ticks should be scaled to the container effective number
|
|
||||||
// of cpus, if cpu shares are in effect.
|
|
||||||
if (usageTicks < 0 || totalTicks <= 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
long distance = usageTicks - this.usageTicks;
|
|
||||||
this.usageTicks = usageTicks;
|
|
||||||
long totalDistance = totalTicks - this.totalTicks;
|
|
||||||
this.totalTicks = totalTicks;
|
|
||||||
|
|
||||||
double systemLoad = 0.0;
|
|
||||||
if (distance > 0 && totalDistance > 0) {
|
|
||||||
systemLoad = ((double)distance) / totalDistance;
|
|
||||||
}
|
|
||||||
// Ensure the return value is in the range 0.0 -> 1.0
|
|
||||||
systemLoad = Math.max(0.0, systemLoad);
|
|
||||||
systemLoad = Math.min(1.0, systemLoad);
|
|
||||||
return systemLoad;
|
|
||||||
}
|
|
||||||
|
|
||||||
public double getCpuLoad() {
|
public double getCpuLoad() {
|
||||||
if (containerMetrics != null) {
|
if (containerMetrics != null) {
|
||||||
long quota = containerMetrics.getCpuQuota();
|
return systemLoadTicks.getContainerCpuLoad();
|
||||||
long share = containerMetrics.getCpuShares();
|
|
||||||
long usageNanos = containerMetrics.getCpuUsage();
|
|
||||||
if (quota > 0) {
|
|
||||||
long numPeriods = containerMetrics.getCpuNumPeriods();
|
|
||||||
long quotaNanos = TimeUnit.MICROSECONDS.toNanos(quota * numPeriods);
|
|
||||||
return getUsageDividesTotal(usageNanos, quotaNanos);
|
|
||||||
} else if (share > 0) {
|
|
||||||
long hostTicks = getHostTotalCpuTicks0();
|
|
||||||
int totalCPUs = getHostOnlineCpuCount0();
|
|
||||||
int containerCPUs = getAvailableProcessors();
|
|
||||||
// scale the total host load to the actual container cpus
|
|
||||||
hostTicks = hostTicks * containerCPUs / totalCPUs;
|
|
||||||
return getUsageDividesTotal(usageNanos, hostTicks);
|
|
||||||
} else {
|
|
||||||
// If CPU quotas and shares are not active then find the average system load for
|
|
||||||
// all online CPUs that are allowed to run this container.
|
|
||||||
|
|
||||||
// If the cpuset is the same as the host's one there is no need to iterate over each CPU
|
|
||||||
if (isCpuSetSameAsHostCpuSet()) {
|
|
||||||
return getCpuLoad0();
|
|
||||||
} else {
|
|
||||||
int[] cpuSet = containerMetrics.getEffectiveCpuSetCpus();
|
|
||||||
// in case the effectiveCPUSetCpus are not available, attempt to use just cpusets.cpus
|
|
||||||
if (cpuSet == null || cpuSet.length <= 0) {
|
|
||||||
cpuSet = containerMetrics.getCpuSetCpus();
|
|
||||||
}
|
|
||||||
if (cpuSet == null) {
|
|
||||||
// cgroups is mounted, but CPU resource is not limited.
|
|
||||||
// We can assume the VM is run on the host CPUs.
|
|
||||||
return getCpuLoad0();
|
|
||||||
} else if (cpuSet.length > 0) {
|
|
||||||
double systemLoad = 0.0;
|
|
||||||
for (int cpu : cpuSet) {
|
|
||||||
double cpuLoad = getSingleCpuLoad0(cpu);
|
|
||||||
if (cpuLoad < 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
systemLoad += cpuLoad;
|
|
||||||
}
|
|
||||||
return systemLoad / cpuSet.length;
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return getCpuLoad0();
|
return getCpuLoad0();
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getProcessCpuLoad() {
|
public double getProcessCpuLoad() {
|
||||||
|
if (containerMetrics != null) {
|
||||||
|
return processLoadTicks.getContainerCpuLoad();
|
||||||
|
}
|
||||||
return getProcessCpuLoad0();
|
return getProcessCpuLoad0();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user