8006572: DoubleStream.sum() & DoubleSummaryStats implementations that reduce numerical errors
Reviewed-by: psandoz, mduigou
This commit is contained in:
parent
2469f9573a
commit
3eafe5802b
@ -63,6 +63,7 @@ import java.util.stream.Collector;
|
||||
public class DoubleSummaryStatistics implements DoubleConsumer {
|
||||
private long count;
|
||||
private double sum;
|
||||
private double sumCompensation; // Low order bits of sum
|
||||
private double min = Double.POSITIVE_INFINITY;
|
||||
private double max = Double.NEGATIVE_INFINITY;
|
||||
|
||||
@ -81,7 +82,7 @@ public class DoubleSummaryStatistics implements DoubleConsumer {
|
||||
@Override
|
||||
public void accept(double value) {
|
||||
++count;
|
||||
sum += value;
|
||||
sumWithCompensation(value);
|
||||
min = Math.min(min, value);
|
||||
max = Math.max(max, value);
|
||||
}
|
||||
@ -95,11 +96,23 @@ public class DoubleSummaryStatistics implements DoubleConsumer {
|
||||
*/
|
||||
public void combine(DoubleSummaryStatistics other) {
|
||||
count += other.count;
|
||||
sum += other.sum;
|
||||
sumWithCompensation(other.sum);
|
||||
sumWithCompensation(other.sumCompensation);
|
||||
min = Math.min(min, other.min);
|
||||
max = Math.max(max, other.max);
|
||||
}
|
||||
|
||||
/**
|
||||
* Incorporate a new double value using Kahan summation /
|
||||
* compensated summation.
|
||||
*/
|
||||
private void sumWithCompensation(double value) {
|
||||
double tmp = value - sumCompensation;
|
||||
double velvel = sum + tmp; // Little wolf of rounding error
|
||||
sumCompensation = (velvel - sum) - tmp;
|
||||
sum = velvel;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the count of values recorded.
|
||||
*
|
||||
@ -133,7 +146,8 @@ public class DoubleSummaryStatistics implements DoubleConsumer {
|
||||
* @return the sum of values, or zero if none
|
||||
*/
|
||||
public final double getSum() {
|
||||
return sum;
|
||||
// Better error bounds to add both terms as the final sum
|
||||
return sum + sumCompensation;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -505,13 +505,42 @@ public final class Collectors {
|
||||
*/
|
||||
public static <T> Collector<T, ?, Double>
|
||||
summingDouble(ToDoubleFunction<? super T> mapper) {
|
||||
/*
|
||||
* In the arrays allocated for the collect operation, index 0
|
||||
* holds the high-order bits of the running sum and index 1
|
||||
* holds the low-order bits of the sum computed via
|
||||
* compensated summation.
|
||||
*/
|
||||
return new CollectorImpl<>(
|
||||
() -> new double[1],
|
||||
(a, t) -> { a[0] += mapper.applyAsDouble(t); },
|
||||
(a, b) -> { a[0] += b[0]; return a; },
|
||||
a -> a[0], CH_NOID);
|
||||
() -> new double[2],
|
||||
(a, t) -> { sumWithCompensation(a, mapper.applyAsDouble(t)); },
|
||||
(a, b) -> { sumWithCompensation(a, b[0]); return sumWithCompensation(a, b[1]); },
|
||||
// Better error bounds to add both terms as the final sum
|
||||
a -> a[0] + a[1],
|
||||
CH_NOID);
|
||||
}
|
||||
|
||||
/**
|
||||
* Incorporate a new double value using Kahan summation /
|
||||
* compensation summation.
|
||||
*
|
||||
* High-order bits of the sum are in intermediateSum[0], low-order
|
||||
* bits of the sum are in intermediateSum[1], any additional
|
||||
* elements are application-specific.
|
||||
*
|
||||
* @param intermediateSum the high-order and low-order words of the intermediate sum
|
||||
* @param value the name value to be included in the running sum
|
||||
*/
|
||||
static double[] sumWithCompensation(double[] intermediateSum, double value) {
|
||||
double tmp = value - intermediateSum[1];
|
||||
double sum = intermediateSum[0];
|
||||
double velvel = sum + tmp; // Little wolf of rounding error
|
||||
intermediateSum[1] = (velvel - sum) - tmp;
|
||||
intermediateSum[0] = velvel;
|
||||
return intermediateSum;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a {@code Collector} that produces the arithmetic mean of an integer-valued
|
||||
* function applied to the input elements. If no elements are present,
|
||||
@ -560,17 +589,31 @@ public final class Collectors {
|
||||
* value is a {@code NaN} or the sum is at any point a {@code NaN} then the
|
||||
* average will be {@code NaN}.
|
||||
*
|
||||
* @implNote The {@code double} format can represent all
|
||||
* consecutive integers in the range -2<sup>53</sup> to
|
||||
* 2<sup>53</sup>. If the pipeline has more than 2<sup>53</sup>
|
||||
* values, the divisor in the average computation will saturate at
|
||||
* 2<sup>53</sup>, leading to additional numerical errors.
|
||||
*
|
||||
* @param <T> the type of the input elements
|
||||
* @param mapper a function extracting the property to be averaged
|
||||
* @return a {@code Collector} that produces the arithmetic mean of a derived property
|
||||
*/
|
||||
public static <T> Collector<T, ?, Double>
|
||||
averagingDouble(ToDoubleFunction<? super T> mapper) {
|
||||
/*
|
||||
* In the arrays allocated for the collect operation, index 0
|
||||
* holds the high-order bits of the running sum, index 1 holds
|
||||
* the low-order bits of the sum computed via compensated
|
||||
* summation, and index 2 holds the number of values seen.
|
||||
*/
|
||||
return new CollectorImpl<>(
|
||||
() -> new double[2],
|
||||
(a, t) -> { a[0] += mapper.applyAsDouble(t); a[1]++; },
|
||||
(a, b) -> { a[0] += b[0]; a[1] += b[1]; return a; },
|
||||
a -> (a[1] == 0) ? 0.0d : a[0] / a[1], CH_NOID);
|
||||
() -> new double[3],
|
||||
(a, t) -> { sumWithCompensation(a, mapper.applyAsDouble(t)); a[2]++; },
|
||||
(a, b) -> { sumWithCompensation(a, b[0]); sumWithCompensation(a, b[1]); a[2] += b[2]; return a; },
|
||||
// Better error bounds to add both terms as the final sum to compute average
|
||||
a -> (a[2] == 0) ? 0.0d : ((a[0] + a[1]) / a[2]),
|
||||
CH_NOID);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -377,8 +377,23 @@ abstract class DoublePipeline<E_IN>
|
||||
|
||||
@Override
|
||||
public final double sum() {
|
||||
// TODO: better algorithm to compensate for errors
|
||||
return reduce(0.0, Double::sum);
|
||||
/*
|
||||
* In the arrays allocated for the collect operation, index 0
|
||||
* holds the high-order bits of the running sum and index 1
|
||||
* holds the low-order bits of the sum computed via
|
||||
* compensated summation.
|
||||
*/
|
||||
double[] summation = collect(() -> new double[2],
|
||||
(ll, d) -> {
|
||||
Collectors.sumWithCompensation(ll, d);
|
||||
},
|
||||
(ll, rr) -> {
|
||||
Collectors.sumWithCompensation(ll, rr[0]);
|
||||
Collectors.sumWithCompensation(ll, rr[1]);
|
||||
});
|
||||
|
||||
// Better error bounds to add both terms as the final sum
|
||||
return summation[0] + summation[1];
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -391,20 +406,37 @@ abstract class DoublePipeline<E_IN>
|
||||
return reduce(Math::max);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*
|
||||
* @implNote The {@code double} format can represent all
|
||||
* consecutive integers in the range -2<sup>53</sup> to
|
||||
* 2<sup>53</sup>. If the pipeline has more than 2<sup>53</sup>
|
||||
* values, the divisor in the average computation will saturate at
|
||||
* 2<sup>53</sup>, leading to additional numerical errors.
|
||||
*/
|
||||
@Override
|
||||
public final OptionalDouble average() {
|
||||
double[] avg = collect(() -> new double[2],
|
||||
(ll, i) -> {
|
||||
ll[0]++;
|
||||
ll[1] += i;
|
||||
/*
|
||||
* In the arrays allocated for the collect operation, index 0
|
||||
* holds the high-order bits of the running sum, index 1 holds
|
||||
* the low-order bits of the sum computed via compensated
|
||||
* summation, and index 2 holds the number of values seen.
|
||||
*/
|
||||
double[] avg = collect(() -> new double[3],
|
||||
(ll, d) -> {
|
||||
ll[2]++;
|
||||
Collectors.sumWithCompensation(ll, d);
|
||||
},
|
||||
(ll, rr) -> {
|
||||
ll[0] += rr[0];
|
||||
ll[1] += rr[1];
|
||||
Collectors.sumWithCompensation(ll, rr[0]);
|
||||
Collectors.sumWithCompensation(ll, rr[1]);
|
||||
ll[2] += rr[2];
|
||||
});
|
||||
return avg[0] > 0
|
||||
? OptionalDouble.of(avg[1] / avg[0])
|
||||
: OptionalDouble.empty();
|
||||
return avg[2] > 0
|
||||
// Better error bounds to add both terms as the final sum to compute average
|
||||
? OptionalDouble.of((avg[0] + avg[1]) / avg[2])
|
||||
: OptionalDouble.empty();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
108
jdk/test/java/util/stream/TestDoubleSumAverage.java
Normal file
108
jdk/test/java/util/stream/TestDoubleSumAverage.java
Normal file
@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
import java.util.function.*;
|
||||
import java.util.stream.*;
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8006572
|
||||
* @summary Test for use of non-naive summation in stream-related sum and average operations.
|
||||
*/
|
||||
public class TestDoubleSumAverage {
    /**
     * Runs every check and throws if any of them reported a failure.
     *
     * @param args ignored
     * @throws RuntimeException if at least one numerical check failed
     */
    public static void main(String... args) {
        int failures = 0;

        failures += testForCompensation();
        failures += testZeroAverageOfNonEmptyStream();

        if (failures > 0) {
            throw new RuntimeException("Found " + failures + " numerical failure(s).");
        }
    }

    /**
     * Compute the sum and average of a sequence of double values in
     * various ways and report an error if naive summation is used.
     *
     * @return the number of comparisons that exceeded the error threshold
     */
    private static int testForCompensation() {
        int failures = 0;

        /*
         * The exact sum of the test stream is 1 + 1e6*(ulp(1.0)/2),
         * i.e. 1 + 5e5*ulp(1.0), but a naive summation algorithm
         * will return 1.0 since (1.0 + ulp(1.0)/2) will round to
         * 1.0 again.
         */
        double base = 1.0;
        double increment = Math.ulp(base) / 2.0;
        int count = 1_000_001;

        double expectedSum = base + (increment * (count - 1));
        double expectedAvg = expectedSum / count;

        // Factory for a double stream of [base, increment, ..., increment] limited to a size of count
        Supplier<DoubleStream> ds = () -> DoubleStream.iterate(base, e -> increment).limit(count);

        DoubleSummaryStatistics stats = ds.get().collect(DoubleSummaryStatistics::new,
                                                         DoubleSummaryStatistics::accept,
                                                         DoubleSummaryStatistics::combine);

        failures += compareUlpDifference(expectedSum, stats.getSum(), 3);
        failures += compareUlpDifference(expectedAvg, stats.getAverage(), 3);

        failures += compareUlpDifference(expectedSum,
                                         ds.get().sum(), 3);
        failures += compareUlpDifference(expectedAvg,
                                         ds.get().average().getAsDouble(), 3);

        failures += compareUlpDifference(expectedSum,
                                         ds.get().boxed().collect(Collectors.summingDouble(d -> d)), 3);
        failures += compareUlpDifference(expectedAvg,
                                         ds.get().boxed().collect(Collectors.averagingDouble(d -> d)), 3);
        return failures;
    }

    /**
     * Test to verify that the average of a non-empty stream of zeros is
     * present (not an empty optional) and equal to zero.
     *
     * @return 1 if the computed average differs from zero, otherwise 0
     */
    private static int testZeroAverageOfNonEmptyStream() {
        Supplier<DoubleStream> ds = () -> DoubleStream.iterate(0.0, e -> 0.0).limit(10);

        return compareUlpDifference(0.0, ds.get().average().getAsDouble(), 0);
    }

    /**
     * Compute the ulp difference of two double values and compare against an error threshold.
     * A difference that is NaN (for example because {@code computed} is NaN) is
     * reported as a failure.
     *
     * @param expected the reference value; its ulp is the unit of the difference
     * @param computed the value under test
     * @param threshold the largest acceptable difference, in ulps of {@code expected}
     * @return 1 if the difference exceeds the threshold or is NaN, otherwise 0
     */
    private static int compareUlpDifference(double expected, double computed, double threshold) {
        double ulpDifference = Math.abs(expected - computed) / Math.ulp(expected);

        // Negated "<=" rather than ">" so that a NaN difference is a failure:
        // every ordered comparison against NaN is false.
        if (!(ulpDifference <= threshold)) {
            System.err.printf("Numerical summation error too large, %g ulps rather than %g.%n",
                              ulpDifference, threshold);
            return 1;
        }
        return 0;
    }
}
|
Loading…
Reference in New Issue
Block a user