From 3eafe5802b8d425542f061e8bfc557d7b022d258 Mon Sep 17 00:00:00 2001 From: Joe Darcy Date: Sun, 1 Dec 2013 23:35:28 -0800 Subject: [PATCH] 8006572: DoubleStream.sum() & DoubleSummaryStats implementations that reduce numerical errors Reviewed-by: psandoz, mduigou --- .../java/util/DoubleSummaryStatistics.java | 20 +++- .../classes/java/util/stream/Collectors.java | 59 ++++++++-- .../java/util/stream/DoublePipeline.java | 54 +++++++-- .../util/stream/TestDoubleSumAverage.java | 108 ++++++++++++++++++ 4 files changed, 219 insertions(+), 22 deletions(-) create mode 100644 jdk/test/java/util/stream/TestDoubleSumAverage.java diff --git a/jdk/src/share/classes/java/util/DoubleSummaryStatistics.java b/jdk/src/share/classes/java/util/DoubleSummaryStatistics.java index be9d4b5ae56..8a4a0f49f18 100644 --- a/jdk/src/share/classes/java/util/DoubleSummaryStatistics.java +++ b/jdk/src/share/classes/java/util/DoubleSummaryStatistics.java @@ -63,6 +63,7 @@ import java.util.stream.Collector; public class DoubleSummaryStatistics implements DoubleConsumer { private long count; private double sum; + private double sumCompensation; // Low order bits of sum private double min = Double.POSITIVE_INFINITY; private double max = Double.NEGATIVE_INFINITY; @@ -81,7 +82,7 @@ public class DoubleSummaryStatistics implements DoubleConsumer { @Override public void accept(double value) { ++count; - sum += value; + sumWithCompensation(value); min = Math.min(min, value); max = Math.max(max, value); } @@ -95,11 +96,23 @@ public class DoubleSummaryStatistics implements DoubleConsumer { */ public void combine(DoubleSummaryStatistics other) { count += other.count; - sum += other.sum; + sumWithCompensation(other.sum); + sumWithCompensation(other.sumCompensation); min = Math.min(min, other.min); max = Math.max(max, other.max); } + /** + * Incorporate a new double value using Kahan summation / + * compensated summation. 
+ */ + private void sumWithCompensation(double value) { + double tmp = value - sumCompensation; + double velvel = sum + tmp; // Little wolf of rounding error + sumCompensation = (velvel - sum) - tmp; + sum = velvel; + } + /** * Return the count of values recorded. * @@ -133,7 +146,8 @@ public class DoubleSummaryStatistics implements DoubleConsumer { * @return the sum of values, or zero if none */ public final double getSum() { - return sum; + // Better error bounds to add both terms as the final sum + return sum + sumCompensation; } /** diff --git a/jdk/src/share/classes/java/util/stream/Collectors.java b/jdk/src/share/classes/java/util/stream/Collectors.java index 93ffb0113c6..f52ccba57ad 100644 --- a/jdk/src/share/classes/java/util/stream/Collectors.java +++ b/jdk/src/share/classes/java/util/stream/Collectors.java @@ -505,13 +505,42 @@ public final class Collectors { */ public static Collector summingDouble(ToDoubleFunction mapper) { + /* + * In the arrays allocated for the collect operation, index 0 + * holds the high-order bits of the running sum and index 1 + * holds the low-order bits of the sum computed via + * compensated summation. + */ return new CollectorImpl<>( - () -> new double[1], - (a, t) -> { a[0] += mapper.applyAsDouble(t); }, - (a, b) -> { a[0] += b[0]; return a; }, - a -> a[0], CH_NOID); + () -> new double[2], + (a, t) -> { sumWithCompensation(a, mapper.applyAsDouble(t)); }, + (a, b) -> { sumWithCompensation(a, b[0]); return sumWithCompensation(a, b[1]); }, + // Better error bounds to add both terms as the final sum + a -> a[0] + a[1], + CH_NOID); } + /** + * Incorporate a new double value using Kahan summation / + * compensated summation. + * + * High-order bits of the sum are in intermediateSum[0], low-order + * bits of the sum are in intermediateSum[1], any additional + * elements are application-specific. 
+ * + * @param intermediateSum the high-order and low-order words of the intermediate sum + * @param value the new value to be included in the running sum + */ + static double[] sumWithCompensation(double[] intermediateSum, double value) { + double tmp = value - intermediateSum[1]; + double sum = intermediateSum[0]; + double velvel = sum + tmp; // Little wolf of rounding error + intermediateSum[1] = (velvel - sum) - tmp; + intermediateSum[0] = velvel; + return intermediateSum; + } + + /** * Returns a {@code Collector} that produces the arithmetic mean of an integer-valued * function applied to the input elements. If no elements are present, @@ -560,17 +589,31 @@ public final class Collectors { * value is a {@code NaN} or the sum is at any point a {@code NaN} then the * average will be {@code NaN}. * + * @implNote The {@code double} format can represent all + * consecutive integers in the range -2<sup>53</sup> to + * 2<sup>53</sup>. If the pipeline has more than 2<sup>53</sup> + * values, the divisor in the average computation will saturate at + * 2<sup>53</sup>, leading to additional numerical errors. + * * @param <T> the type of the input elements * @param mapper a function extracting the property to be summed * @return a {@code Collector} that produces the sum of a derived property */ public static Collector averagingDouble(ToDoubleFunction mapper) { + /* + * In the arrays allocated for the collect operation, index 0 + * holds the high-order bits of the running sum, index 1 holds + * the low-order bits of the sum computed via compensated + * summation, and index 2 holds the number of values seen. + */ return new CollectorImpl<>( - () -> new double[2], - (a, t) -> { a[0] += mapper.applyAsDouble(t); a[1]++; }, - (a, b) -> { a[0] += b[0]; a[1] += b[1]; return a; }, - a -> (a[1] == 0) ? 
0.0d : a[0] / a[1], CH_NOID); + () -> new double[3], + (a, t) -> { sumWithCompensation(a, mapper.applyAsDouble(t)); a[2]++; }, + (a, b) -> { sumWithCompensation(a, b[0]); sumWithCompensation(a, b[1]); a[2] += b[2]; return a; }, + // Better error bounds to add both terms as the final sum to compute average + a -> (a[2] == 0) ? 0.0d : ((a[0] + a[1]) / a[2]), + CH_NOID); } /** diff --git a/jdk/src/share/classes/java/util/stream/DoublePipeline.java b/jdk/src/share/classes/java/util/stream/DoublePipeline.java index 6af346c56b1..ad9c0563824 100644 --- a/jdk/src/share/classes/java/util/stream/DoublePipeline.java +++ b/jdk/src/share/classes/java/util/stream/DoublePipeline.java @@ -377,8 +377,23 @@ abstract class DoublePipeline @Override public final double sum() { - // TODO: better algorithm to compensate for errors - return reduce(0.0, Double::sum); + /* + * In the arrays allocated for the collect operation, index 0 + * holds the high-order bits of the running sum and index 1 + * holds the low-order bits of the sum computed via + * compensated summation. + */ + double[] summation = collect(() -> new double[2], + (ll, d) -> { + Collectors.sumWithCompensation(ll, d); + }, + (ll, rr) -> { + Collectors.sumWithCompensation(ll, rr[0]); + Collectors.sumWithCompensation(ll, rr[1]); + }); + + // Better error bounds to add both terms as the final sum + return summation[0] + summation[1]; } @Override @@ -391,20 +406,37 @@ abstract class DoublePipeline return reduce(Math::max); } + /** + * {@inheritDoc} + * + * @implNote The {@code double} format can represent all + * consecutive integers in the range -2<sup>53</sup> to + * 2<sup>53</sup>. If the pipeline has more than 2<sup>53</sup> + * values, the divisor in the average computation will saturate at + * 2<sup>53</sup>, leading to additional numerical errors. 
+ */ @Override public final OptionalDouble average() { - double[] avg = collect(() -> new double[2], - (ll, i) -> { - ll[0]++; - ll[1] += i; + /* + * In the arrays allocated for the collect operation, index 0 + * holds the high-order bits of the running sum, index 1 holds + * the low-order bits of the sum computed via compensated + * summation, and index 2 holds the number of values seen. + */ + double[] avg = collect(() -> new double[3], + (ll, d) -> { + ll[2]++; + Collectors.sumWithCompensation(ll, d); }, (ll, rr) -> { - ll[0] += rr[0]; - ll[1] += rr[1]; + Collectors.sumWithCompensation(ll, rr[0]); + Collectors.sumWithCompensation(ll, rr[1]); + ll[2] += rr[2]; }); - return avg[0] > 0 - ? OptionalDouble.of(avg[1] / avg[0]) - : OptionalDouble.empty(); + return avg[2] > 0 + // Better error bounds to add both terms as the final sum to compute average + ? OptionalDouble.of((avg[0] + avg[1]) / avg[2]) + : OptionalDouble.empty(); } @Override diff --git a/jdk/test/java/util/stream/TestDoubleSumAverage.java b/jdk/test/java/util/stream/TestDoubleSumAverage.java new file mode 100644 index 00000000000..01453bd06d4 --- /dev/null +++ b/jdk/test/java/util/stream/TestDoubleSumAverage.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import java.util.*; +import java.util.function.*; +import java.util.stream.*; + +/* + * @test + * @bug 8006572 + * @summary Test for use of non-naive summation in stream-related sum and average operations. + */ +public class TestDoubleSumAverage { + public static void main(String... args) { + int failures = 0; + + failures += testForCompenstation(); + failures += testZeroAverageOfNonEmptyStream(); + + if (failures > 0) { + throw new RuntimeException("Found " + failures + " numerical failure(s)."); + } + } + + /** + * Compute the sum and average of a sequence of double values in + * various ways and report an error if naive summation is used. + */ + private static int testForCompenstation() { + int failures = 0; + + /* + * The exact sum of the test stream is 1 + 1e6*ulp(1.0)/2 but a + * naive summation algorithm will return 1.0 since (1.0 + + * ulp(1.0)/2) will round to 1.0 again. 
+ */ + double base = 1.0; + double increment = Math.ulp(base)/2.0; + int count = 1_000_001; + + double expectedSum = base + (increment * (count - 1)); + double expectedAvg = expectedSum / count; + + // Factory for a double stream of [base, increment, ..., increment] limited to a size of count + Supplier ds = () -> DoubleStream.iterate(base, e -> increment).limit(count); + + DoubleSummaryStatistics stats = ds.get().collect(DoubleSummaryStatistics::new, + DoubleSummaryStatistics::accept, + DoubleSummaryStatistics::combine); + + failures += compareUlpDifference(expectedSum, stats.getSum(), 3); + failures += compareUlpDifference(expectedAvg, stats.getAverage(), 3); + + failures += compareUlpDifference(expectedSum, + ds.get().sum(), 3); + failures += compareUlpDifference(expectedAvg, + ds.get().average().getAsDouble(), 3); + + failures += compareUlpDifference(expectedSum, + ds.get().boxed().collect(Collectors.summingDouble(d -> d)), 3); + failures += compareUlpDifference(expectedAvg, + ds.get().boxed().collect(Collectors.averagingDouble(d -> d)),3); + return failures; + } + + /** + * Test to verify that a non-empty stream whose average is zero still reports a present (non-empty) average. + */ + private static int testZeroAverageOfNonEmptyStream() { + Supplier ds = () -> DoubleStream.iterate(0.0, e -> 0.0).limit(10); + + return compareUlpDifference(0.0, ds.get().average().getAsDouble(), 0); + } + + /** + * Compute the ulp difference of two double values and compare against an error threshold. + */ + private static int compareUlpDifference(double expected, double computed, double threshold) { + double ulpDifference = Math.abs(expected - computed) / Math.ulp(expected); + + if (ulpDifference > threshold) { + System.err.printf("Numerical summation error too large, %g ulps rather than %g.%n", + ulpDifference, threshold); + return 1; + } else + return 0; + } +}