8277175: Add a parallel multiply method to BigInteger
Reviewed-by: psandoz
This commit is contained in:
parent
0786ddb471
commit
83ffbd2e7a
@ -36,6 +36,9 @@ import java.io.ObjectStreamField;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ForkJoinPool;
|
||||
import java.util.concurrent.ForkJoinWorkerThread;
|
||||
import java.util.concurrent.RecursiveTask;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
import jdk.internal.math.DoubleConsts;
|
||||
@ -1581,7 +1584,30 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
* @return {@code this * val}
|
||||
*/
|
||||
public BigInteger multiply(BigInteger val) {
|
||||
return multiply(val, false);
|
||||
return multiply(val, false, false, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a BigInteger whose value is {@code (this * val)}.
|
||||
* When both {@code this} and {@code val} are large, typically
|
||||
* in the thousands of bits, parallel multiply might be used.
|
||||
* This method returns the exact same mathematical result as
|
||||
* {@link #multiply}.
|
||||
*
|
||||
* @implNote This implementation may offer better algorithmic
|
||||
* performance when {@code val == this}.
|
||||
*
|
||||
* @implNote Compared to {@link #multiply}, an implementation's
|
||||
* parallel multiplication algorithm would typically use more
|
||||
* CPU resources to compute the result faster, and may do so
|
||||
* with a slight increase in memory consumption.
|
||||
*
|
||||
* @param val value to be multiplied by this BigInteger.
|
||||
* @return {@code this * val}
|
||||
* @see #multiply
|
||||
*/
|
||||
public BigInteger parallelMultiply(BigInteger val) {
|
||||
return multiply(val, false, true, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1590,16 +1616,17 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
*
|
||||
* @param val value to be multiplied by this BigInteger.
|
||||
* @param isRecursion whether this is a recursive invocation
|
||||
* @param parallel whether the multiply should be done in parallel
|
||||
* @return {@code this * val}
|
||||
*/
|
||||
private BigInteger multiply(BigInteger val, boolean isRecursion) {
|
||||
private BigInteger multiply(BigInteger val, boolean isRecursion, boolean parallel, int depth) {
|
||||
if (val.signum == 0 || signum == 0)
|
||||
return ZERO;
|
||||
|
||||
int xlen = mag.length;
|
||||
|
||||
if (val == this && xlen > MULTIPLY_SQUARE_THRESHOLD) {
|
||||
return square();
|
||||
return square(true, parallel, depth);
|
||||
}
|
||||
|
||||
int ylen = val.mag.length;
|
||||
@ -1677,7 +1704,7 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
}
|
||||
}
|
||||
|
||||
return multiplyToomCook3(this, val);
|
||||
return multiplyToomCook3(this, val, parallel, depth);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1844,6 +1871,88 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
private abstract static sealed class RecursiveOp extends RecursiveTask<BigInteger> {
|
||||
/**
|
||||
* The threshold until when we should continue forking recursive ops
|
||||
* if parallel is true. This threshold is only relevant for Toom Cook 3
|
||||
* multiply and square.
|
||||
*/
|
||||
private static final int PARALLEL_FORK_DEPTH_THRESHOLD =
|
||||
calculateMaximumDepth(ForkJoinPool.getCommonPoolParallelism());
|
||||
|
||||
private static final int calculateMaximumDepth(int parallelism) {
|
||||
return 32 - Integer.numberOfLeadingZeros(parallelism);
|
||||
}
|
||||
|
||||
final boolean parallel;
|
||||
/**
|
||||
* The current recursing depth. Since it is a logarithmic algorithm,
|
||||
* we do not need an int to hold the number.
|
||||
*/
|
||||
final byte depth;
|
||||
|
||||
private RecursiveOp(boolean parallel, int depth) {
|
||||
this.parallel = parallel;
|
||||
this.depth = (byte) depth;
|
||||
}
|
||||
|
||||
private static int getParallelForkDepthThreshold() {
|
||||
if (Thread.currentThread() instanceof ForkJoinWorkerThread fjwt) {
|
||||
return calculateMaximumDepth(fjwt.getPool().getParallelism());
|
||||
}
|
||||
else {
|
||||
return PARALLEL_FORK_DEPTH_THRESHOLD;
|
||||
}
|
||||
}
|
||||
|
||||
protected RecursiveTask<BigInteger> forkOrInvoke() {
|
||||
if (parallel && depth <= getParallelForkDepthThreshold()) fork();
|
||||
else invoke();
|
||||
return this;
|
||||
}
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
private static final class RecursiveMultiply extends RecursiveOp {
|
||||
private final BigInteger a;
|
||||
private final BigInteger b;
|
||||
|
||||
public RecursiveMultiply(BigInteger a, BigInteger b, boolean parallel, int depth) {
|
||||
super(parallel, depth);
|
||||
this.a = a;
|
||||
this.b = b;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BigInteger compute() {
|
||||
return a.multiply(b, true, parallel, depth);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("serial")
|
||||
private static final class RecursiveSquare extends RecursiveOp {
|
||||
private final BigInteger a;
|
||||
|
||||
public RecursiveSquare(BigInteger a, boolean parallel, int depth) {
|
||||
super(parallel, depth);
|
||||
this.a = a;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BigInteger compute() {
|
||||
return a.square(true, parallel, depth);
|
||||
}
|
||||
}
|
||||
|
||||
private static RecursiveTask<BigInteger> multiply(BigInteger a, BigInteger b, boolean parallel, int depth) {
|
||||
return new RecursiveMultiply(a, b, parallel, depth).forkOrInvoke();
|
||||
}
|
||||
|
||||
private static RecursiveTask<BigInteger> square(BigInteger a, boolean parallel, int depth) {
|
||||
return new RecursiveSquare(a, parallel, depth).forkOrInvoke();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Multiplies two BigIntegers using a 3-way Toom-Cook multiplication
|
||||
* algorithm. This is a recursive divide-and-conquer algorithm which is
|
||||
@ -1872,7 +1981,7 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
* LNCS #4547. Springer, Madrid, Spain, June 21-22, 2007.
|
||||
*
|
||||
*/
|
||||
private static BigInteger multiplyToomCook3(BigInteger a, BigInteger b) {
|
||||
private static BigInteger multiplyToomCook3(BigInteger a, BigInteger b, boolean parallel, int depth) {
|
||||
int alen = a.mag.length;
|
||||
int blen = b.mag.length;
|
||||
|
||||
@ -1896,16 +2005,20 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
|
||||
BigInteger v0, v1, v2, vm1, vinf, t1, t2, tm1, da1, db1;
|
||||
|
||||
v0 = a0.multiply(b0, true);
|
||||
depth++;
|
||||
var v0_task = RecursiveOp.multiply(a0, b0, parallel, depth);
|
||||
da1 = a2.add(a0);
|
||||
db1 = b2.add(b0);
|
||||
vm1 = da1.subtract(a1).multiply(db1.subtract(b1), true);
|
||||
var vm1_task = RecursiveOp.multiply(da1.subtract(a1), db1.subtract(b1), parallel, depth);
|
||||
da1 = da1.add(a1);
|
||||
db1 = db1.add(b1);
|
||||
v1 = da1.multiply(db1, true);
|
||||
var v1_task = RecursiveOp.multiply(da1, db1, parallel, depth);
|
||||
v2 = da1.add(a2).shiftLeft(1).subtract(a0).multiply(
|
||||
db1.add(b2).shiftLeft(1).subtract(b0), true);
|
||||
vinf = a2.multiply(b2, true);
|
||||
db1.add(b2).shiftLeft(1).subtract(b0), true, parallel, depth);
|
||||
vinf = a2.multiply(b2, true, parallel, depth);
|
||||
v0 = v0_task.join();
|
||||
vm1 = vm1_task.join();
|
||||
v1 = v1_task.join();
|
||||
|
||||
// The algorithm requires two divisions by 2 and one by 3.
|
||||
// All divisions are known to be exact, that is, they do not produce
|
||||
@ -2071,7 +2184,7 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
* @return <code>this<sup>2</sup></code>
|
||||
*/
|
||||
private BigInteger square() {
|
||||
return square(false);
|
||||
return square(false, false, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2081,7 +2194,7 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
* @param isRecursion whether this is a recursive invocation
|
||||
* @return <code>this<sup>2</sup></code>
|
||||
*/
|
||||
private BigInteger square(boolean isRecursion) {
|
||||
private BigInteger square(boolean isRecursion, boolean parallel, int depth) {
|
||||
if (signum == 0) {
|
||||
return ZERO;
|
||||
}
|
||||
@ -2103,7 +2216,7 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
}
|
||||
}
|
||||
|
||||
return squareToomCook3();
|
||||
return squareToomCook3(parallel, depth);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2237,7 +2350,7 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
* that has better asymptotic performance than the algorithm used in
|
||||
* squareToLen or squareKaratsuba.
|
||||
*/
|
||||
private BigInteger squareToomCook3() {
|
||||
private BigInteger squareToomCook3(boolean parallel, int depth) {
|
||||
int len = mag.length;
|
||||
|
||||
// k is the size (in ints) of the lower-order slices.
|
||||
@ -2254,13 +2367,17 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
a0 = getToomSlice(k, r, 2, len);
|
||||
BigInteger v0, v1, v2, vm1, vinf, t1, t2, tm1, da1;
|
||||
|
||||
v0 = a0.square(true);
|
||||
depth++;
|
||||
var v0_fork = RecursiveOp.square(a0, parallel, depth);
|
||||
da1 = a2.add(a0);
|
||||
vm1 = da1.subtract(a1).square(true);
|
||||
var vm1_fork = RecursiveOp.square(da1.subtract(a1), parallel, depth);
|
||||
da1 = da1.add(a1);
|
||||
v1 = da1.square(true);
|
||||
vinf = a2.square(true);
|
||||
v2 = da1.add(a2).shiftLeft(1).subtract(a0).square(true);
|
||||
var v1_fork = RecursiveOp.square(da1, parallel, depth);
|
||||
vinf = a2.square(true, parallel, depth);
|
||||
v2 = da1.add(a2).shiftLeft(1).subtract(a0).square(true, parallel, depth);
|
||||
v0 = v0_fork.join();
|
||||
vm1 = vm1_fork.join();
|
||||
v1 = v1_fork.join();
|
||||
|
||||
// The algorithm requires two divisions by 2 and one by 3.
|
||||
// All divisions are known to be exact, that is, they do not produce
|
||||
|
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @run main BigIntegerParallelMultiplyTest
|
||||
* @summary tests parallelMultiply() method in BigInteger
|
||||
* @author Heinz Kabutz heinz@javaspecialists.eu
|
||||
*/
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.function.BinaryOperator;
|
||||
|
||||
/**
|
||||
* This is a simple test class created to ensure that the results
|
||||
* of multiply() are the same as parallelMultiply(). We calculate
|
||||
* the Fibonacci numbers using Dijkstra's sum of squares to get
|
||||
* very large numbers (hundreds of thousands of bits).
|
||||
*
|
||||
* @author Heinz Kabutz, heinz@javaspecialists.eu
|
||||
*/
|
||||
public class BigIntegerParallelMultiplyTest {
|
||||
public static BigInteger fibonacci(int n, BinaryOperator<BigInteger> multiplyOperator) {
|
||||
if (n == 0) return BigInteger.ZERO;
|
||||
if (n == 1) return BigInteger.ONE;
|
||||
|
||||
int half = (n + 1) / 2;
|
||||
BigInteger f0 = fibonacci(half - 1, multiplyOperator);
|
||||
BigInteger f1 = fibonacci(half, multiplyOperator);
|
||||
if (n % 2 == 1) {
|
||||
BigInteger b0 = multiplyOperator.apply(f0, f0);
|
||||
BigInteger b1 = multiplyOperator.apply(f1, f1);
|
||||
return b0.add(b1);
|
||||
} else {
|
||||
BigInteger b0 = f0.shiftLeft(1).add(f1);
|
||||
return multiplyOperator.apply(b0, f1);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
compare(1000, 324);
|
||||
compare(10_000, 3473);
|
||||
compare(100_000, 34883);
|
||||
compare(1_000_000, 347084);
|
||||
}
|
||||
|
||||
private static void compare(int n, int expectedBitCount) {
|
||||
BigInteger multiplyResult = fibonacci(n, BigInteger::multiply);
|
||||
BigInteger parallelMultiplyResult = fibonacci(n, BigInteger::parallelMultiply);
|
||||
checkBitCount(n, expectedBitCount, multiplyResult);
|
||||
checkBitCount(n, expectedBitCount, parallelMultiplyResult);
|
||||
if (!multiplyResult.equals(parallelMultiplyResult))
|
||||
throw new AssertionError("multiply() and parallelMultiply() give different results");
|
||||
}
|
||||
|
||||
private static void checkBitCount(int n, int expectedBitCount, BigInteger number) {
|
||||
if (number.bitCount() != expectedBitCount)
|
||||
throw new AssertionError(
|
||||
"bitCount of fibonacci(" + n + ") was expected to be " + expectedBitCount
|
||||
+ " but was " + number.bitCount());
|
||||
}
|
||||
}
|
@ -0,0 +1,322 @@
|
||||
package org.openjdk.bench.java.math;
|
||||
|
||||
import javax.management.MBeanServer;
|
||||
import javax.management.MalformedObjectNameException;
|
||||
import javax.management.ObjectName;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.ThreadMXBean;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.math.BigInteger;
|
||||
import java.util.Arrays;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.LongSummaryStatistics;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ForkJoinPool;
|
||||
import java.util.concurrent.ForkJoinWorkerThread;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.function.BinaryOperator;
|
||||
import java.util.function.LongUnaryOperator;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static java.util.concurrent.ForkJoinPool.defaultForkJoinWorkerThreadFactory;
|
||||
|
||||
/**
|
||||
* Benchmark for checking performance difference between sequential and parallel
|
||||
* multiply of very large Mersenne primes using BigInteger. We want to measure
|
||||
* real time, user time, system time and the amount of memory allocated. To
|
||||
* calculate this, we create our own thread factory for the common ForkJoinPool
|
||||
* and then use that to measure user time, cpu time and bytes allocated.
|
||||
* <p>
|
||||
* We use reflection to discover all methods that match "*ultiply", and use them
|
||||
* to multiply two very large Mersenne primes together.
|
||||
* <p>
|
||||
* <h3>Results on a 1-6-2 machine running Ubuntu linux</h3>
|
||||
* <p>
|
||||
* Memory allocation increased from 83.9GB to 84GB, for both the sequential and
|
||||
* parallel versions. This is an increase of just 0.1%. On this machine, the
|
||||
* parallel version was 3.8x faster in latency (real time), but it used 2.7x
|
||||
* more CPU resources.
|
||||
* <p>
|
||||
* Testing multiplying Mersenne primes of 2^57885161-1 and 2^82589933-1
|
||||
* <p>
|
||||
* <pre>
|
||||
* openjdk version "18-internal" 2022-03-15
|
||||
* BigInteger.parallelMultiply()
|
||||
* real 0m6.288s
|
||||
* user 1m3.010s
|
||||
* sys 0m0.027s
|
||||
* mem 84.0GB
|
||||
* BigInteger.multiply()
|
||||
* real 0m23.682s
|
||||
* user 0m23.530s
|
||||
* sys 0m0.004s
|
||||
* mem 84.0GB
|
||||
*
|
||||
* openjdk version "1.8.0_302"
|
||||
* BigInteger.multiply()
|
||||
* real 0m25.657s
|
||||
* user 0m25.390s
|
||||
* sys 0m0.001s
|
||||
* mem 83.9GB
|
||||
*
|
||||
* openjdk version "9.0.7.1"
|
||||
* BigInteger.multiply()
|
||||
* real 0m24.907s
|
||||
* user 0m24.700s
|
||||
* sys 0m0.001s
|
||||
* mem 83.9GB
|
||||
*
|
||||
* openjdk version "10.0.2" 2018-07-17
|
||||
* BigInteger.multiply()
|
||||
* real 0m24.632s
|
||||
* user 0m24.380s
|
||||
* sys 0m0.004s
|
||||
* mem 83.9GB
|
||||
*
|
||||
* openjdk version "11.0.12" 2021-07-20 LTS
|
||||
* BigInteger.multiply()
|
||||
* real 0m22.114s
|
||||
* user 0m21.930s
|
||||
* sys 0m0.001s
|
||||
* mem 83.9GB
|
||||
*
|
||||
* openjdk version "12.0.2" 2019-07-16
|
||||
* BigInteger.multiply()
|
||||
* real 0m23.015s
|
||||
* user 0m22.830s
|
||||
* sys 0m0.000s
|
||||
* mem 83.9GB
|
||||
*
|
||||
* openjdk version "13.0.9" 2021-10-19
|
||||
* BigInteger.multiply()
|
||||
* real 0m23.548s
|
||||
* user 0m23.350s
|
||||
* sys 0m0.005s
|
||||
* mem 83.9GB
|
||||
*
|
||||
* openjdk version "14.0.2" 2020-07-14
|
||||
* BigInteger.multiply()
|
||||
* real 0m22.918s
|
||||
* user 0m22.530s
|
||||
* sys 0m0.131s
|
||||
* mem 83.9GB
|
||||
*
|
||||
* openjdk version "15.0.5" 2021-10-19
|
||||
* BigInteger.multiply()
|
||||
* real 0m22.038s
|
||||
* user 0m21.750s
|
||||
* sys 0m0.003s
|
||||
* mem 83.9GB
|
||||
*
|
||||
* openjdk version "16.0.2" 2021-07-20
|
||||
* BigInteger.multiply()
|
||||
* real 0m23.049s
|
||||
* user 0m22.760s
|
||||
* sys 0m0.006s
|
||||
* mem 83.9GB
|
||||
*
|
||||
* openjdk version "17" 2021-09-14
|
||||
* BigInteger.multiply()
|
||||
* real 0m22.580s
|
||||
* user 0m22.310s
|
||||
* sys 0m0.001s
|
||||
* mem 83.9GB
|
||||
*</pre>
|
||||
*
|
||||
* @author Heinz Kabutz, heinz@javaspecialists.eu
|
||||
*/
|
||||
public class BigIntegerMersennePrimeMultiply implements ForkJoinPool.ForkJoinWorkerThreadFactory {
    // Large Mersenne prime discovered by Curtis Cooper in 2013
    private static final int EXPONENT_1 = 57885161;
    private static final BigInteger MERSENNE_1 =
            BigInteger.ONE.shiftLeft(EXPONENT_1).subtract(BigInteger.ONE);
    // Largest Mersenne prime number discovered by Patrick Laroche in 2018
    private static final int EXPONENT_2 = 82589933;
    private static final BigInteger MERSENNE_2 =
            BigInteger.ONE.shiftLeft(EXPONENT_2).subtract(BigInteger.ONE);
    // Bit length that the product of the two primes must have
    private static final int EXPECTED_BIT_LENGTH = 140475094;
    private static boolean DEBUG = false;

    /**
     * Installs this class as the thread factory of the common
     * ForkJoinPool, discovers the public single-argument "*ultiply"
     * methods of BigInteger reflectively and measures each of them
     * three times.
     */
    public static void main(String... args) {
        System.setProperty("java.util.concurrent.ForkJoinPool.common.threadFactory",
                BigIntegerMersennePrimeMultiply.class.getName());
        System.out.println("Testing multiplying Mersenne primes of " +
                "2^" + EXPONENT_1 + "-1 and 2^" + EXPONENT_2 + "-1");
        addCounters(Thread.currentThread());
        System.out.println("Using the following multiply methods:");
        List<Method> methods = Arrays.stream(BigInteger.class.getMethods())
                .filter(method -> method.getName().endsWith("ultiply") &&
                        method.getParameterCount() == 1 &&
                        method.getParameterTypes()[0] == BigInteger.class)
                .collect(Collectors.toList());
        for (Method method : methods) {
            System.out.println(" " + method);
        }

        for (int i = 0; i < 3; i++) {
            System.out.println();
            methods.forEach(BigIntegerMersennePrimeMultiply::test);
        }
    }

    /**
     * Wraps the reflective method in a BinaryOperator and measures it.
     * Reflection failures are rethrown as AssertionError; for an
     * InvocationTargetException the underlying cause is reported.
     */
    private static void test(Method method) {
        BinaryOperator<BigInteger> multiplyOperator = (a, b) -> {
            try {
                return (BigInteger) method.invoke(a, b);
            } catch (IllegalAccessException e) {
                throw new AssertionError(e);
            } catch (InvocationTargetException e) {
                throw new AssertionError(e.getCause());
            }
        };
        test(method.getName(), multiplyOperator);
    }

    /**
     * Multiplies the two Mersenne primes in both argument orders with
     * the given operator, verifies the bit length of both products and
     * prints real, user and system time plus the bytes allocated.
     *
     * @param description name printed for the measured method
     * @param multiplyOperator the multiplication under test
     * @throws AssertionError if a product has an unexpected bit length
     */
    private static void test(String description,
                             BinaryOperator<BigInteger> multiplyOperator) {
        System.out.println("BigInteger." + description + "()");
        resetAllCounters();
        long elapsedTimeInNanos = System.nanoTime();
        try {
            BigInteger result1 = multiplyOperator.apply(MERSENNE_1, MERSENNE_2);
            BigInteger result2 = multiplyOperator.apply(MERSENNE_2, MERSENNE_1);
            // Each result is checked against its own bit length; the
            // previous inline check reported result1's length for result2.
            checkBitLength(result1);
            checkBitLength(result2);
        } finally {
            elapsedTimeInNanos = System.nanoTime() - elapsedTimeInNanos;
        }

        LongSummaryStatistics userTimeStatistics = getStatistics(userTime);
        LongSummaryStatistics cpuTimeStatistics = getStatistics(cpuTime);
        LongSummaryStatistics memoryAllocationStatistics = getStatistics(bytes);
        System.out.println("real " + formatTime(elapsedTimeInNanos));
        System.out.println("user " + formatTime(userTimeStatistics.getSum()));
        System.out.println("sys " +
                formatTime(cpuTimeStatistics.getSum() - userTimeStatistics.getSum()));
        System.out.println("mem " + formatMemory(memoryAllocationStatistics.getSum(), 1));
    }

    /** Throws an AssertionError naming the actual bit length if it is not the expected one. */
    private static void checkBitLength(BigInteger result) {
        int actualBitLength = result.bitLength();
        if (actualBitLength != EXPECTED_BIT_LENGTH)
            throw new AssertionError("Expected bitLength: " + EXPECTED_BIT_LENGTH + ", " +
                    "but was " + actualBitLength);
    }

    /**
     * Replaces every per-thread baseline in {@code timeMap} with the
     * amount consumed since that baseline was taken and summarizes the
     * resulting deltas.
     *
     * @param timeMap one of the counter maps registered in counterExtractorMap
     * @return statistics over the per-thread deltas
     */
    private static LongSummaryStatistics getStatistics(Map<Thread, AtomicLong> timeMap) {
        LongUnaryOperator extractor = counterExtractorMap.get(timeMap);
        LongSummaryStatistics statistics = new LongSummaryStatistics();
        // An explicit loop: the mutation below is required behaviour,
        // not a debugging aid, so it does not belong in Stream.peek.
        for (Map.Entry<Thread, AtomicLong> entry : timeMap.entrySet()) {
            AtomicLong counter = entry.getValue();
            // Unit depends on the map: time for userTime/cpuTime, bytes for bytes.
            long delta = extractor.applyAsLong(entry.getKey().getId()) - counter.get();
            counter.set(delta);
            printTime(entry);
            statistics.accept(delta);
        }
        return statistics;
    }

    /** Prints one thread's counter value when DEBUG is enabled. */
    private static void printTime(Map.Entry<Thread, AtomicLong> threadCounter) {
        if (DEBUG)
            System.out.printf("%s %d%n", threadCounter.getKey(), threadCounter.getValue()
                    .get());
    }

    /** Registers the thread in every counter map with its current reading as baseline. */
    private static void addCounters(Thread thread) {
        counterExtractorMap.forEach((map, timeExtractor) -> add(map, thread, timeExtractor));
    }

    private static void add(Map<Thread, AtomicLong> time, Thread thread,
                            LongUnaryOperator timeExtractor) {
        time.put(thread, new AtomicLong(timeExtractor.applyAsLong(thread.getId())));
    }

    /** Re-reads the baseline of every registered thread in every counter map. */
    private static void resetAllCounters() {
        counterExtractorMap.forEach(BigIntegerMersennePrimeMultiply::resetTimes);
    }

    private static void resetTimes(Map<Thread, AtomicLong> timeMap, LongUnaryOperator timeMethod) {
        timeMap.forEach((thread, time) ->
                time.set(timeMethod.applyAsLong(thread.getId())));
    }

    private static final Map<Thread, AtomicLong> userTime =
            new ConcurrentHashMap<>();
    private static final Map<Thread, AtomicLong> cpuTime =
            new ConcurrentHashMap<>();
    private static final Map<Thread, AtomicLong> bytes =
            new ConcurrentHashMap<>();
    private static final ThreadMXBean tmb = ManagementFactory.getThreadMXBean();

    // Keyed by map identity: the three counter maps would compare equal
    // to each other by content, so a regular HashMap could not tell them apart.
    private static final Map<Map<Thread, AtomicLong>, LongUnaryOperator> counterExtractorMap =
            new IdentityHashMap<>();

    static {
        counterExtractorMap.put(userTime, tmb::getThreadUserTime);
        counterExtractorMap.put(cpuTime, tmb::getThreadCpuTime);
        counterExtractorMap.put(bytes, BigIntegerMersennePrimeMultiply::threadAllocatedBytes);
    }

    /**
     * Creates a worker thread through the default factory and registers
     * it in the counter maps so that its resource usage is measured.
     */
    public final ForkJoinWorkerThread newThread(ForkJoinPool pool) {
        ForkJoinWorkerThread thread = defaultForkJoinWorkerThreadFactory.newThread(pool);
        addCounters(thread);
        return thread;
    }

    private static final String[] SIGNATURE = new String[]{long.class.getName()};
    private static final MBeanServer mBeanServer;
    private static final ObjectName name;

    static {
        try {
            name = new ObjectName(ManagementFactory.THREAD_MXBEAN_NAME);
            mBeanServer = ManagementFactory.getPlatformMBeanServer();
        } catch (MalformedObjectNameException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Reads the bytes allocated by a thread by invoking the
     * "getThreadAllocatedBytes" operation of the thread MXBean through
     * the platform MBean server.
     *
     * @param threadId id of the thread to query
     * @return the value reported by the MXBean operation
     * @throws IllegalArgumentException wrapping any failure of the invocation
     */
    public static long threadAllocatedBytes(long threadId) {
        try {
            return (long) mBeanServer.invoke(
                    name,
                    "getThreadAllocatedBytes",
                    new Object[]{threadId},
                    SIGNATURE
            );
        } catch (Exception e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Formats a byte count with a binary unit (B, KB, MB, GB or TB).
     *
     * @param bytes number of bytes
     * @param decimals number of fraction digits to print
     * @return the formatted value followed directly by its unit
     */
    public static String formatMemory(double bytes, int decimals) {
        double val;
        String unitStr;
        if (bytes < 1024) {
            val = bytes;
            unitStr = "B";
        } else if (bytes < 1024 * 1024) {
            val = bytes / 1024;
            unitStr = "KB";
        } else if (bytes < 1024 * 1024 * 1024) {
            val = bytes / (1024 * 1024);
            unitStr = "MB";
        } else if (bytes < 1024 * 1024 * 1024 * 1024L) {
            val = bytes / (1024 * 1024 * 1024L);
            unitStr = "GB";
        } else {
            val = bytes / (1024 * 1024 * 1024 * 1024L);
            unitStr = "TB";
        }
        return String.format(Locale.US, "%." + decimals + "f%s", val, unitStr);
    }

    /**
     * Formats a duration as minutes and seconds with millisecond
     * precision, e.g. {@code 0m6.288s}. Negative input is treated as zero.
     *
     * @param nanos duration in nanoseconds
     * @return the formatted duration
     */
    public static String formatTime(long nanos) {
        if (nanos < 0) nanos = 0;
        long timeInMs = TimeUnit.NANOSECONDS.toMillis(nanos);
        long minutes = timeInMs / 60_000;
        double remainingSeconds = (timeInMs % 60_000) / 1000.0;
        return String.format(Locale.US, "%dm%.3fs", minutes, remainingSeconds);
    }
}
|
@ -0,0 +1,61 @@
|
||||
package org.openjdk.bench.java.math;
|
||||
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.function.BinaryOperator;
|
||||
|
||||
/**
|
||||
* Benchmark for checking performance difference between
|
||||
* sequential and parallel multiply methods in BigInteger,
|
||||
* using a large Fibonacci calculation of up to n = 100 million.
|
||||
*
|
||||
* @author Heinz Kabutz, heinz@javaspecialists.eu
|
||||
*/
|
||||
@BenchmarkMode(Mode.SingleShotTime)
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
@Fork(value = 2)
|
||||
@Warmup(iterations = 2)
|
||||
@Measurement(iterations = 2) // only 2 iterations because each one takes very long
|
||||
@State(Scope.Thread)
|
||||
public class BigIntegerParallelMultiply {
|
||||
private static BigInteger fibonacci(int n, BinaryOperator<BigInteger> multiplyOperator) {
|
||||
if (n == 0) return BigInteger.ZERO;
|
||||
if (n == 1) return BigInteger.ONE;
|
||||
|
||||
int half = (n + 1) / 2;
|
||||
BigInteger f0 = fibonacci(half - 1, multiplyOperator);
|
||||
BigInteger f1 = fibonacci(half, multiplyOperator);
|
||||
if (n % 2 == 1) {
|
||||
BigInteger b0 = multiplyOperator.apply(f0, f0);
|
||||
BigInteger b1 = multiplyOperator.apply(f1, f1);
|
||||
return b0.add(b1);
|
||||
} else {
|
||||
BigInteger b0 = f0.shiftLeft(1).add(f1);
|
||||
return multiplyOperator.apply(b0, f1);
|
||||
}
|
||||
}
|
||||
|
||||
@Param({"1000000", "10000000", "100000000"})
|
||||
private int n;
|
||||
|
||||
@Benchmark
|
||||
public void multiply() {
|
||||
fibonacci(n, BigInteger::multiply);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void parallelMultiply() {
|
||||
fibonacci(n, BigInteger::parallelMultiply);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user