8341260: Add Float16 to jdk.incubator.vector
Co-authored-by: Raffaello Giulietti <rgiulietti@openjdk.org>
Co-authored-by: Jatin Bhateja <jbhateja@openjdk.org>
Reviewed-by: rgiulietti
This commit is contained in:
parent
a5f11b5f77
commit
dbf23466af
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package jdk.incubator.vector;
|
||||
|
||||
import static jdk.incubator.vector.Float16.MIN_EXPONENT;
|
||||
import static jdk.incubator.vector.Float16.PRECISION;
|
||||
import static jdk.incubator.vector.Float16.SIZE;
|
||||
|
||||
/**
|
||||
* This class contains additional constants documenting limits of the
|
||||
* {@code Float16} type.
|
||||
*/
|
||||
|
||||
class Float16Consts {
|
||||
/**
|
||||
* Don't let anyone instantiate this class.
|
||||
*/
|
||||
private Float16Consts() {}
|
||||
|
||||
/**
|
||||
* The number of logical bits in the significand of a
|
||||
* {@code Float16} number, including the implicit bit.
|
||||
*/
|
||||
public static final int SIGNIFICAND_WIDTH = PRECISION;
|
||||
|
||||
/**
|
||||
* The exponent the smallest positive {@code Float16}
|
||||
* subnormal value would have if it could be normalized.
|
||||
*/
|
||||
public static final int MIN_SUB_EXPONENT =
|
||||
MIN_EXPONENT - (SIGNIFICAND_WIDTH - 1); // -24
|
||||
|
||||
/**
|
||||
* Bias used in representing a {@code Float16} exponent.
|
||||
*/
|
||||
public static final int EXP_BIAS =
|
||||
(1 << (SIZE - SIGNIFICAND_WIDTH - 1)) - 1; // 15
|
||||
|
||||
/**
|
||||
* Bit mask to isolate the sign bit of a {@code Float16}.
|
||||
*/
|
||||
public static final int SIGN_BIT_MASK = 1 << (SIZE - 1);
|
||||
|
||||
/**
|
||||
* Bit mask to isolate the exponent field of a {@code Float16}.
|
||||
*/
|
||||
public static final int EXP_BIT_MASK =
|
||||
((1 << (SIZE - SIGNIFICAND_WIDTH)) - 1) << (SIGNIFICAND_WIDTH - 1);
|
||||
|
||||
/**
|
||||
* Bit mask to isolate the significand field of a {@code Float16}.
|
||||
*/
|
||||
public static final int SIGNIF_BIT_MASK = (1 << (SIGNIFICAND_WIDTH - 1)) - 1;
|
||||
|
||||
/**
|
||||
* Bit mask to isolate the magnitude bits (combined exponent and
|
||||
* significand fields) of a {@code Float16}.
|
||||
*/
|
||||
public static final int MAG_BIT_MASK = EXP_BIT_MASK | SIGNIF_BIT_MASK;
|
||||
|
||||
static {
|
||||
// verify bit masks cover all bit positions and that the bit
|
||||
// masks are non-overlapping
|
||||
assert(((SIGN_BIT_MASK | EXP_BIT_MASK | SIGNIF_BIT_MASK) == 0xFFFF) &&
|
||||
(((SIGN_BIT_MASK & EXP_BIT_MASK) == 0) &&
|
||||
((SIGN_BIT_MASK & SIGNIF_BIT_MASK) == 0) &&
|
||||
((EXP_BIT_MASK & SIGNIF_BIT_MASK) == 0)) &&
|
||||
((SIGN_BIT_MASK | MAG_BIT_MASK) == 0xFFFF));
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -23,8 +23,9 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8205592
|
||||
* @summary Verify {double, float}Value methods work
|
||||
* @bug 8205592 8339252 8341260
|
||||
* @summary Verify {double, float, float16}Value methods work
|
||||
* @modules jdk.incubator.vector
|
||||
* @library /test/lib
|
||||
* @key randomness
|
||||
* @build jdk.test.lib.RandomFactory
|
||||
@ -35,6 +36,7 @@ import jdk.test.lib.RandomFactory;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Random;
|
||||
import jdk.incubator.vector.Float16;
|
||||
|
||||
public class DoubleFloatValueTests {
|
||||
private static final BigDecimal HALF = BigDecimal.valueOf(5, 1);
|
||||
@ -64,6 +66,18 @@ public class DoubleFloatValueTests {
|
||||
return bv.subtract(ulp.multiply(HALF));
|
||||
}
|
||||
|
||||
private static BigDecimal nextHalfUp(Float16 v) {
|
||||
BigDecimal bv = new BigDecimal(v.doubleValue());
|
||||
BigDecimal ulp = new BigDecimal(Float16.ulp(v).doubleValue());
|
||||
return bv.add(ulp.multiply(HALF));
|
||||
}
|
||||
|
||||
private static BigDecimal nextHalfDown(Float16 v) {
|
||||
BigDecimal bv = new BigDecimal(v.doubleValue());
|
||||
BigDecimal ulp = new BigDecimal(v.doubleValue() - Float16.nextDown(v).doubleValue());
|
||||
return bv.subtract(ulp.multiply(HALF));
|
||||
}
|
||||
|
||||
private static String toDecHexString(double v) {
|
||||
return v + " (" + Double.toHexString(v) + ")";
|
||||
}
|
||||
@ -72,6 +86,10 @@ public class DoubleFloatValueTests {
|
||||
return v + " (" + Float.toHexString(v) + ")";
|
||||
}
|
||||
|
||||
private static String toDecHexString(Float16 v) {
|
||||
return v + " (" + Float16.toHexString(v) + ")";
|
||||
}
|
||||
|
||||
private static void checkDouble(BigDecimal bd, double exp) {
|
||||
double res = bd.doubleValue();
|
||||
if (exp != res ) {
|
||||
@ -90,6 +108,15 @@ public class DoubleFloatValueTests {
|
||||
}
|
||||
}
|
||||
|
||||
private static void checkFloat16(BigDecimal bv, Float16 exp) {
|
||||
Float16 res = Float16.valueOf(bv); // bv.float16Value();
|
||||
if (exp.floatValue() != res.floatValue()) {
|
||||
String message = "Bad conversion: got " + toDecHexString(res) +
|
||||
", expected " + toDecHexString(exp);
|
||||
throw new RuntimeException(message);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isOdd(int n) {
|
||||
return (n & 0x1) != 0;
|
||||
}
|
||||
@ -112,6 +139,15 @@ public class DoubleFloatValueTests {
|
||||
}
|
||||
}
|
||||
|
||||
private static void testFloat16ValueNearMinValue() {
|
||||
for (int n = 0; n < 100; ++n) {
|
||||
BigDecimal b = nextHalfUp(Float16.multiply(Float16.valueOf(n), Float16.MIN_VALUE));
|
||||
checkFloat16(b, Float16.multiply(Float16.valueOf((n + 1) / 2 * 2), Float16.MIN_VALUE));
|
||||
checkFloat16(b.subtract(EPS), Float16.multiply(Float16.valueOf(n), Float16.MIN_VALUE));
|
||||
checkFloat16(b.add(EPS), Float16.multiply(Float16.valueOf(n + 1), Float16.MIN_VALUE));
|
||||
}
|
||||
}
|
||||
|
||||
private static void testDoubleValueNearMinNormal() {
|
||||
double v = Double.MIN_NORMAL;
|
||||
for (int n = 0; n < 100; ++n) {
|
||||
@ -150,6 +186,25 @@ public class DoubleFloatValueTests {
|
||||
}
|
||||
}
|
||||
|
||||
private static void testFloat16ValueNearMinNormal() {
|
||||
Float16 v = Float16.MIN_NORMAL;
|
||||
for (int n = 0; n < 100; ++n) {
|
||||
BigDecimal bv = nextHalfDown(v);
|
||||
checkFloat16(bv, isOdd(n) ? Float16.nextDown(v) : v);
|
||||
checkFloat16(bv.subtract(EPS), Float16.nextDown(v));
|
||||
checkFloat16(bv.add(EPS), v);
|
||||
v = Float16.nextDown(v);
|
||||
}
|
||||
v = Float16.MIN_NORMAL;
|
||||
for (int n = 0; n < 100; ++n) {
|
||||
BigDecimal bv = nextHalfUp(v);
|
||||
checkFloat16(bv, isOdd(n) ? Float16.nextUp(v) : v);
|
||||
checkFloat16(bv.subtract(EPS), v);
|
||||
checkFloat16(bv.add(EPS), Float16.nextUp(v));
|
||||
v = Float16.nextUp(v);
|
||||
}
|
||||
}
|
||||
|
||||
private static void testDoubleValueNearMaxValue() {
|
||||
double v = Double.MAX_VALUE;
|
||||
for (int n = 0; n < 100; ++n) {
|
||||
@ -180,6 +235,21 @@ public class DoubleFloatValueTests {
|
||||
checkFloat(bv.add(EPS), Float.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
private static void testFloat16ValueNearMaxValue() {
|
||||
Float16 v = Float16.MAX_VALUE;
|
||||
for (int n = 0; n < 100; ++n) {
|
||||
BigDecimal bv = nextHalfDown(v);
|
||||
checkFloat16(bv, isOdd(n) ? v : Float16.nextDown(v));
|
||||
checkFloat16(bv.subtract(EPS), Float16.nextDown(v));
|
||||
checkFloat16(bv.add(EPS), v);
|
||||
v = Float16.nextDown(v);
|
||||
}
|
||||
BigDecimal bv = nextHalfUp(Float16.MAX_VALUE);
|
||||
checkFloat16(bv, Float16.POSITIVE_INFINITY);
|
||||
checkFloat16(bv.subtract(EPS), Float16.MAX_VALUE);
|
||||
checkFloat16(bv.add(EPS), Float16.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
private static void testDoubleValueRandom() {
|
||||
Random r = RandomFactory.getRandom();
|
||||
for (int i = 0; i < 10_000; ++i) {
|
||||
@ -228,18 +298,49 @@ public class DoubleFloatValueTests {
|
||||
}
|
||||
}
|
||||
|
||||
private static void testFloat16ValueRandom() {
|
||||
Random r = RandomFactory.getRandom();
|
||||
for (int i = 0; i < 10_000; ++i) {
|
||||
Float16 v = Float16.valueOf(r.nextFloat(-Float16.MAX_VALUE.floatValue(), Float16.MAX_VALUE.floatValue()));
|
||||
checkFloat16(new BigDecimal(v.floatValue()), v);
|
||||
}
|
||||
for (int i = 0; i < 10_000; ++i) {
|
||||
Float16 v = Float16.valueOf(r.nextFloat(-1e4f, 1e4f));
|
||||
checkFloat16(new BigDecimal(v.floatValue()), v);
|
||||
}
|
||||
for (int i = 0; i < 10_000; ++i) {
|
||||
Float16 v = Float16.valueOf(r.nextFloat(-1e3f, 1e3f));
|
||||
checkFloat16(new BigDecimal(v.floatValue()), v);
|
||||
}
|
||||
for (int i = 0; i < 10_000; ++i) {
|
||||
Float16 v = Float16.valueOf(r.nextFloat(-1e-3f, 1e-3f));
|
||||
checkFloat16(new BigDecimal(v.floatValue()), v);
|
||||
}
|
||||
for (int i = 0; i < 10_000; ++i) {
|
||||
Float16 v = Float16.valueOf(r.nextFloat(-1e-4f, 1e-4f));
|
||||
checkFloat16(new BigDecimal(v.floatValue()), v);
|
||||
}
|
||||
}
|
||||
|
||||
private static void testDoubleValueExtremes() {
|
||||
checkDouble(BigDecimal.valueOf(1, 1000), 0.0);
|
||||
checkDouble(BigDecimal.valueOf(-1, 1000), -0.0);
|
||||
checkDouble(BigDecimal.valueOf(1, -1000), Double.POSITIVE_INFINITY);
|
||||
checkDouble(BigDecimal.valueOf(-1, -1000), -Double.POSITIVE_INFINITY);
|
||||
checkDouble(BigDecimal.valueOf(-1, -1000), Double.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
private static void testFloatValueExtremes() {
|
||||
checkFloat(BigDecimal.valueOf(1, 1000), 0.0f);
|
||||
checkFloat(BigDecimal.valueOf(-1, 1000), -0.0f);
|
||||
checkFloat(BigDecimal.valueOf(1, -1000), Float.POSITIVE_INFINITY);
|
||||
checkFloat(BigDecimal.valueOf(-1, -1000), -Float.POSITIVE_INFINITY);
|
||||
checkFloat(BigDecimal.valueOf(-1, -1000), Float.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
private static void testFloat16ValueExtremes() {
|
||||
checkFloat16(BigDecimal.valueOf(1, 1000), Float16.valueOf(0.0f));
|
||||
checkFloat16(BigDecimal.valueOf(-1, 1000), Float16.valueOf(-0.0f));
|
||||
checkFloat16(BigDecimal.valueOf(1, -1000), Float16.POSITIVE_INFINITY);
|
||||
checkFloat16(BigDecimal.valueOf(-1, -1000), Float16.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
@ -254,6 +355,12 @@ public class DoubleFloatValueTests {
|
||||
testFloatValueNearMaxValue();
|
||||
testFloatValueRandom();
|
||||
testFloatValueExtremes();
|
||||
|
||||
testFloat16ValueNearMinValue();
|
||||
testFloat16ValueNearMinNormal();
|
||||
testFloat16ValueNearMaxValue();
|
||||
testFloat16ValueRandom();
|
||||
testFloat16ValueExtremes();
|
||||
}
|
||||
|
||||
}
|
||||
|
868
test/jdk/jdk/incubator/vector/BasicFloat16ArithTests.java
Normal file
868
test/jdk/jdk/incubator/vector/BasicFloat16ArithTests.java
Normal file
@ -0,0 +1,868 @@
|
||||
/*
|
||||
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8329817 8334432 8339076 8341260
|
||||
* @modules jdk.incubator.vector
|
||||
* @summary Basic tests of Float16 arithmetic and similar operations
|
||||
*/
|
||||
|
||||
import jdk.incubator.vector.Float16;
|
||||
import static jdk.incubator.vector.Float16.*;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
||||
public class BasicFloat16ArithTests {
|
||||
// Short alias for Float.POSITIVE_INFINITY used in the test tables.
private static float InfinityF = Float.POSITIVE_INFINITY;
|
||||
// Short alias for Float.NaN used in the test tables.
private static float NaNf = Float.NaN;
|
||||
|
||||
// 0x1.ffcp15f is 65504.0f, the value checkConstants() expects for Float16.MAX_VALUE.
private static final float MAX_VAL_FP16 = 0x1.ffcp15f;
|
||||
|
||||
public static void main(String... args) {
|
||||
checkBitWise();
|
||||
checkHash();
|
||||
checkConstants();
|
||||
checkNegate();
|
||||
checkAbs();
|
||||
checkIsNaN();
|
||||
checkFiniteness();
|
||||
checkMinMax();
|
||||
checkArith();
|
||||
checkSqrt();
|
||||
checkGetExponent();
|
||||
checkUlp();
|
||||
checkValueOfDouble();
|
||||
checkValueOfLong();
|
||||
checkValueOfString();
|
||||
checkBaseConversionRoundTrip();
|
||||
FusedMultiplyAddTests.main();
|
||||
}
|
||||
|
||||
/*
|
||||
* The software implementation of Float16 delegates to float or
|
||||
* double operations for most of the actual computation. This
|
||||
* regression test takes that into account as it generally only
|
||||
* has limited testing to probe whether or not the proper
|
||||
* functionality is being delegated to.
|
||||
*
|
||||
* To make the test easier to read, float literals that are exact
|
||||
* upon conversion to Float16 are used for the test data.
|
||||
*
|
||||
* The float <-> Float16 conversions are well-tested from prior
|
||||
* work and are assumed to be correct by this regression test.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Verify handling of NaN representations
|
||||
*/
|
||||
private static void checkBitWise() {
|
||||
short nanImage = float16ToRawShortBits(Float16.NaN);
|
||||
|
||||
int exponent = 0x7c00;
|
||||
int sign = 0x8000;
|
||||
|
||||
// All-zeros significand with a max exponent are infinite
|
||||
// values, not NaN values.
|
||||
for(int i = 0x1; i <= 0x03ff; i++) {
|
||||
short posNaNasShort = (short)( exponent | i);
|
||||
short negNaNasShort = (short)(sign | exponent | i);
|
||||
|
||||
Float16 posf16 = shortBitsToFloat16(posNaNasShort);
|
||||
Float16 negf16 = shortBitsToFloat16(negNaNasShort);
|
||||
|
||||
// Mask-off high-order 16 bits to avoid sign extension woes
|
||||
checkInt(nanImage & 0xffff, float16ToShortBits(posf16) & 0xffff, "positive NaN");
|
||||
checkInt(nanImage & 0xffff, float16ToShortBits(negf16) & 0xffff, "negative NaN");
|
||||
|
||||
checkInt(posNaNasShort & 0xffff, float16ToRawShortBits(posf16) & 0xffff , "positive NaN");
|
||||
checkInt(negNaNasShort & 0xffff, float16ToRawShortBits(negf16) & 0xffff, "negative NaN");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify correct number of hashValue's from Float16's.
|
||||
*/
|
||||
private static void checkHash() {
|
||||
// Slightly over-allocate the HashSet.
|
||||
HashSet<Integer> set = HashSet.newHashSet(Short.MAX_VALUE - Short.MIN_VALUE + 1);
|
||||
|
||||
// Each non-NaN value should have a distinct hashCode. All NaN
|
||||
// values should share a single hashCode. Check the latter
|
||||
// property by verifying the overall count of entries in the
|
||||
// set.
|
||||
for(int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; i++) {
|
||||
Float16 f16 = Float16.shortBitsToFloat16((short)i);
|
||||
boolean addedToSet = set.add(f16.hashCode());
|
||||
|
||||
if (!Float16.isNaN(f16)) {
|
||||
if (!addedToSet) {
|
||||
throwRE("Existing hash value for " + f16);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// There are 2^16 = 65,536 total short values. Each of these
|
||||
// bit patterns is a valid representation of a Float16
|
||||
// value. However, NaNs have multiple possible encodings.
|
||||
// With an exponent = 0x7c00, each nonzero significand 0x1 to
|
||||
// 0x3ff is a NaN, for both positive and negative sign bits.
|
||||
//
|
||||
// Therefore, the total number of distinct hash codes for
|
||||
// Float16 values should be:
|
||||
// 65_536 - 2*(1_023) + 1 = 63_491
|
||||
|
||||
int setSize = set.size();
|
||||
if (setSize != 63_491) {
|
||||
throwRE("Unexpected number of distinct hash values " + setSize);
|
||||
}
|
||||
}
|
||||
|
||||
private static void checkConstants() {
|
||||
checkInt(BYTES, 2, "Float16.BYTES");
|
||||
checkInt(MAX_EXPONENT, 15, "Float16.MAX_EXPONENT");
|
||||
checkInt(MIN_EXPONENT, -14, "Float16.MIN_EXPONENT");
|
||||
checkInt(PRECISION, 11, "Float16.PRECISION");
|
||||
checkInt(SIZE, 16, "Float16.SIZE");
|
||||
|
||||
checkFloat16(MIN_VALUE, 0x1.0p-24f, "Float16.MIN_VALUE");
|
||||
checkFloat16(MIN_NORMAL, 0x1.0p-14f, "Float16.MIN_NORMAL");
|
||||
checkFloat16(MAX_VALUE, 65504.0f, "Float16.MAX_VALUE");
|
||||
|
||||
checkFloat16(POSITIVE_INFINITY, InfinityF, "+infinity");
|
||||
checkFloat16(NEGATIVE_INFINITY, -InfinityF, "-infinity");
|
||||
checkFloat16(NaN, NaNf, "NaN");
|
||||
}
|
||||
|
||||
private static void checkInt(int value, int expected, String message) {
|
||||
if (value != expected) {
|
||||
throwRE(String.format("Didn't get expected value for %s;%nexpected %d, got %d",
|
||||
message, expected, value));
|
||||
}
|
||||
}
|
||||
|
||||
private static void checkFloat16(Float16 value16, float expected, String message) {
|
||||
float value = value16.floatValue();
|
||||
if (Float.compare(value, expected) != 0) {
|
||||
throwRE(String.format("Didn't get expected value for %s;%nexpected %g (%a), got %g (%a)",
|
||||
message, expected, expected, value, value));
|
||||
}
|
||||
}
|
||||
|
||||
private static void checkNegate() {
|
||||
float[][] testCases = {
|
||||
{-0.0f, 0.0f},
|
||||
{ 0.0f, -0.0f},
|
||||
|
||||
{-1.0f, 1.0f},
|
||||
{ 1.0f, -1.0f},
|
||||
|
||||
{ InfinityF, -InfinityF},
|
||||
{-InfinityF, InfinityF},
|
||||
|
||||
{NaNf, NaNf},
|
||||
};
|
||||
|
||||
for(var testCase : testCases) {
|
||||
float arg = testCase[0];
|
||||
float expected = testCase[1];
|
||||
Float16 result = negate(valueOf(arg));
|
||||
|
||||
if (Float.compare(expected, result.floatValue()) != 0) {
|
||||
checkFloat16(result, expected, "negate(" + arg + ")");
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
private static void checkAbs() {
|
||||
float[][] testCases = {
|
||||
{-0.0f, 0.0f},
|
||||
{ 0.0f, 0.0f},
|
||||
|
||||
{-1.0f, 1.0f},
|
||||
{ 1.0f, 1.0f},
|
||||
|
||||
{ InfinityF, InfinityF},
|
||||
{-InfinityF, InfinityF},
|
||||
|
||||
{NaNf, NaNf},
|
||||
};
|
||||
|
||||
for(var testCase : testCases) {
|
||||
float arg = testCase[0];
|
||||
float expected = testCase[1];
|
||||
Float16 result = abs(valueOf(arg));
|
||||
|
||||
if (Float.compare(expected, result.floatValue()) != 0) {
|
||||
checkFloat16(result, expected, "abs(" + arg + ")");
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
private static void checkIsNaN() {
|
||||
if (!isNaN(NaN)) {
|
||||
throwRE("Float16.isNaN() returns false for a NaN");
|
||||
}
|
||||
|
||||
float[] testCases = {
|
||||
-InfinityF,
|
||||
InfinityF,
|
||||
-0.0f,
|
||||
+0.0f,
|
||||
1.0f,
|
||||
-1.0f,
|
||||
};
|
||||
|
||||
for(var testCase : testCases) {
|
||||
boolean result = isNaN(valueOf(testCase));
|
||||
if (result) {
|
||||
throwRE("isNaN returned true for " + testCase);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
private static void checkFiniteness() {
|
||||
float[] infinities = {
|
||||
-InfinityF,
|
||||
InfinityF,
|
||||
};
|
||||
|
||||
for(var infinity : infinities) {
|
||||
boolean result1 = isFinite(valueOf(infinity));
|
||||
boolean result2 = isInfinite(valueOf(infinity));
|
||||
|
||||
if (result1) {
|
||||
throwRE("Float16.isFinite returned true for " + infinity);
|
||||
}
|
||||
|
||||
if (!result2) {
|
||||
throwRE("Float16.isInfinite returned false for " + infinity);
|
||||
}
|
||||
}
|
||||
|
||||
if (isFinite(NaN)) {
|
||||
throwRE("Float16.isFinite() returns true for a NaN");
|
||||
}
|
||||
|
||||
if (isInfinite(NaN)) {
|
||||
throwRE("Float16.isInfinite() returns true for a NaN");
|
||||
}
|
||||
|
||||
float[] finities = {
|
||||
-0.0f,
|
||||
+0.0f,
|
||||
1.0f,
|
||||
-1.0f,
|
||||
};
|
||||
|
||||
for(var finity : finities) {
|
||||
boolean result1 = isFinite(valueOf(finity));
|
||||
boolean result2 = isInfinite(valueOf(finity));
|
||||
|
||||
if (!result1) {
|
||||
throwRE("Float16.isFinite returned true for " + finity);
|
||||
}
|
||||
|
||||
if (result2) {
|
||||
throwRE("Float16.isInfinite returned true for " + finity);
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
private static void checkMinMax() {
|
||||
float small = 1.0f;
|
||||
float large = 2.0f;
|
||||
|
||||
if (min(valueOf(small), valueOf(large)).floatValue() != small) {
|
||||
throwRE(String.format("min(%g, %g) not equal to %g)",
|
||||
small, large, small));
|
||||
}
|
||||
|
||||
if (max(valueOf(small), valueOf(large)).floatValue() != large) {
|
||||
throwRE(String.format("max(%g, %g) not equal to %g)",
|
||||
small, large, large));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Cursory checks to make sure correct operation is being called
|
||||
* with arguments in proper order.
|
||||
*/
|
||||
private static void checkArith() {
|
||||
float a = 1.0f;
|
||||
Float16 a16 = valueOf(a);
|
||||
|
||||
float b = 2.0f;
|
||||
Float16 b16 = valueOf(b);
|
||||
|
||||
if (add(a16, b16).floatValue() != (a + b)) {
|
||||
throwRE("failure with " + a16 + " + " + b16);
|
||||
}
|
||||
if (add(b16, a16).floatValue() != (b + a)) {
|
||||
throwRE("failure with " + b16 + " + " + a16);
|
||||
}
|
||||
|
||||
if (subtract(a16, b16).floatValue() != (a - b)) {
|
||||
throwRE("failure with " + a16 + " - " + b16);
|
||||
}
|
||||
if (subtract(b16, a16).floatValue() != (b - a)) {
|
||||
throwRE("failure with " + b16 + " - " + a16);
|
||||
}
|
||||
|
||||
if (multiply(a16, b16).floatValue() != (a * b)) {
|
||||
throwRE("failure with " + a16 + " * " + b16);
|
||||
}
|
||||
if (multiply(b16, a16).floatValue() != (b * a)) {
|
||||
throwRE("failure with " + b16 + " * " + a16);
|
||||
}
|
||||
|
||||
if (divide(a16, b16).floatValue() != (a / b)) {
|
||||
throwRE("failure with " + a16 + " / " + b16);
|
||||
}
|
||||
if (divide(b16, a16).floatValue() != (b / a)) {
|
||||
throwRE("failure with " + b16 + " / " + a16);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
private static void checkSqrt() {
|
||||
float[][] testCases = {
|
||||
{-0.0f, -0.0f},
|
||||
{ 0.0f, 0.0f},
|
||||
|
||||
{1.0f, 1.0f},
|
||||
{4.0f, 2.0f},
|
||||
{9.0f, 3.0f},
|
||||
|
||||
{ InfinityF, InfinityF},
|
||||
{-InfinityF, NaNf},
|
||||
|
||||
{NaNf, NaNf},
|
||||
};
|
||||
|
||||
for(var testCase : testCases) {
|
||||
float arg = testCase[0];
|
||||
float expected = testCase[1];
|
||||
Float16 result = sqrt(valueOf(arg));
|
||||
|
||||
if (Float.compare(expected, result.floatValue()) != 0) {
|
||||
checkFloat16(result, expected, "sqrt(" + arg + ")");
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
private static void checkGetExponent() {
|
||||
float[][] testCases = {
|
||||
// Non-finite values
|
||||
{ InfinityF, MAX_EXPONENT + 1},
|
||||
{-InfinityF, MAX_EXPONENT + 1},
|
||||
{ NaNf, MAX_EXPONENT + 1},
|
||||
|
||||
// Subnormal and almost subnormal values
|
||||
{-0.0f, MIN_EXPONENT - 1},
|
||||
{+0.0f, MIN_EXPONENT - 1},
|
||||
{ 0x1.0p-24f, MIN_EXPONENT - 1}, // Float16.MIN_VALUE
|
||||
{-0x1.0p-24f, MIN_EXPONENT - 1}, // Float16.MIN_VALUE
|
||||
{ 0x1.0p-14f, MIN_EXPONENT}, // Float16.MIN_NORMAL
|
||||
{-0x1.0p-14f, MIN_EXPONENT}, // Float16.MIN_NORMAL
|
||||
|
||||
// Normal values
|
||||
{ 1.0f, 0},
|
||||
{ 2.0f, 1},
|
||||
{ 4.0f, 2},
|
||||
|
||||
{MAX_VAL_FP16*0.5f, MAX_EXPONENT - 1},
|
||||
{MAX_VAL_FP16, MAX_EXPONENT},
|
||||
};
|
||||
|
||||
for(var testCase : testCases) {
|
||||
float arg = testCase[0];
|
||||
float expected = testCase[1];
|
||||
// Exponents are in-range for Float16
|
||||
Float16 result = valueOf(getExponent(valueOf(arg)));
|
||||
|
||||
if (Float.compare(expected, result.floatValue()) != 0) {
|
||||
checkFloat16(result, expected, "getExponent(" + arg + ")");
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
private static void checkUlp() {
|
||||
float[][] testCases = {
|
||||
{ InfinityF, InfinityF},
|
||||
{-InfinityF, InfinityF},
|
||||
{ NaNf, NaNf},
|
||||
|
||||
// Zeros, subnormals, and MIN_VALUE all have MIN_VALUE as an ulp.
|
||||
{-0.0f, 0x1.0p-24f},
|
||||
{+0.0f, 0x1.0p-24f},
|
||||
{ 0x1.0p-24f, 0x1.0p-24f},
|
||||
{-0x1.0p-24f, 0x1.0p-24f},
|
||||
{ 0x1.0p-14f, 0x1.0p-24f},
|
||||
{-0x1.0p-14f, 0x1.0p-24f},
|
||||
|
||||
// ulp is 10 bits away
|
||||
{0x1.0p0f, 0x0.004p0f}, // 1.0f
|
||||
{0x1.0p1f, 0x0.004p1f}, // 2.0f
|
||||
{0x1.0p2f, 0x0.004p2f}, // 4.0f
|
||||
|
||||
{MAX_VAL_FP16*0.5f, 0x0.004p14f},
|
||||
{MAX_VAL_FP16, 0x0.004p15f},
|
||||
};
|
||||
|
||||
for(var testCase : testCases) {
|
||||
float arg = testCase[0];
|
||||
float expected = testCase[1];
|
||||
// Exponents are in-range for Float16
|
||||
Float16 result = ulp(valueOf(arg));
|
||||
|
||||
if (Float.compare(expected, result.floatValue()) != 0) {
|
||||
checkFloat16(result, expected, "ulp(" + arg + ")");
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
private static void throwRE(String message) {
|
||||
throw new RuntimeException(message);
|
||||
}
|
||||
|
||||
private static void checkValueOfDouble() {
|
||||
/*
|
||||
* Check that double -> Float16 conversion rounds properly
|
||||
* around the midway point for each finite Float16 value by
|
||||
* looping over the positive values and checking the negations
|
||||
* along the way.
|
||||
*/
|
||||
|
||||
String roundUpMsg = "Didn't get half-way case rounding down";
|
||||
String roundDownMsg = "Didn't get half-way case rounding up";
|
||||
|
||||
for(int i = 0; i <= Short.MAX_VALUE; i++ ) {
|
||||
boolean isEven = ((i & 0x1) == 0);
|
||||
Float16 f16 = Float16.shortBitsToFloat16((short)i);
|
||||
Float16 f16Neg = negate(f16);
|
||||
|
||||
if (!isFinite(f16))
|
||||
continue;
|
||||
|
||||
// System.out.println("\t" + toHexString(f16));
|
||||
|
||||
Float16 ulp = ulp(f16);
|
||||
double halfWay = f16.doubleValue() + ulp.doubleValue() * 0.5;
|
||||
|
||||
// Under the round to nearest even rounding policy, the
|
||||
// half-way case should round down to the starting value
|
||||
// if the starting value is even; otherwise, it should round up.
|
||||
float roundedBack = valueOf(halfWay).floatValue();
|
||||
float roundedBackNeg = valueOf(-halfWay).floatValue();
|
||||
|
||||
if (isEven) {
|
||||
checkFloat16(f16, roundedBack, roundDownMsg);
|
||||
checkFloat16(f16Neg, roundedBackNeg, roundDownMsg);
|
||||
} else {
|
||||
checkFloat16(add(f16, ulp), roundedBack, roundUpMsg);
|
||||
checkFloat16(subtract(f16Neg, ulp), roundedBackNeg, roundUpMsg);
|
||||
}
|
||||
|
||||
// Should always round down
|
||||
double halfWayNextDown = Math.nextDown(halfWay);
|
||||
checkFloat16(f16, valueOf(halfWayNextDown).floatValue(), roundDownMsg);
|
||||
checkFloat16(f16Neg, valueOf(-halfWayNextDown).floatValue(), roundDownMsg);
|
||||
|
||||
// Should always round up
|
||||
double halfWayNextUp = Math.nextUp(halfWay);
|
||||
checkFloat16(add(f16, ulp), valueOf( halfWayNextUp).floatValue(), roundUpMsg);
|
||||
checkFloat16(subtract(f16Neg, ulp), valueOf(-halfWayNextUp).floatValue(), roundUpMsg);
|
||||
}
|
||||
}
|
||||
|
||||
private static void checkValueOfLong() {
|
||||
checkFloat16(valueOf(-65_521), Float.NEGATIVE_INFINITY, "-infinity");
|
||||
checkFloat16(valueOf(-65_520), Float.NEGATIVE_INFINITY, "-infinity");
|
||||
checkFloat16(valueOf(-65_519), -MAX_VALUE.floatValue(), "-MAX_VALUE");
|
||||
checkFloat16(valueOf(65_519), MAX_VALUE.floatValue(), "MAX_VALUE");
|
||||
checkFloat16(valueOf(65_520), Float.POSITIVE_INFINITY, "+infinity");
|
||||
checkFloat16(valueOf(65_521), Float.POSITIVE_INFINITY, "+infinity");
|
||||
}
|
||||
|
||||
private static void checkValueOfString() {
|
||||
String2Float16Case[] testCases = {
|
||||
new String2Float16Case( "NaN", NaNf),
|
||||
new String2Float16Case("+NaN", NaNf),
|
||||
new String2Float16Case("-NaN", NaNf),
|
||||
|
||||
new String2Float16Case("+Infinity", +InfinityF),
|
||||
new String2Float16Case("-Infinity", -InfinityF),
|
||||
|
||||
new String2Float16Case( "0.0", 0.0f),
|
||||
new String2Float16Case("+0.0", 0.0f),
|
||||
new String2Float16Case("-0.0", -0.0f),
|
||||
|
||||
// Decimal signed integers are accepted as input; hex
|
||||
// signed integers are not, see negative test cases below.
|
||||
new String2Float16Case( "1", 1.0f),
|
||||
new String2Float16Case("-1", -1.0f),
|
||||
|
||||
new String2Float16Case( "12", 12.0f),
|
||||
new String2Float16Case("-12", -12.0f),
|
||||
|
||||
new String2Float16Case( "123", 123.0f),
|
||||
new String2Float16Case("-123", -123.0f),
|
||||
|
||||
new String2Float16Case( "1.0", 1.0f),
|
||||
new String2Float16Case("-1.0", -1.0f),
|
||||
|
||||
// Check for FloatTypeSuffix handling
|
||||
new String2Float16Case( "1.5f", 1.5f),
|
||||
new String2Float16Case( "1.5F", 1.5f),
|
||||
new String2Float16Case( "1.5D", 1.5f),
|
||||
new String2Float16Case( "1.5d", 1.5f),
|
||||
|
||||
new String2Float16Case("65504.0", 65504.0f), // Float16.MAX_VALUE
|
||||
|
||||
new String2Float16Case("65520.0", InfinityF), // Float16.MAX_VALUE + 0.5*ulp
|
||||
|
||||
new String2Float16Case("65520.01", InfinityF), // Float16.MAX_VALUE + > 0.5*ulp
|
||||
new String2Float16Case("65520.001", InfinityF), // Float16.MAX_VALUE + > 0.5*ulp
|
||||
new String2Float16Case("65520.0001", InfinityF), // Float16.MAX_VALUE + > 0.5*ulp
|
||||
new String2Float16Case("65520.00000000001", InfinityF), // Float16.MAX_VALUE + > 0.5*ulp
|
||||
|
||||
new String2Float16Case("65519.99999999999", 65504.0f), // Float16.MAX_VALUE + < 0.5*ulp
|
||||
new String2Float16Case("0x1.ffdffffffffffp15", 65504.0f),
|
||||
new String2Float16Case("0x1.ffdfffffffffp15", 65504.0f),
|
||||
|
||||
|
||||
new String2Float16Case("65519.999999999999", 65504.0f),
|
||||
new String2Float16Case("65519.9999999999999", 65504.0f),
|
||||
new String2Float16Case("65519.99999999999999", 65504.0f),
|
||||
new String2Float16Case("65519.999999999999999", 65504.0f),
|
||||
|
||||
// Float16.MAX_VALUE + < 0.5*ulp
|
||||
new String2Float16Case("65519.9999999999999999999999999999999999999", 65504.0f),
|
||||
|
||||
// Near MAX_VALUE - 0.5 ulp
|
||||
new String2Float16Case("65488.0", 65472.0f),
|
||||
new String2Float16Case("65487.9999", 65472.0f),
|
||||
new String2Float16Case("65487.99999999", 65472.0f),
|
||||
new String2Float16Case("65487.9999999999999999", 65472.0f),
|
||||
|
||||
new String2Float16Case("65488.000001", MAX_VAL_FP16),
|
||||
|
||||
new String2Float16Case("65536.0", InfinityF), // Float16.MAX_VALUE + ulp
|
||||
|
||||
// Hex values
|
||||
new String2Float16Case("0x1p2", 0x1.0p2f),
|
||||
new String2Float16Case("0x1p2f", 0x1.0p2f),
|
||||
new String2Float16Case("0x1p2d", 0x1.0p2f),
|
||||
new String2Float16Case("0x1.0p1", 0x1.0p1f),
|
||||
|
||||
new String2Float16Case("-0x1p2", -0x1.0p2f),
|
||||
new String2Float16Case("0x3.45p12", 0x3.45p12f),
|
||||
|
||||
new String2Float16Case("0x3.4500000001p12", 0x3.45p12f),
|
||||
|
||||
// Near half-way double + float cases in hex
|
||||
new String2Float16Case("0x1.ffdfffffffffffffffffffffffffffffffffffp15", 65504.0f),
|
||||
|
||||
};
|
||||
|
||||
for(String2Float16Case testCase : testCases) {
|
||||
String input = testCase.input();
|
||||
float expected = testCase.expected();
|
||||
Float16 result = Float16.valueOf(input);
|
||||
checkFloat16(result, expected, "Float16.valueOf(String) " + input);
|
||||
}
|
||||
|
||||
List<String> negativeCases = List.of("0x1",
|
||||
"-0x1",
|
||||
"0x12",
|
||||
"-0x12");
|
||||
|
||||
for(String negativeCase : negativeCases) {
|
||||
try {
|
||||
Float16 f16 = Float16.valueOf(negativeCase);
|
||||
throwRE("Did not get expected exception for input " + negativeCase);
|
||||
} catch (NumberFormatException nfe) {
|
||||
; // Expected
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
private static record String2Float16Case(String input, float expected) {
|
||||
}
|
||||
|
||||
private static void checkBaseConversionRoundTrip() {
|
||||
checkFloat16(Float16.NaN,
|
||||
Float16.valueOf("NaN").floatValue(),
|
||||
"base conversion of NaN");
|
||||
|
||||
// For each non-NaN value, make sure
|
||||
// value -> string -> value
|
||||
// sequence of conversions gives the expected result.
|
||||
|
||||
for(int i = Short.MIN_VALUE; i <= Short.MAX_VALUE; i++) {
|
||||
Float16 f16 = Float16.shortBitsToFloat16((short)i);
|
||||
if (Float16.isNaN(f16))
|
||||
continue;
|
||||
|
||||
checkFloat16(f16,
|
||||
Float16.valueOf(Float16.toString(f16)).floatValue(),
|
||||
"base conversion");
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
private static class FusedMultiplyAddTests {
|
||||
public static void main(String... args) {
|
||||
testZeroNanInfCombos();
|
||||
testNonFinite();
|
||||
testZeroes();
|
||||
testSimple();
|
||||
testRounding();
|
||||
}
|
||||
|
||||
private static void testZeroNanInfCombos() {
|
||||
float [] testInputs = {
|
||||
Float.NaN,
|
||||
-InfinityF,
|
||||
+InfinityF,
|
||||
-0.0f,
|
||||
+0.0f,
|
||||
};
|
||||
|
||||
for (float i : testInputs) {
|
||||
for (float j : testInputs) {
|
||||
for (float k : testInputs) {
|
||||
testFusedMacCase(i, j, k, Math.fma(i, j, k));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void testNonFinite() {
|
||||
float [][] testCases = {
|
||||
{1.0f, InfinityF, 2.0f,
|
||||
InfinityF},
|
||||
|
||||
{1.0f, 2.0f, InfinityF,
|
||||
InfinityF},
|
||||
|
||||
{InfinityF, 1.0f, InfinityF,
|
||||
InfinityF},
|
||||
|
||||
{0x1.ffcp14f, 2.0f, -InfinityF,
|
||||
-InfinityF},
|
||||
|
||||
{InfinityF, 1.0f, -InfinityF,
|
||||
NaNf},
|
||||
|
||||
{-InfinityF, 1.0f, InfinityF,
|
||||
NaNf},
|
||||
|
||||
{1.0f, NaNf, 2.0f,
|
||||
NaNf},
|
||||
|
||||
{1.0f, 2.0f, NaNf,
|
||||
NaNf},
|
||||
|
||||
{InfinityF, 2.0f, NaNf,
|
||||
NaNf},
|
||||
|
||||
{NaNf, 2.0f, InfinityF,
|
||||
NaNf},
|
||||
};
|
||||
|
||||
for (float[] testCase: testCases) {
|
||||
testFusedMacCase(testCase[0], testCase[1], testCase[2], testCase[3]);
|
||||
}
|
||||
}
|
||||
|
||||
private static void testZeroes() {
|
||||
float [][] testCases = {
|
||||
{+0.0f, +0.0f, +0.0f,
|
||||
+0.0f},
|
||||
|
||||
{-0.0f, +0.0f, +0.0f,
|
||||
+0.0f},
|
||||
|
||||
{+0.0f, +0.0f, -0.0f,
|
||||
+0.0f},
|
||||
|
||||
{+0.0f, +0.0f, -0.0f,
|
||||
+0.0f},
|
||||
|
||||
{-0.0f, +0.0f, -0.0f,
|
||||
-0.0f},
|
||||
|
||||
{-0.0f, -0.0f, -0.0f,
|
||||
+0.0f},
|
||||
|
||||
{-1.0f, +0.0f, -0.0f,
|
||||
-0.0f},
|
||||
|
||||
{-1.0f, +0.0f, +0.0f,
|
||||
+0.0f},
|
||||
|
||||
{-2.0f, +0.0f, -0.0f,
|
||||
-0.0f},
|
||||
};
|
||||
|
||||
for (float[] testCase: testCases) {
|
||||
testFusedMacCase(testCase[0], testCase[1], testCase[2], testCase[3]);
|
||||
}
|
||||
}
|
||||
|
||||
private static void testSimple() {
|
||||
final float ulpOneFp16 = ulp(valueOf(1.0f)).floatValue();
|
||||
|
||||
float [][] testCases = {
|
||||
{1.0f, 2.0f, 3.0f,
|
||||
5.0f},
|
||||
|
||||
{1.0f, 2.0f, -2.0f,
|
||||
0.0f},
|
||||
|
||||
{5.0f, 5.0f, -25.0f,
|
||||
0.0f},
|
||||
|
||||
{0.5f*MAX_VAL_FP16, 2.0f, -0.5f*MAX_VAL_FP16,
|
||||
0.5f*MAX_VAL_FP16},
|
||||
|
||||
{MAX_VAL_FP16, 2.0f, -MAX_VAL_FP16,
|
||||
MAX_VAL_FP16},
|
||||
|
||||
{MAX_VAL_FP16, 2.0f, 1.0f,
|
||||
InfinityF},
|
||||
|
||||
{(1.0f + ulpOneFp16),
|
||||
(1.0f + ulpOneFp16),
|
||||
-1.0f - 2.0f*ulpOneFp16,
|
||||
ulpOneFp16 * ulpOneFp16},
|
||||
|
||||
};
|
||||
|
||||
for (float[] testCase: testCases) {
|
||||
testFusedMacCase(testCase[0], testCase[1], testCase[2], testCase[3]);
|
||||
}
|
||||
}
|
||||
|
||||
private static void testRounding() {
|
||||
final float ulpOneFp16 = ulp(valueOf(1.0f)).floatValue();
|
||||
|
||||
float [][] testCases = {
|
||||
// The product is equal to
|
||||
// (MAX_VALUE + 1/2 * ulp(MAX_VALUE) + MAX_VALUE = (0x1.ffcp15 + 0x0.002p15)+ 0x1.ffcp15
|
||||
// so overflows.
|
||||
{0x1.3p1f, 0x1.afp15f, -MAX_VAL_FP16,
|
||||
InfinityF},
|
||||
|
||||
// Product exactly equals 0x1.ffep15, the overflow
|
||||
// threshold; subtracting a non-zero finite value will
|
||||
// result in MAX_VALUE, adding zero or a positive
|
||||
// value will overflow.
|
||||
{0x1.2p10f, 0x1.c7p5f, -0x1.0p-14f,
|
||||
MAX_VAL_FP16},
|
||||
|
||||
{0x1.2p10f, 0x1.c7p5f, -0.0f,
|
||||
InfinityF},
|
||||
|
||||
{0x1.2p10f, 0x1.c7p5f, +0.0f,
|
||||
InfinityF},
|
||||
|
||||
{0x1.2p10f, 0x1.c7p5f, +0x1.0p-14f,
|
||||
InfinityF},
|
||||
|
||||
{0x1.2p10f, 0x1.c7p5f, InfinityF,
|
||||
InfinityF},
|
||||
|
||||
// PRECISION bits in the subnormal intermediate product
|
||||
{0x1.ffcp-14f, 0x1.0p-24f, 0x1.0p13f, // Can be held exactly
|
||||
0x1.0p13f},
|
||||
|
||||
{0x1.ffcp-14f, 0x1.0p-24f, 0x1.0p14f, // *Cannot* be held exactly
|
||||
0x1.0p14f},
|
||||
|
||||
// Check values where the exact result cannot be
|
||||
// exactly stored in a double.
|
||||
{0x1.0p-24f, 0x1.0p-24f, 0x1.0p10f,
|
||||
0x1.0p10f},
|
||||
|
||||
{0x1.0p-24f, 0x1.0p-24f, 0x1.0p14f,
|
||||
0x1.0p14f},
|
||||
|
||||
// Check subnormal results, underflow to zero
|
||||
{0x1.0p-24f, -0.5f, 0x1.0p-24f,
|
||||
0.0f},
|
||||
|
||||
// Check subnormal results, underflow to zero
|
||||
{0x1.0p-24f, -0.5f, 0.0f,
|
||||
-0.0f},
|
||||
};
|
||||
|
||||
for (float[] testCase: testCases) {
|
||||
testFusedMacCase(testCase[0], testCase[1], testCase[2], testCase[3]);
|
||||
}
|
||||
}
|
||||
|
||||
private static void testFusedMacCase(float input1, float input2, float input3, float expected) {
|
||||
Float16 a = valueOf(input1);
|
||||
Float16 b = valueOf(input2);
|
||||
Float16 c = valueOf(input3);
|
||||
Float16 d = valueOf(expected);
|
||||
|
||||
test("Float16.fma(float)", a, b, c, Float16.fma(a, b, c), d);
|
||||
|
||||
// Permute first two inputs
|
||||
test("Float16.fma(float)", b, a, c, Float16.fma(b, a, c), d);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
private static void test(String testName,
|
||||
Float16 input1, Float16 input2, Float16 input3,
|
||||
Float16 result, Float16 expected) {
|
||||
if (Float16.compare(expected, result ) != 0) {
|
||||
System.err.println("Failure for " + testName + ":\n" +
|
||||
"\tFor inputs " + input1 + "\t(" + toHexString(input1) + ") and "
|
||||
+ input2 + "\t(" + toHexString(input2) + ") and"
|
||||
+ input3 + "\t(" + toHexString(input3) + ")\n" +
|
||||
"\texpected " + expected + "\t(" + toHexString(expected) + ")\n" +
|
||||
"\tgot " + result + "\t(" + toHexString(result) + ").");
|
||||
throw new RuntimeException();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user