From 7318b22209a83c593176ec600647a9b050362932 Mon Sep 17 00:00:00 2001 From: Joe Darcy Date: Tue, 26 Jul 2022 16:54:32 +0000 Subject: [PATCH] 8289551: Conversions between bit representations of half precision values and floats Reviewed-by: psandoz, jrose --- .../share/classes/java/lang/Float.java | 193 ++++++++ .../java/lang/Float/Binary16Conversion.java | 422 ++++++++++++++++++ .../lang/Float/Binary16ConversionNaN.java | 88 ++++ 3 files changed, 703 insertions(+) create mode 100644 test/jdk/java/lang/Float/Binary16Conversion.java create mode 100644 test/jdk/java/lang/Float/Binary16ConversionNaN.java diff --git a/src/java.base/share/classes/java/lang/Float.java b/src/java.base/share/classes/java/lang/Float.java index ae3f99d4e2d..78eaddd611b 100644 --- a/src/java.base/share/classes/java/lang/Float.java +++ b/src/java.base/share/classes/java/lang/Float.java @@ -30,6 +30,7 @@ import java.lang.constant.Constable; import java.lang.constant.ConstantDesc; import java.util.Optional; +import jdk.internal.math.FloatConsts; import jdk.internal.math.FloatingDecimal; import jdk.internal.math.FloatToDecimal; import jdk.internal.vm.annotation.IntrinsicCandidate; @@ -975,6 +976,198 @@ public final class Float extends Number @IntrinsicCandidate public static native float intBitsToFloat(int bits); + /** + * {@return the {@code float} value closest to the numerical value + * of the argument, a floating-point binary16 value encoded in a + * {@code short}} The conversion is exact; all binary16 values can + * be exactly represented in {@code float}. + * + * Special cases: + * + * + *

+ * <h2>IEEE 754 binary16 format</h2>

+ * The IEEE 754 standard defines binary16 as a 16-bit format, along + * with the 32-bit binary32 format (corresponding to the {@code + * float} type) and the 64-bit binary64 format (corresponding to + * the {@code double} type). The binary16 format is similar to the + * other IEEE 754 formats, except smaller, having all the usual + * IEEE 754 values such as NaN, signed infinities, signed zeros, + * and subnormals. The parameters (JLS {@jls 4.2.3}) for the + * binary16 format are N = 11 precision bits, K = 5 exponent bits, + * Emax = 15, and + * Emin = -14. + * + * @apiNote + * This method corresponds to the convertFormat operation defined + * in IEEE 754 from the binary16 format to the binary32 format. + * The operation of this method is analogous to a primitive + * widening conversion (JLS {@jls 5.1.2}). + * + * @param floatBinary16 the binary16 value to convert to {@code float} + * @since 20 + */ + // @IntrinsicCandidate + public static float float16ToFloat(short floatBinary16) { + /* + * The binary16 format has 1 sign bit, 5 exponent bits, and 10 + * significand bits. The exponent bias is 15. + */ + int bin16arg = (int)floatBinary16; + int bin16SignBit = 0x8000 & bin16arg; + int bin16ExpBits = 0x7c00 & bin16arg; + int bin16SignifBits = 0x03FF & bin16arg; + + // Shift left difference in the number of significand bits in + // the float and binary16 formats + final int SIGNIF_SHIFT = (FloatConsts.SIGNIFICAND_WIDTH - 11); + + float sign = (bin16SignBit != 0) ? -1.0f : 1.0f; + + // Extract binary16 exponent, remove its bias, add in the bias + // of a float exponent and shift to correct bit location + // (significand width includes the implicit bit so shift one + // less). + int bin16Exp = (bin16ExpBits >> 10) - 15; + if (bin16Exp == -15) { + // For subnormal binary16 values and 0, the numerical + // value is 2^-24 * the significand as an integer (no + // implicit bit).
+ return sign * (0x1p-24f * bin16SignifBits); + } else if (bin16Exp == 16) { + return (bin16SignifBits == 0) ? + sign * Float.POSITIVE_INFINITY : + Float.intBitsToFloat((bin16SignBit << 16) | + 0x7f80_0000 | + // Preserve NaN signif bits + ( bin16SignifBits << SIGNIF_SHIFT )); + } + + assert -15 < bin16Exp && bin16Exp < 16; + + int floatExpBits = (bin16Exp + FloatConsts.EXP_BIAS) + << (FloatConsts.SIGNIFICAND_WIDTH - 1); + + // Compute and combine result sign, exponent, and significand bits. + return Float.intBitsToFloat((bin16SignBit << 16) | + floatExpBits | + (bin16SignifBits << SIGNIF_SHIFT)); + } + + /** + * {@return the floating-point binary16 value, encoded in a {@code + * short}, closest in value to the argument} + * The conversion is computed under the {@linkplain + * java.math.RoundingMode#HALF_EVEN round to nearest even rounding + * mode}. + * + * Special cases: + * + * + * The binary16 format is discussed in + * more detail in the {@link #float16ToFloat} method. + * + * @apiNote + * This method corresponds to the convertFormat operation defined + * in IEEE 754 from the binary32 format to the binary16 format. + * The operation of this method is analogous to a primitive + * narrowing conversion (JLS {@jls 5.1.3}). + * + * @param f the {@code float} value to convert to binary16 + * @since 20 + */ + // @IntrinsicCandidate + public static short floatToFloat16(float f) { + int doppel = Float.floatToRawIntBits(f); + short sign_bit = (short)((doppel & 0x8000_0000) >> 16); + + if (Float.isNaN(f)) { + // Preserve sign and attempt to preserve significand bits + return (short)(sign_bit + | 0x7c00 // max exponent + 1 + // Preserve high order bit of float NaN in the + // binary16 result NaN (tenth bit); OR in remaining + // bits into lower 9 bits of binary 16 significand. 
+ | (doppel & 0x007f_e000) >> 13 // 10 bits + | (doppel & 0x0000_1ff0) >> 4 // 9 bits + | (doppel & 0x0000_000f)); // 4 bits + } + + float abs_f = Math.abs(f); + + // The overflow threshold is binary16 MAX_VALUE + 1/2 ulp + if (abs_f >= (0x1.ffcp15f + 0x0.002p15f) ) { + return (short)(sign_bit | 0x7c00); // Positive or negative infinity + } + + // Smallest magnitude nonzero representable binary16 value + // is equal to 0x1.0p-24; half-way and smaller rounds to zero. + if (abs_f <= 0x1.0p-24f * 0.5f) { // Covers float zeros and subnormals. + return sign_bit; // Positive or negative zero + } + + // Dealing with finite values in exponent range of binary16 + // (when rounding is done, could still round up) + int exp = Math.getExponent(f); + assert -25 <= exp && exp <= 15; + + // For binary16 subnormals, beside forcing exp to -15, retain + // the difference expdelta = E_min - exp. This is the excess + // shift value, in addition to 13, to be used in the + // computations below. Further the (hidden) msb with value 1 + // in f must be involved as well. + int expdelta = 0; + int msb = 0x0000_0000; + if (exp < -14) { + expdelta = -14 - exp; + exp = -15; + msb = 0x0080_0000; + } + int f_signif_bits = doppel & 0x007f_ffff | msb; + + // Significand bits as if using rounding to zero (truncation). + short signif_bits = (short)(f_signif_bits >> (13 + expdelta)); + + // For round to nearest even, determining whether or not to + // round up (in magnitude) is a function of the least + // significant bit (LSB), the next bit position (the round + // position), and the sticky bit (whether there are any + // nonzero bits in the exact result to the right of the round + // digit). 
An increment occurs in three cases: + // + // LSB Round Sticky + // 0 1 1 + // 1 1 0 + // 1 1 1 + // See "Computer Arithmetic Algorithms," Koren, Table 4.9 + + int lsb = f_signif_bits & (1 << 13 + expdelta); + int round = f_signif_bits & (1 << 12 + expdelta); + int sticky = f_signif_bits & ((1 << 12 + expdelta) - 1); + + if (round != 0 && ((lsb | sticky) != 0 )) { + signif_bits++; + } + + // No bits set in significand beyond the *first* exponent bit, + // not just the significand; quantity is added to the exponent + // to implement a carry out from rounding the significand. + assert (0xf800 & signif_bits) == 0x0; + + return (short)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) ); + } + /** * Compares two {@code Float} objects numerically. * diff --git a/test/jdk/java/lang/Float/Binary16Conversion.java b/test/jdk/java/lang/Float/Binary16Conversion.java new file mode 100644 index 00000000000..450db8f6139 --- /dev/null +++ b/test/jdk/java/lang/Float/Binary16Conversion.java @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8289551 + * @summary Verify conversion between float and the binary16 format + * @library ../Math + * @build FloatConsts + * @run main Binary16Conversion + */ + +public class Binary16Conversion { + public static void main(String... argv) { + int errors = 0; + errors += binary16RoundTrip(); + // Note that helper methods do sign-symmetric testing + errors += binary16CardinalValues(); + errors += roundFloatToBinary16(); + errors += roundFloatToBinary16HalfWayCases(); + errors += roundFloatToBinary16FullBinade(); + errors += alternativeImplementation(); + + if (errors > 0) + throw new RuntimeException(errors + " errors"); + } + + /* + * Put all 16-bit values through a conversion loop and make sure + * the values are preserved (NaN bit patterns notwithstanding). + */ + private static int binary16RoundTrip() { + int errors = 0; + for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) { + short s = (short)i; + float f = Float.float16ToFloat(s); + short s2 = Float.floatToFloat16(f); + + if (!Binary16.equivalent(s, s2)) { + errors++; + System.out.println("Roundtrip failure on " + + Integer.toHexString(0xFFFF & (int)s) + + "\t got back " + Integer.toHexString(0xFFFF & (int)s2)); + } + } + return errors; + } + + private static int binary16CardinalValues() { + int errors = 0; + // Encode short value for different binary16 cardinal values as an + // integer-valued float. 
+ float[][] testCases = { + {Binary16.POSITIVE_ZERO, +0.0f}, + {Binary16.MIN_VALUE, 0x1.0p-24f}, + {Binary16.MAX_SUBNORMAL, 0x1.ff8p-15f}, + {Binary16.MIN_NORMAL, 0x1.0p-14f}, + {Binary16.ONE, 1.0f}, + {Binary16.MAX_VALUE, 65504.0f}, + {Binary16.POSITIVE_INFINITY, Float.POSITIVE_INFINITY}, + }; + + // Check conversions in both directions + + // short -> float + for (var testCase : testCases) { + errors += compareAndReportError((short)testCase[0], + testCase[1]); + } + + // float -> short + for (var testCase : testCases) { + errors += compareAndReportError(testCase[1], + (short)testCase[0]); + } + + return errors; + } + + private static int roundFloatToBinary16() { + int errors = 0; + + float[][] testCases = { + // Test all combinations of LSB, round, and sticky bit + + // LSB = 0, test combination of round and sticky + {0x1.ff8000p-1f, (short)0x3bfe}, // round = 0, sticky = 0 + {0x1.ff8010p-1f, (short)0x3bfe}, // round = 0, sticky = 1 + {0x1.ffa000p-1f, (short)0x3bfe}, // round = 1, sticky = 0 + {0x1.ffa010p-1f, (short)0x3bff}, // round = 1, sticky = 1 => ++ + + // LSB = 1, test combination of round and sticky + {0x1.ffc000p-1f, Binary16.ONE-1}, // round = 0, sticky = 0 + {0x1.ffc010p-1f, Binary16.ONE-1}, // round = 0, sticky = 1 + {0x1.ffe000p-1f, Binary16.ONE}, // round = 1, sticky = 0 => ++ + {0x1.ffe010p-1f, Binary16.ONE}, // round = 1, sticky = 1 => ++ + + // Test subnormal rounding + // Largest subnormal binary16 0x03ff => 0x1.ff8p-15f; LSB = 1 + {0x1.ff8000p-15f, Binary16.MAX_SUBNORMAL}, // round = 0, sticky = 0 + {0x1.ff8010p-15f, Binary16.MAX_SUBNORMAL}, // round = 0, sticky = 1 + {0x1.ffc000p-15f, Binary16.MIN_NORMAL}, // round = 1, sticky = 0 => ++ + {0x1.ffc010p-15f, Binary16.MIN_NORMAL}, // round = 1, sticky = 1 => ++ + + // Test rounding near binary16 MIN_VALUE + // Smallest in magnitude subnormal binary16 value 0x0001 => 0x1.0p-24f + // Half-way case,0x1.0p-25f, and smaller should round down to zero + {0x1.fffffep-26f, Binary16.POSITIVE_ZERO}, // 
nextDown in float + {0x1.000000p-25f, Binary16.POSITIVE_ZERO}, + {0x1.000002p-25f, Binary16.MIN_VALUE}, // nextUp in float + {0x1.100000p-25f, Binary16.MIN_VALUE}, + + // Test rounding near overflow threshold + // Largest normal binary16 number 0x7bff => 0x1.ffcp15f; LSB = 1 + {0x1.ffc000p15f, Binary16.MAX_VALUE}, // round = 0, sticky = 0 + {0x1.ffc010p15f, Binary16.MAX_VALUE}, // round = 0, sticky = 1 + {0x1.ffe000p15f, Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 0 => ++ + {0x1.ffe010p15f, Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 1 => ++ + }; + + for (var testCase : testCases) { + errors += compareAndReportError(testCase[0], + (short)testCase[1]); + } + return errors; + } + + private static int roundFloatToBinary16HalfWayCases() { + int errors = 0; + + // Test rounding of exact half-way cases between each pair of + // finite exactly-representable binary16 numbers. Also test + // rounding of half-way +/- ulp of the *float* value. + // Additionally, test +/- float ulp of the endpoints. (Other + // tests in this file make sure all short values round-trip so + // that doesn't need to be tested here.) + + for (int i = Binary16.POSITIVE_ZERO; // 0x0000 + i <= Binary16.MAX_VALUE; // 0x7bff + i += 2) { // Check every even/odd pair once + short lower = (short) i; + short upper = (short)(i+1); + + float lowerFloat = Float.float16ToFloat(lower); + float upperFloat = Float.float16ToFloat(upper); + assert lowerFloat < upperFloat; + + float midway = (lowerFloat + upperFloat) * 0.5f; // Exact midpoint + + errors += compareAndReportError(Math.nextUp(lowerFloat), lower); + errors += compareAndReportError(Math.nextDown(midway), lower); + + // Under round to nearest even, the midway point will + // round *down* to the (even) lower endpoint. 
+ errors += compareAndReportError( midway, lower); + + errors += compareAndReportError(Math.nextUp( midway), upper); + errors += compareAndReportError(Math.nextDown(upperFloat), upper); + } + + // More testing around the overflow threshold + // Binary16.ulp(Binary16.MAX_VALUE) == 32.0f; test around Binary16.MAX_VALUE + 1/2 ulp + float binary16_MAX_VALUE = Float.float16ToFloat(Binary16.MAX_VALUE); + float binary16_MAX_VALUE_halfUlp = binary16_MAX_VALUE + 16.0f; + + errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE), Binary16.MAX_VALUE); + errors += compareAndReportError( binary16_MAX_VALUE, Binary16.MAX_VALUE); + errors += compareAndReportError(Math.nextUp( binary16_MAX_VALUE), Binary16.MAX_VALUE); + + // Binary16.MAX_VALUE is an "odd" value since its LSB = 1 so + // the half-way value greater than Binary16.MAX_VALUE should + // round up to the next even value, in this case Binary16.POSITIVE_INFINITY. + errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE_halfUlp), Binary16.MAX_VALUE); + errors += compareAndReportError( binary16_MAX_VALUE_halfUlp, Binary16.POSITIVE_INFINITY); + errors += compareAndReportError(Math.nextUp( binary16_MAX_VALUE_halfUlp), Binary16.POSITIVE_INFINITY); + + return errors; + } + + private static int compareAndReportError(float input, + short expected) { + // Round to nearest even is sign symmetric + return compareAndReportError0( input, expected) + + compareAndReportError0(-input, Binary16.negate(expected)); + } + + private static int compareAndReportError0(float input, + short expected) { + short actual = Float.floatToFloat16(input); + if (!Binary16.equivalent(actual, expected)) { + System.out.println("Unexpected result of converting " + + Float.toHexString(input) + + " to short. 
Expected 0x" + Integer.toHexString(0xFFFF & expected) + + " got 0x" + Integer.toHexString(0xFFFF & actual)); + return 1; + } + return 0; + } + + private static int compareAndReportError0(short input, + float expected) { + float actual = Float.float16ToFloat(input); + if (Float.compare(actual, expected) != 0) { + System.out.println("Unexpected result of converting " + + Integer.toHexString(input & 0xFFFF) + + " to float. Expected " + Float.toHexString(expected) + + " got " + Float.toHexString(actual)); + return 1; + } + return 0; + } + + private static int compareAndReportError(short input, + float expected) { + // Round to nearest even is sign symmetric + return compareAndReportError0( input, expected) + + compareAndReportError0(Binary16.negate(input), -expected); + } + + private static int roundFloatToBinary16FullBinade() { + int errors = 0; + + // For each float value between 1.0 and less than 2.0 + // (i.e. set of float values with an exponent of 0), convert + // each value to binary16 and then convert that binary16 value + // back to float. + // + // Any exponent could be used; the maximum exponent for normal + // values would not exercise the full set of code paths since + // there is an up-front check on values that would overflow, + // which correspond to a ripple-carry of the significand that + // bumps the exponent. + short previous = (short)0; + for (int i = Float.floatToIntBits(1.0f); + i <= Float.floatToIntBits(Math.nextDown(2.0f)); + i++) { + // (Could also express the loop control directly in terms + // of floating-point operations, incrementing by ulp(1.0), + // etc.) + + float f = Float.intBitsToFloat(i); + short f_as_bin16 = Float.floatToFloat16(f); + short f_as_bin16_down = (short)(f_as_bin16 - 1); + short f_as_bin16_up = (short)(f_as_bin16 + 1); + + // Across successive float values to convert to binary16, + // the binary16 results should be semi-monotonic, + // non-decreasing in this case. 
+ + // Only positive binary16 values so can compare using integer operations + if (f_as_bin16 < previous) { + errors++; + System.out.println("Semi-monotonicity violation observed on " + + Integer.toHexString(0xfff & f_as_bin16)); + } + previous = f_as_bin16; + + // If round-to-nearest was correctly done, when exactly + // mapped back to float, f_as_bin16 should be at least as + // close as either of its neighbors to the original value + // of f. + + float f_prime_down = Float.float16ToFloat(f_as_bin16_down); + float f_prime = Float.float16ToFloat(f_as_bin16); + float f_prime_up = Float.float16ToFloat(f_as_bin16_up); + + float f_prime_diff = Math.abs(f - f_prime); + if (f_prime_diff == 0.0) { + continue; + } + float f_prime_down_diff = Math.abs(f - f_prime_down); + float f_prime_up_diff = Math.abs(f - f_prime_up); + + if (f_prime_diff > f_prime_down_diff || + f_prime_diff > f_prime_up_diff) { + errors++; + System.out.println("Round-to-nearest violation on converting " + + Float.toHexString(f) + " to binary16 and back."); + } + } + return errors; + } + + private static int alternativeImplementation() { + int errors = 0; + + // For exhaustive test of all float values use + // for (long ell = Integer.MIN_VALUE; ell <= Integer.MAX_VALUE; ell++) { + + for (long ell = Float.floatToIntBits(2.0f); + ell <= Float.floatToIntBits(4.0f); + ell++) { + float f = Float.intBitsToFloat((int)ell); + short s1 = Float.floatToFloat16(f); + short s2 = altFloatToFloat16(f); + + if (s1 != s2) { + errors++; + System.out.println("Different conversion of float value " + Float.toHexString(f)); + } + } + + return errors; + } + + /* + * Rely on float operations to do rounding in both normal and + * subnormal binary16 cases. 
+ */ + public static short altFloatToFloat16(float f) { + int doppel = Float.floatToRawIntBits(f); + short sign_bit = (short)((doppel & 0x8000_0000) >> 16); + + if (Float.isNaN(f)) { + // Preserve sign and attempt to preserve significand bits + return (short)(sign_bit + | 0x7c00 // max exponent + 1 + // Preserve high order bit of float NaN in the + // binary16 result NaN (tenth bit); OR in remaining + // bits into lower 9 bits of binary 16 significand. + | (doppel & 0x007f_e000) >> 13 // 10 bits + | (doppel & 0x0000_1ff0) >> 4 // 9 bits + | (doppel & 0x0000_000f)); // 4 bits + } + + float abs_f = Math.abs(f); + + // The overflow threshold is binary16 MAX_VALUE + 1/2 ulp + if (abs_f >= (65504.0f + 16.0f) ) { + return (short)(sign_bit | 0x7c00); // Positive or negative infinity + } else { + // Smallest magnitude nonzero representable binary16 value + // is equal to 0x1.0p-24; half-way and smaller rounds to zero. + if (abs_f <= 0x1.0p-25f) { // Covers float zeros and subnormals. + return sign_bit; // Positive or negative zero + } + + // Dealing with finite values in exponent range of + // binary16 (when rounding is done, could still round up) + int exp = Math.getExponent(f); + assert -25 <= exp && exp <= 15; + short signif_bits; + + if (exp <= -15) { // scale down to float subnormal range to do rounding + // Use a float multiply to compute the correct + // trailing significand bits for a binary16 subnormal. + // + // The exponent range of normalized binary16 subnormal + // values is [-24, -15]. The exponent range of float + // subnormals is [-149, -140]. Multiply abs_f down by + // 2^(-125) -- since (-125 = -149 - (-24)) -- so that + // the trailing bits of a subnormal float represent + // the correct trailing bits of a binary16 subnormal. + exp = -15; // Subnormal encoding using -E_max. 
+ float f_adjust = abs_f * 0x1.0p-125f; + + // In case the significand rounds up and has a carry + // propagate all the way up, take the bottom 11 bits + // rather than bottom 10 bits. Adding this value, + // rather than OR'ing this value, will cause the right + // exponent adjustment. + signif_bits = (short)(Float.floatToRawIntBits(f_adjust) & 0x07ff); + return (short)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) ); + } else { + // Scale down to subnormal range to round off excess bits + int scalingExp = -139 - exp; + float scaled = Math.scalb(Math.scalb(f, scalingExp), + -scalingExp); + exp = Math.getExponent(scaled); + doppel = Float.floatToRawIntBits(scaled); + + signif_bits = (short)((doppel & 0x007f_e000) >> + (FloatConsts.SIGNIFICAND_WIDTH - 11)); + return (short)(sign_bit | ( ((exp + 15) << 10) | signif_bits ) ); + } + } + } + + public static class Binary16 { + public static final short POSITIVE_INFINITY = (short)0x7c00; + public static final short MAX_VALUE = 0x7bff; + public static final short ONE = 0x3c00; + public static final short MIN_NORMAL = 0x0400; + public static final short MAX_SUBNORMAL = 0x03ff; + public static final short MIN_VALUE = 0x0001; + public static final short POSITIVE_ZERO = 0x0000; + + public static boolean isNaN(short binary16) { + return ((binary16 & 0x7c00) == 0x7c00) // Max exponent and... + && ((binary16 & 0x03ff) != 0 ); // significand nonzero. + } + + public static short negate(short binary16) { + return (short)(binary16 ^ 0x8000 ); // Flip only sign bit. + } + + public static boolean equivalent(short bin16_1, short bin16_2) { + return (bin16_1 == bin16_2) || + isNaN(bin16_1) && isNaN(bin16_2); + } + } +} diff --git a/test/jdk/java/lang/Float/Binary16ConversionNaN.java b/test/jdk/java/lang/Float/Binary16ConversionNaN.java new file mode 100644 index 00000000000..d541fdf3c31 --- /dev/null +++ b/test/jdk/java/lang/Float/Binary16ConversionNaN.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates.
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8289551 + * @summary Verify NaN sign and significand bits are preserved across conversions + */ + +/* + * The behavior tested below is an implementation property not + * required by the specification. It would be acceptable for this + * information to not be preserved (as long as a NaN is returned) if, + * say, an intrinsified version using native hardware instructions + * behaved differently. + * + * If that is the case, this test should be modified to disable + * intrinsics or to otherwise not run on platforms with a differently + * behaving intrinsic. + */ +public class Binary16ConversionNaN { + public static void main(String... argv) { + int errors = 0; + errors += binary16NaNRoundTrip(); + + if (errors > 0) + throw new RuntimeException(errors + " errors"); + } + + /* + * Put all 16-bit NaN values through a conversion loop and make + * sure the significand, sign, and exponent are all preserved.
+ */ + private static int binary16NaNRoundTrip() { + int errors = 0; + final int NAN_EXPONENT = 0x7c00; + final int SIGN_BIT = 0x8000; + + // A NaN has a nonzero significand + for (int i = 1; i <= 0x3ff; i++) { + short binary16NaN = (short)(NAN_EXPONENT | i); + assert isNaN(binary16NaN); + errors += testRoundTrip( binary16NaN); + errors += testRoundTrip((short)(SIGN_BIT | binary16NaN)); + } + return errors; + } + + private static boolean isNaN(short binary16) { + return ((binary16 & 0x7c00) == 0x7c00) // Max exponent and... + && ((binary16 & 0x03ff) != 0 ); // significand nonzero. + } + + private static int testRoundTrip(int i) { + int errors = 0; + short s = (short)i; + float f = Float.float16ToFloat(s); + short s2 = Float.floatToFloat16(f); + + if (s != s2) { + errors++; + System.out.println("Roundtrip failure on NaN value " + + Integer.toHexString(0xFFFF & (int)s) + + "\t got back " + Integer.toHexString(0xFFFF & (int)s2)); + } + return errors; + } +}