8289551: Conversions between bit representations of half precision values and floats

Reviewed-by: psandoz, jrose
2022-07-26 16:54:32 +00:00 · 2022-07-26 16:54:32 +00:00 · 7318b22209
commit 7318b22209
parent 2ae8e31183
3 changed files with 703 additions and 0 deletions
--- a/src/java.base/share/classes/java/lang/Float.java
+++ b/src/java.base/share/classes/java/lang/Float.java
@ -30,6 +30,7 @@ import java.lang.constant.Constable;
 import java.lang.constant.ConstantDesc;
 import java.util.Optional;

+import jdk.internal.math.FloatConsts;
 import jdk.internal.math.FloatingDecimal;
 import jdk.internal.math.FloatToDecimal;
 import jdk.internal.vm.annotation.IntrinsicCandidate;
@ -975,6 +976,198 @@ public final class Float extends Number
    @IntrinsicCandidate
    public static native float intBitsToFloat(int bits);

+    /**
+     * {@return the {@code float} value closest to the numerical value
+     * of the argument, a floating-point binary16 value encoded in a
+     * {@code short}} The conversion is exact; all binary16 values can
+     * be exactly represented in {@code float}.
+     *
+     * Special cases:
+     * <ul>
+     * <li> If the argument is zero, the result is a zero with the
+     * same sign as the argument.
+     * <li> If the argument is infinite, the result is an infinity
+     * with the same sign as the argument.
+     * <li> If the argument is a NaN, the result is a NaN.
+     * </ul>
+     *
+     * <h4><a id=binary16Format>IEEE 754 binary16 format</a></h4>
+     * The IEEE 754 standard defines binary16 as a 16-bit format, along
+     * with the 32-bit binary32 format (corresponding to the {@code
+     * float} type) and the 64-bit binary64 format (corresponding to
+     * the {@code double} type). The binary16 format is similar to the
+     * other IEEE 754 formats, except smaller, having all the usual
+     * IEEE 754 values such as NaN, signed infinities, signed zeros,
+     * and subnormals. The parameters (JLS {@jls 4.2.3}) for the
+     * binary16 format are N = 11 precision bits, K = 5 exponent bits,
+     * <i>E</i><sub><i>max</i></sub> = 15, and
+     * <i>E</i><sub><i>min</i></sub> = -14.
+     *
+     * @apiNote
+     * This method corresponds to the convertFormat operation defined
+     * in IEEE 754 from the binary16 format to the binary32 format.
+     * The operation of this method is analogous to a primitive
+     * widening conversion (JLS {@jls 5.1.2}).
+     *
+     * @param floatBinary16 the binary16 value to convert to {@code float}
+     * @since 20
+     */
+    // @IntrinsicCandidate
+    public static float float16ToFloat(short floatBinary16) {
+        /*
+         * The binary16 format has 1 sign bit, 5 exponent bits, and 10
+         * significand bits. The exponent bias is 15.
+         */
+        int bin16arg = (int)floatBinary16;
+        int bin16SignBit     = 0x8000 & bin16arg;
+        int bin16ExpBits     = 0x7c00 & bin16arg;
+        int bin16SignifBits  = 0x03FF & bin16arg;
+
+        // Shift left difference in the number of significand bits in
+        // the float and binary16 formats
+        final int SIGNIF_SHIFT = (FloatConsts.SIGNIFICAND_WIDTH - 11);
+
+        float sign = (bin16SignBit != 0) ? -1.0f : 1.0f;
+
+        // Extract binary16 exponent, remove its bias, add in the bias
+        // of a float exponent and shift to correct bit location
+        // (significand width includes the implicit bit so shift one
+        // less).
+        int bin16Exp = (bin16ExpBits >> 10) - 15;
+        if (bin16Exp == -15) {
+            // For subnormal binary16 values and 0, the numerical
+            // value is 2^24 * the significand as an integer (no
+            // implicit bit).
+            return sign * (0x1p-24f * bin16SignifBits);
+        } else if (bin16Exp == 16) {
+            return (bin16SignifBits == 0) ?
+                sign * Float.POSITIVE_INFINITY :
+                Float.intBitsToFloat((bin16SignBit << 16) |
+                                     0x7f80_0000 |
+                                     // Preserve NaN signif bits
+                                     ( bin16SignifBits << SIGNIF_SHIFT ));
+        }
+
+        assert -15 < bin16Exp  && bin16Exp < 16;
+
+        int floatExpBits = (bin16Exp + FloatConsts.EXP_BIAS)
+            << (FloatConsts.SIGNIFICAND_WIDTH - 1);
+
+        // Compute and combine result sign, exponent, and significand bits.
+        return Float.intBitsToFloat((bin16SignBit << 16) |
+                                    floatExpBits |
+                                    (bin16SignifBits << SIGNIF_SHIFT));
+    }
+
+    /**
+     * {@return the floating-point binary16 value, encoded in a {@code
+     * short}, closest in value to the argument}
+     * The conversion is computed under the {@linkplain
+     * java.math.RoundingMode#HALF_EVEN round to nearest even rounding
+     * mode}.
+     *
+     * Special cases:
+     * <ul>
+     * <li> If the argument is zero, the result is a zero with the
+     * same sign as the argument.
+     * <li> If the argument is infinite, the result is an infinity
+     * with the same sign as the argument.
+     * <li> If the argument is a NaN, the result is a NaN.
+     * </ul>
+     *
+     * The <a href="#binary16Format">binary16 format</a> is discussed in
+     * more detail in the {@link #float16ToFloat} method.
+     *
+     * @apiNote
+     * This method corresponds to the convertFormat operation defined
+     * in IEEE 754 from the binary32 format to the binary16 format.
+     * The operation of this method is analogous to a primitive
+     * narrowing conversion (JLS {@jls 5.1.3}).
+     *
+     * @param f the {@code float} value to convert to binary16
+     * @since 20
+     */
+    // @IntrinsicCandidate
+    public static short floatToFloat16(float f) {
+        int doppel = Float.floatToRawIntBits(f);
+        short sign_bit = (short)((doppel & 0x8000_0000) >> 16);
+
+        if (Float.isNaN(f)) {
+            // Preserve sign and attempt to preserve significand bits
+            return (short)(sign_bit
+                    | 0x7c00 // max exponent + 1
+                    // Preserve high order bit of float NaN in the
+                    // binary16 result NaN (tenth bit); OR in remaining
+                    // bits into lower 9 bits of binary 16 significand.
+                    | (doppel & 0x007f_e000) >> 13 // 10 bits
+                    | (doppel & 0x0000_1ff0) >> 4  //  9 bits
+                    | (doppel & 0x0000_000f));     //  4 bits
+        }
+
+        float abs_f = Math.abs(f);
+
+        // The overflow threshold is binary16 MAX_VALUE + 1/2 ulp
+        if (abs_f >= (0x1.ffcp15f + 0x0.002p15f) ) {
+            return (short)(sign_bit | 0x7c00); // Positive or negative infinity
+        }
+
+        // Smallest magnitude nonzero representable binary16 value
+        // is equal to 0x1.0p-24; half-way and smaller rounds to zero.
+        if (abs_f <= 0x1.0p-24f * 0.5f) { // Covers float zeros and subnormals.
+            return sign_bit; // Positive or negative zero
+        }
+
+        // Dealing with finite values in exponent range of binary16
+        // (when rounding is done, could still round up)
+        int exp = Math.getExponent(f);
+        assert -25 <= exp && exp <= 15;
+
+        // For binary16 subnormals, beside forcing exp to -15, retain
+        // the difference expdelta = E_min - exp.  This is the excess
+        // shift value, in addition to 13, to be used in the
+        // computations below.  Further the (hidden) msb with value 1
+        // in f must be involved as well.
+        int expdelta = 0;
+        int msb = 0x0000_0000;
+        if (exp < -14) {
+            expdelta = -14 - exp;
+            exp = -15;
+            msb = 0x0080_0000;
+        }
+        int f_signif_bits = doppel & 0x007f_ffff | msb;
+
+        // Significand bits as if using rounding to zero (truncation).
+        short signif_bits = (short)(f_signif_bits >> (13 + expdelta));
+
+        // For round to nearest even, determining whether or not to
+        // round up (in magnitude) is a function of the least
+        // significant bit (LSB), the next bit position (the round
+        // position), and the sticky bit (whether there are any
+        // nonzero bits in the exact result to the right of the round
+        // digit). An increment occurs in three cases:
+        //
+        // LSB  Round Sticky
+        // 0    1     1
+        // 1    1     0
+        // 1    1     1
+        // See "Computer Arithmetic Algorithms," Koren, Table 4.9
+
+        int lsb    = f_signif_bits & (1 << 13 + expdelta);
+        int round  = f_signif_bits & (1 << 12 + expdelta);
+        int sticky = f_signif_bits & ((1 << 12 + expdelta) - 1);
+
+        if (round != 0 && ((lsb | sticky) != 0 )) {
+            signif_bits++;
+        }
+
+        // No bits set in significand beyond the *first* exponent bit,
+        // not just the sigificand; quantity is added to the exponent
+        // to implement a carry out from rounding the significand.
+        assert (0xf800 & signif_bits) == 0x0;
+
+        return (short)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) );
+    }
+
    /**
     * Compares two {@code Float} objects numerically.
     *
--- a/test/jdk/java/lang/Float/Binary16Conversion.java
+++ b/test/jdk/java/lang/Float/Binary16Conversion.java
@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8289551
+ * @summary Verify conversion between float and the binary16 format
+ * @library ../Math
+ * @build FloatConsts
+ * @run main Binary16Conversion
+ */
+
+public class Binary16Conversion {
+    public static void main(String... argv) {
+        int errors = 0;
+        errors += binary16RoundTrip();
+        // Note that helper methods do sign-symmetric testing
+        errors += binary16CardinalValues();
+        errors += roundFloatToBinary16();
+        errors += roundFloatToBinary16HalfWayCases();
+        errors += roundFloatToBinary16FullBinade();
+        errors += alternativeImplementation();
+
+        if (errors > 0)
+            throw new RuntimeException(errors + " errors");
+    }
+
+    /*
+     * Put all 16-bit values through a conversion loop and make sure
+     * the values are preserved (NaN bit patterns notwithstanding).
+     */
+    private static int binary16RoundTrip() {
+        int errors = 0;
+        for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
+            short s = (short)i;
+            float f =  Float.float16ToFloat(s);
+            short s2 = Float.floatToFloat16(f);
+
+            if (!Binary16.equivalent(s, s2)) {
+                errors++;
+                System.out.println("Roundtrip failure on " +
+                                   Integer.toHexString(0xFFFF & (int)s) +
+                                   "\t got back " + Integer.toHexString(0xFFFF & (int)s2));
+            }
+        }
+        return errors;
+    }
+
+    private static int binary16CardinalValues() {
+        int errors = 0;
+        // Encode short value for different binary16 cardinal values as an
+        // integer-valued float.
+        float[][] testCases = {
+            {Binary16.POSITIVE_ZERO,         +0.0f},
+            {Binary16.MIN_VALUE,              0x1.0p-24f},
+            {Binary16.MAX_SUBNORMAL,          0x1.ff8p-15f},
+            {Binary16.MIN_NORMAL,             0x1.0p-14f},
+            {Binary16.ONE,                    1.0f},
+            {Binary16.MAX_VALUE,              65504.0f},
+            {Binary16.POSITIVE_INFINITY,      Float.POSITIVE_INFINITY},
+        };
+
+        // Check conversions in both directions
+
+        // short -> float
+        for (var testCase : testCases) {
+            errors += compareAndReportError((short)testCase[0],
+                                            testCase[1]);
+        }
+
+        // float -> short
+        for (var testCase : testCases) {
+            errors += compareAndReportError(testCase[1],
+                                            (short)testCase[0]);
+        }
+
+        return errors;
+    }
+
+    private static int roundFloatToBinary16() {
+        int errors = 0;
+
+        float[][] testCases = {
+            // Test all combinations of LSB, round, and sticky bit
+
+            // LSB = 0, test combination of round and sticky
+            {0x1.ff8000p-1f,       (short)0x3bfe},              // round = 0, sticky = 0
+            {0x1.ff8010p-1f,       (short)0x3bfe},              // round = 0, sticky = 1
+            {0x1.ffa000p-1f,       (short)0x3bfe},              // round = 1, sticky = 0
+            {0x1.ffa010p-1f,       (short)0x3bff},              // round = 1, sticky = 1 => ++
+
+            // LSB = 1, test combination of round and sticky
+            {0x1.ffc000p-1f,       Binary16.ONE-1},             // round = 0, sticky = 0
+            {0x1.ffc010p-1f,       Binary16.ONE-1},             // round = 0, sticky = 1
+            {0x1.ffe000p-1f,       Binary16.ONE},               // round = 1, sticky = 0 => ++
+            {0x1.ffe010p-1f,       Binary16.ONE},               // round = 1, sticky = 1 => ++
+
+            // Test subnormal rounding
+            // Largest subnormal binary16 0x03ff => 0x1.ff8p-15f; LSB = 1
+            {0x1.ff8000p-15f,      Binary16.MAX_SUBNORMAL},     // round = 0, sticky = 0
+            {0x1.ff8010p-15f,      Binary16.MAX_SUBNORMAL},     // round = 0, sticky = 1
+            {0x1.ffc000p-15f,      Binary16.MIN_NORMAL},        // round = 1, sticky = 0 => ++
+            {0x1.ffc010p-15f,      Binary16.MIN_NORMAL},        // round = 1, sticky = 1 => ++
+
+            // Test rounding near binary16 MIN_VALUE
+            // Smallest in magnitude subnormal binary16 value 0x0001 => 0x1.0p-24f
+            // Half-way case,0x1.0p-25f, and smaller should round down to zero
+            {0x1.fffffep-26f,      Binary16.POSITIVE_ZERO},     // nextDown in float
+            {0x1.000000p-25f,      Binary16.POSITIVE_ZERO},
+            {0x1.000002p-25f,      Binary16.MIN_VALUE},         // nextUp in float
+            {0x1.100000p-25f,      Binary16.MIN_VALUE},
+
+            // Test rounding near overflow threshold
+            // Largest normal binary16 number 0x7bff => 0x1.ffcp15f; LSB = 1
+            {0x1.ffc000p15f,       Binary16.MAX_VALUE},         // round = 0, sticky = 0
+            {0x1.ffc010p15f,       Binary16.MAX_VALUE},         // round = 0, sticky = 1
+            {0x1.ffe000p15f,       Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 0 => ++
+            {0x1.ffe010p15f,       Binary16.POSITIVE_INFINITY}, // round = 1, sticky = 1 => ++
+        };
+
+        for (var testCase : testCases) {
+            errors += compareAndReportError(testCase[0],
+                                            (short)testCase[1]);
+        }
+        return errors;
+    }
+
+    private static int roundFloatToBinary16HalfWayCases() {
+        int errors = 0;
+
+        // Test rounding of exact half-way cases between each pair of
+        // finite exactly-representable binary16 numbers. Also test
+        // rounding of half-way +/- ulp of the *float* value.
+        // Additionally, test +/- float ulp of the endpoints. (Other
+        // tests in this file make sure all short values round-trip so
+        // that doesn't need to be tested here.)
+
+        for (int i = Binary16.POSITIVE_ZERO; // 0x0000
+             i    <= Binary16.MAX_VALUE;     // 0x7bff
+             i += 2) {     // Check every even/odd pair once
+            short lower = (short) i;
+            short upper = (short)(i+1);
+
+            float lowerFloat = Float.float16ToFloat(lower);
+            float upperFloat = Float.float16ToFloat(upper);
+            assert lowerFloat < upperFloat;
+
+            float midway = (lowerFloat + upperFloat) * 0.5f; // Exact midpoint
+
+            errors += compareAndReportError(Math.nextUp(lowerFloat),   lower);
+            errors += compareAndReportError(Math.nextDown(midway),     lower);
+
+            // Under round to nearest even, the midway point will
+            // round *down* to the (even) lower endpoint.
+            errors += compareAndReportError(              midway,      lower);
+
+            errors += compareAndReportError(Math.nextUp(  midway),     upper);
+            errors += compareAndReportError(Math.nextDown(upperFloat), upper);
+        }
+
+        // More testing around the overflow threshold
+        // Binary16.ulp(Binary16.MAX_VALUE) == 32.0f; test around Binary16.MAX_VALUE + 1/2 ulp
+        float binary16_MAX_VALUE = Float.float16ToFloat(Binary16.MAX_VALUE);
+        float binary16_MAX_VALUE_halfUlp = binary16_MAX_VALUE + 16.0f;
+
+        errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE), Binary16.MAX_VALUE);
+        errors += compareAndReportError(              binary16_MAX_VALUE,  Binary16.MAX_VALUE);
+        errors += compareAndReportError(Math.nextUp(  binary16_MAX_VALUE), Binary16.MAX_VALUE);
+
+        // Binary16.MAX_VALUE is an "odd" value since its LSB = 1 so
+        // the half-way value greater than Binary16.MAX_VALUE should
+        // round up to the next even value, in this case Binary16.POSITIVE_INFINITY.
+        errors += compareAndReportError(Math.nextDown(binary16_MAX_VALUE_halfUlp), Binary16.MAX_VALUE);
+        errors += compareAndReportError(              binary16_MAX_VALUE_halfUlp,  Binary16.POSITIVE_INFINITY);
+        errors += compareAndReportError(Math.nextUp(  binary16_MAX_VALUE_halfUlp), Binary16.POSITIVE_INFINITY);
+
+        return errors;
+    }
+
+    private static int compareAndReportError(float input,
+                                             short expected) {
+        // Round to nearest even is sign symmetric
+        return compareAndReportError0( input,                 expected) +
+               compareAndReportError0(-input, Binary16.negate(expected));
+    }
+
+    private static int compareAndReportError0(float input,
+                                              short expected) {
+        short actual = Float.floatToFloat16(input);
+        if (!Binary16.equivalent(actual, expected)) {
+            System.out.println("Unexpected result of converting " +
+                               Float.toHexString(input) +
+                               " to short. Expected 0x" + Integer.toHexString(0xFFFF & expected) +
+                               " got 0x" + Integer.toHexString(0xFFFF & actual));
+            return 1;
+            }
+        return 0;
+    }
+
+    private static int compareAndReportError0(short input,
+                                              float expected) {
+        float actual = Float.float16ToFloat(input);
+        if (Float.compare(actual, expected) != 0) {
+            System.out.println("Unexpected result of converting " +
+                               Integer.toHexString(input & 0xFFFF) +
+                               " to float. Expected " + Float.toHexString(expected) +
+                               " got " + Float.toHexString(actual));
+            return 1;
+            }
+        return 0;
+    }
+
+    private static int compareAndReportError(short input,
+                                             float expected) {
+        // Round to nearest even is sign symmetric
+        return compareAndReportError0(                input,   expected) +
+               compareAndReportError0(Binary16.negate(input), -expected);
+    }
+
+    private static int roundFloatToBinary16FullBinade() {
+        int errors = 0;
+
+        // For each float value between 1.0 and less than 2.0
+        // (i.e. set of float values with an exponent of 0), convert
+        // each value to binary16 and then convert that binary16 value
+        // back to float.
+        //
+        // Any exponent could be used; the maximum exponent for normal
+        // values would not exercise the full set of code paths since
+        // there is an up-front check on values that would overflow,
+        // which correspond to a ripple-carry of the significand that
+        // bumps the exponent.
+        short previous = (short)0;
+        for (int i = Float.floatToIntBits(1.0f);
+             i <= Float.floatToIntBits(Math.nextDown(2.0f));
+             i++) {
+            // (Could also express the loop control directly in terms
+            // of floating-point operations, incrementing by ulp(1.0),
+            // etc.)
+
+            float f = Float.intBitsToFloat(i);
+            short f_as_bin16 = Float.floatToFloat16(f);
+            short f_as_bin16_down = (short)(f_as_bin16 - 1);
+            short f_as_bin16_up   = (short)(f_as_bin16 + 1);
+
+            // Across successive float values to convert to binary16,
+            // the binary16 results should be semi-monotonic,
+            // non-decreasing in this case.
+
+            // Only positive binary16 values so can compare using integer operations
+            if (f_as_bin16 < previous) {
+                errors++;
+                System.out.println("Semi-monotonicity violation observed on " +
+                                   Integer.toHexString(0xfff & f_as_bin16));
+            }
+            previous = f_as_bin16;
+
+            // If round-to-nearest was correctly done, when exactly
+            // mapped back to float, f_as_bin16 should be at least as
+            // close as either of its neighbors to the original value
+            // of f.
+
+            float f_prime_down = Float.float16ToFloat(f_as_bin16_down);
+            float f_prime      = Float.float16ToFloat(f_as_bin16);
+            float f_prime_up   = Float.float16ToFloat(f_as_bin16_up);
+
+            float f_prime_diff = Math.abs(f - f_prime);
+            if (f_prime_diff == 0.0) {
+                continue;
+            }
+            float f_prime_down_diff = Math.abs(f - f_prime_down);
+            float f_prime_up_diff   = Math.abs(f - f_prime_up);
+
+            if (f_prime_diff > f_prime_down_diff ||
+                f_prime_diff > f_prime_up_diff) {
+                errors++;
+                System.out.println("Round-to-nearest violation on converting " +
+                                   Float.toHexString(f) + " to binary16 and back.");
+            }
+        }
+        return errors;
+    }
+
+    private static int alternativeImplementation() {
+        int errors = 0;
+
+        // For exhaustive test of all float values use
+        // for (long ell = Integer.MIN_VALUE; ell <= Integer.MAX_VALUE; ell++) {
+
+        for (long ell   = Float.floatToIntBits(2.0f);
+             ell       <= Float.floatToIntBits(4.0f);
+             ell++) {
+            float f = Float.intBitsToFloat((int)ell);
+            short s1 = Float.floatToFloat16(f);
+            short s2 =    altFloatToFloat16(f);
+
+            if (s1 != s2) {
+                errors++;
+                System.out.println("Different conversion of float value " + Float.toHexString(f));
+            }
+        }
+
+        return errors;
+    }
+
+    /*
+     * Rely on float operations to do rounding in both normal and
+     * subnormal binary16 cases.
+     */
+    public static short altFloatToFloat16(float f) {
+        int doppel = Float.floatToRawIntBits(f);
+        short sign_bit = (short)((doppel & 0x8000_0000) >> 16);
+
+        if (Float.isNaN(f)) {
+            // Preserve sign and attempt to preserve significand bits
+            return (short)(sign_bit
+                    | 0x7c00 // max exponent + 1
+                    // Preserve high order bit of float NaN in the
+                    // binary16 result NaN (tenth bit); OR in remaining
+                    // bits into lower 9 bits of binary 16 significand.
+                    | (doppel & 0x007f_e000) >> 13 // 10 bits
+                    | (doppel & 0x0000_1ff0) >> 4  //  9 bits
+                    | (doppel & 0x0000_000f));     //  4 bits
+        }
+
+        float abs_f = Math.abs(f);
+
+        // The overflow threshold is binary16 MAX_VALUE + 1/2 ulp
+        if (abs_f >= (65504.0f + 16.0f) ) {
+            return (short)(sign_bit | 0x7c00); // Positive or negative infinity
+        } else {
+            // Smallest magnitude nonzero representable binary16 value
+            // is equal to 0x1.0p-24; half-way and smaller rounds to zero.
+            if (abs_f <= 0x1.0p-25f) { // Covers float zeros and subnormals.
+                return sign_bit; // Positive or negative zero
+            }
+
+            // Dealing with finite values in exponent range of
+            // binary16 (when rounding is done, could still round up)
+            int exp = Math.getExponent(f);
+            assert -25 <= exp && exp <= 15;
+            short signif_bits;
+
+            if (exp <= -15) { // scale down to float subnormal range to do rounding
+                // Use a float multiply to compute the correct
+                // trailing significand bits for a binary16 subnormal.
+                //
+                // The exponent range of normalized binary16 subnormal
+                // values is [-24, -15]. The exponent range of float
+                // subnormals is [-149, -140]. Multiply abs_f down by
+                // 2^(-125) -- since (-125 = -149 - (-24)) -- so that
+                // the trailing bits of a subnormal float represent
+                // the correct trailing bits of a binary16 subnormal.
+                exp = -15; // Subnormal encoding using -E_max.
+                float f_adjust = abs_f * 0x1.0p-125f;
+
+                // In case the significand rounds up and has a carry
+                // propagate all the way up, take the bottom 11 bits
+                // rather than bottom 10 bits. Adding this value,
+                // rather than OR'ing htis value, will cause the right
+                // exponent adjustment.
+                signif_bits = (short)(Float.floatToRawIntBits(f_adjust) & 0x07ff);
+                return (short)(sign_bit | ( ((exp + 15) << 10) + signif_bits ) );
+            } else {
+                // Scale down to subnormal range to round off excess bits
+                int scalingExp = -139 - exp;
+                float scaled = Math.scalb(Math.scalb(f, scalingExp),
+                                                       -scalingExp);
+                exp = Math.getExponent(scaled);
+                doppel = Float.floatToRawIntBits(scaled);
+
+                signif_bits = (short)((doppel & 0x007f_e000) >>
+                                      (FloatConsts.SIGNIFICAND_WIDTH - 11));
+                return (short)(sign_bit | ( ((exp + 15) << 10) | signif_bits ) );
+            }
+        }
+    }
+
+    public static class Binary16 {
+        public static final short POSITIVE_INFINITY = (short)0x7c00;
+        public static final short MAX_VALUE         = 0x7bff;
+        public static final short ONE               = 0x3c00;
+        public static final short MIN_NORMAL        = 0x0400;
+        public static final short MAX_SUBNORMAL     = 0x03ff;
+        public static final short MIN_VALUE         = 0x0001;
+        public static final short POSITIVE_ZERO     = 0x0000;
+
+        public static boolean isNaN(short binary16) {
+            return ((binary16 & 0x7c00) == 0x7c00) // Max exponent and...
+                && ((binary16 & 0x03ff) != 0 );    // significand nonzero.
+        }
+
+        public static short negate(short binary16) {
+            return (short)(binary16 ^ 0x8000 ); // Flip only sign bit.
+        }
+
+        public static boolean equivalent(short bin16_1, short bin16_2) {
+            return (bin16_1 == bin16_2) ||
+                isNaN(bin16_1) && isNaN(bin16_2);
+        }
+    }
+}
--- a/test/jdk/java/lang/Float/Binary16ConversionNaN.java
+++ b/test/jdk/java/lang/Float/Binary16ConversionNaN.java
@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8289551
+ * @summary Verify NaN sign and significand bits are preserved across conversions
+ */
+
+/*
+ * The behavior tested below is an implementation property not
+ * required by the specification. It would be acceptable for this
+ * information to not be preserved (as long as a NaN is returned) if,
+ * say, a intrinsified version using native hardware instructions
+ * behaved differently.
+ *
+ * If that is the case, this test should be modified to disable
+ * intrinsics or to otherwise not run on platforms with an differently
+ * behaving intrinsic.
+ */
+public class Binary16ConversionNaN {
+    public static void main(String... argv) {
+        int errors = 0;
+        errors += binary16NaNRoundTrip();
+
+        if (errors > 0)
+            throw new RuntimeException(errors + " errors");
+    }
+
+    /*
+     * Put all 16-bit NaN values through a conversion loop and make
+     * sure the significand, sign, and exponent are all preserved.
+     */
+    private static int binary16NaNRoundTrip() {
+        int errors = 0;
+        final int NAN_EXPONENT = 0x7c00;
+        final int SIGN_BIT     = 0x8000;
+
+        // A NaN has a nonzero significand
+        for (int i = 1; i <= 0x3ff; i++) {
+            short binary16NaN = (short)(NAN_EXPONENT | i);
+            assert isNaN(binary16NaN);
+            errors += testRoundTrip(                   binary16NaN);
+            errors += testRoundTrip((short)(SIGN_BIT | binary16NaN));
+        }
+        return errors;
+    }
+
+    private static boolean isNaN(short binary16) {
+        return ((binary16 & 0x7c00) == 0x7c00) // Max exponent and...
+            && ((binary16 & 0x03ff) != 0 );    // significand nonzero.
+    }
+
+    private static int testRoundTrip(int i) {
+        int errors = 0;
+        short s = (short)i;
+        float f =  Float.float16ToFloat(s);
+        short s2 = Float.floatToFloat16(f);
+
+        if (s != s2) {
+            errors++;
+            System.out.println("Roundtrip failure on NaN value " +
+                               Integer.toHexString(0xFFFF & (int)s) +
+                               "\t got back " + Integer.toHexString(0xFFFF & (int)s2));
+        }
+        return errors;
+    }
+}