diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index 3505e081d38..fbf95ae02b5 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -1995,6 +1995,13 @@ void Assembler::cvttsd2sil(Register dst, XMMRegister src) { emit_int16(0x2C, (0xC0 | encode)); } +void Assembler::cvtss2sil(Register dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int16(0x2D, (0xC0 | encode)); +} + void Assembler::cvttss2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -2088,6 +2095,21 @@ void Assembler::vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len) { emit_int16(0x5B, (0xC0 | encode)); } +void Assembler::vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len <= AVX_256bit ? 
VM_Version::supports_avx() : VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x5B, (0xC0 | encode)); +} + +void Assembler::evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 2 && VM_Version::supports_avx512dq(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x7B, (0xC0 | encode)); +} + void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) { assert(UseAVX > 2 && VM_Version::supports_avx512dq(), ""); InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -6493,7 +6515,6 @@ void Assembler::vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vec emit_int8((rmode)); } - void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -12285,6 +12306,13 @@ void Assembler::cvttsd2siq(Register dst, XMMRegister src) { emit_int16(0x2C, (0xC0 | encode)); } +void Assembler::cvtsd2siq(Register dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); + emit_int16(0x2D, (0xC0 | 
encode)); +} + void Assembler::cvttss2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 7141e4b96c4..6af93b52fc6 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -1149,6 +1149,7 @@ private: void cvtss2sd(XMMRegister dst, Address src); // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer + void cvtsd2siq(Register dst, XMMRegister src); void cvttsd2sil(Register dst, Address src); void cvttsd2sil(Register dst, XMMRegister src); void cvttsd2siq(Register dst, Address src); @@ -1157,6 +1158,7 @@ private: // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer void cvttss2sil(Register dst, XMMRegister src); void cvttss2siq(Register dst, XMMRegister src); + void cvtss2sil(Register dst, XMMRegister src); // Convert vector double to int void cvttpd2dq(XMMRegister dst, XMMRegister src); @@ -1166,6 +1168,7 @@ private: void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len); // Convert vector float and int + void vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len); void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len); // Convert vector long to vector FP @@ -1173,6 +1176,7 @@ private: void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len); // Convert vector double to long + void evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len); void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len); // Evex casts with truncation diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 6d8b9101303..c4411be23cf 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ 
b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -4061,41 +4061,18 @@ void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst, } /* - * Algorithm for vector D2L and F2I conversions:- - * a) Perform vector D2L/F2I cast. - * b) Choose fast path if none of the result vector lane contains 0x80000000 value. - * It signifies that source value could be any of the special floating point - * values(NaN,-Inf,Inf,Max,-Min). - * c) Set destination to zero if source is NaN value. - * d) Replace 0x80000000 with MaxInt if source lane contains a +ve value. + * Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation. + * If src is NaN, the result is 0. + * If the src is negative infinity or any value less than or equal to the value of Integer.MIN_VALUE, + * the result is equal to the value of Integer.MIN_VALUE. + * If the src is positive infinity or any value greater than or equal to the value of Integer.MAX_VALUE, + * the result is equal to the value of Integer.MAX_VALUE. 
*/ - -void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, - KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, - Register scratch, int vec_enc) { +void C2_MacroAssembler::vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, + XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, + Register scratch, AddressLiteral float_sign_flip, + int vec_enc) { Label done; - evcvttpd2qq(dst, src, vec_enc); - evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch); - evpcmpeqq(ktmp1, xtmp1, dst, vec_enc); - kortestwl(ktmp1, ktmp1); - jccb(Assembler::equal, done); - - vpxor(xtmp2, xtmp2, xtmp2, vec_enc); - evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc); - evmovdquq(dst, ktmp2, xtmp2, true, vec_enc); - - kxorwl(ktmp1, ktmp1, ktmp2); - evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc); - vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc); - evmovdquq(dst, ktmp1, xtmp2, true, vec_enc); - bind(done); -} - -void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, - XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, - AddressLiteral float_sign_flip, Register scratch, int vec_enc) { - Label done; - vcvttps2dq(dst, src, vec_enc); vmovdqu(xtmp1, float_sign_flip, scratch, vec_enc); vpcmpeqd(xtmp2, dst, xtmp1, vec_enc); vptest(xtmp2, xtmp2, vec_enc); @@ -4120,11 +4097,11 @@ void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMM bind(done); } -void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, - KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip, - Register scratch, int vec_enc) { +void C2_MacroAssembler::vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, + XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, + Register scratch, AddressLiteral float_sign_flip, + 
int vec_enc) { Label done; - vcvttps2dq(dst, src, vec_enc); evmovdqul(xtmp1, k0, float_sign_flip, false, vec_enc, scratch); Assembler::evpcmpeqd(ktmp1, k0, xtmp1, dst, vec_enc); kortestwl(ktmp1, ktmp1); @@ -4141,6 +4118,115 @@ void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XM bind(done); } +/* + * Following routine handles special floating point values(NaN/Inf/-Inf/Max/Min) for casting operation. + * If src is NaN, the result is 0. + * If the src is negative infinity or any value less than or equal to the value of Long.MIN_VALUE, + * the result is equal to the value of Long.MIN_VALUE. + * If the src is positive infinity or any value greater than or equal to the value of Long.MAX_VALUE, + * the result is equal to the value of Long.MAX_VALUE. + */ +void C2_MacroAssembler::vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, + XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, + Register scratch, AddressLiteral double_sign_flip, + int vec_enc) { + Label done; + evmovdqul(xtmp1, k0, double_sign_flip, false, vec_enc, scratch); + evpcmpeqq(ktmp1, xtmp1, dst, vec_enc); + kortestwl(ktmp1, ktmp1); + jccb(Assembler::equal, done); + + vpxor(xtmp2, xtmp2, xtmp2, vec_enc); + evcmppd(ktmp2, k0, src, src, Assembler::UNORD_Q, vec_enc); + evmovdquq(dst, ktmp2, xtmp2, true, vec_enc); + + kxorwl(ktmp1, ktmp1, ktmp2); + evcmppd(ktmp1, ktmp1, src, xtmp2, Assembler::NLT_UQ, vec_enc); + vpternlogq(xtmp2, 0x11, xtmp1, xtmp1, vec_enc); + evmovdquq(dst, ktmp1, xtmp2, true, vec_enc); + bind(done); +} + +/* + * Algorithm for vector D2L and F2I conversions:- + * a) Perform vector D2L/F2I cast. + * b) Choose fast path if none of the result vector lane contains 0x80000000 value. + * It signifies that source value could be any of the special floating point + * values(NaN,-Inf,Inf,Max,-Min). + * c) Set destination to zero if source is NaN value. + * d) Replace 0x80000000 with MaxInt if source lane contains a +ve value. 
+ */ + +void C2_MacroAssembler::vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, + KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, + Register scratch, int vec_enc) { + evcvttpd2qq(dst, src, vec_enc); + vector_cast_double_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, double_sign_flip, vec_enc); +} + +void C2_MacroAssembler::vector_castF2I_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, + XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, + AddressLiteral float_sign_flip, Register scratch, int vec_enc) { + vcvttps2dq(dst, src, vec_enc); + vector_cast_float_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, scratch, float_sign_flip, vec_enc); +} + +void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, + KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip, + Register scratch, int vec_enc) { + vcvttps2dq(dst, src, vec_enc); + vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc); +} + +#ifdef _LP64 +void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, + KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, + AddressLiteral new_mxcsr, Register scratch, int vec_enc) { + // Perform floor(val+0.5) operation under the influence of MXCSR.RC mode roundTowards -inf. + // and re-instantiate original MXCSR.RC mode after that. 
+ ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std()); + ldmxcsr(new_mxcsr, scratch); + mov64(scratch, julong_cast(0.5L)); + evpbroadcastq(xtmp1, scratch, vec_enc); + vaddpd(xtmp1, src , xtmp1, vec_enc); + evcvtpd2qq(dst, xtmp1, vec_enc); + vector_cast_double_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, double_sign_flip, vec_enc); + ldmxcsr(mxcsr_std, scratch); +} + +void C2_MacroAssembler::vector_round_float_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, + KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip, + AddressLiteral new_mxcsr, Register scratch, int vec_enc) { + // Perform floor(val+0.5) operation under the influence of MXCSR.RC mode roundTowards -inf. + // and re-instantiate original MXCSR.RC mode after that. + ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std()); + ldmxcsr(new_mxcsr, scratch); + movl(scratch, jint_cast(0.5)); + movq(xtmp1, scratch); + vbroadcastss(xtmp1, xtmp1, vec_enc); + vaddps(xtmp1, src , xtmp1, vec_enc); + vcvtps2dq(dst, xtmp1, vec_enc); + vector_cast_float_special_cases_evex(dst, src, xtmp1, xtmp2, ktmp1, ktmp2, scratch, float_sign_flip, vec_enc); + ldmxcsr(mxcsr_std, scratch); +} + +void C2_MacroAssembler::vector_round_float_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, + XMMRegister xtmp3, XMMRegister xtmp4, AddressLiteral float_sign_flip, + AddressLiteral new_mxcsr, Register scratch, int vec_enc) { + // Perform floor(val+0.5) operation under the influence of MXCSR.RC mode roundTowards -inf. + // and re-instantiate original MXCSR.RC mode after that. 
+ ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std()); + ldmxcsr(new_mxcsr, scratch); + movl(scratch, jint_cast(0.5)); + movq(xtmp1, scratch); + vbroadcastss(xtmp1, xtmp1, vec_enc); + vaddps(xtmp1, src , xtmp1, vec_enc); + vcvtps2dq(dst, xtmp1, vec_enc); + vector_cast_float_special_cases_avx(dst, src, xtmp1, xtmp2, xtmp3, xtmp4, scratch, float_sign_flip, vec_enc); + ldmxcsr(mxcsr_std, scratch); +} +#endif + void C2_MacroAssembler::vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc, BasicType from_elem_bt, BasicType to_elem_bt) { switch (from_elem_bt) { diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 5ecdf20700d..5f8e38a93b2 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -303,6 +303,7 @@ public: KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip, Register scratch, int vec_enc); + void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, Register scratch, int vec_enc); @@ -310,6 +311,33 @@ public: void vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc, BasicType from_elem_bt, BasicType to_elem_bt); + void vector_cast_double_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, + KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral double_sign_flip, + int vec_enc); + + void vector_cast_float_special_cases_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, + KRegister ktmp1, KRegister ktmp2, Register scratch, AddressLiteral float_sign_flip, + int vec_enc); + + void vector_cast_float_special_cases_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, + XMMRegister xtmp2, XMMRegister xtmp3, XMMRegister xtmp4, + Register scratch, AddressLiteral float_sign_flip, + int vec_enc); + +#ifdef _LP64 + void 
vector_round_double_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, + KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip, + AddressLiteral new_mxcsr, Register scratch, int vec_enc); + + void vector_round_float_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, + KRegister ktmp1, KRegister ktmp2, AddressLiteral float_sign_flip, + AddressLiteral new_mxcsr, Register scratch, int vec_enc); + + void vector_round_float_avx(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, + XMMRegister xtmp3, XMMRegister xtmp4, AddressLiteral float_sign_flip, + AddressLiteral new_mxcsr, Register scratch, int vec_enc); +#endif + void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, BasicType bt, int vlen_enc); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 67faa195457..855c855089d 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -2252,12 +2252,12 @@ void MacroAssembler::fld_x(AddressLiteral src) { Assembler::fld_x(as_Address(src)); } -void MacroAssembler::ldmxcsr(AddressLiteral src) { +void MacroAssembler::ldmxcsr(AddressLiteral src, Register scratchReg) { if (reachable(src)) { Assembler::ldmxcsr(as_Address(src)); } else { - lea(rscratch1, src); - Assembler::ldmxcsr(Address(rscratch1, 0)); + lea(scratchReg, src); + Assembler::ldmxcsr(Address(scratchReg, 0)); } } @@ -9120,6 +9120,80 @@ void MacroAssembler::convert_f2l(Register dst, XMMRegister src) { bind(done); } +void MacroAssembler::round_float(Register dst, XMMRegister src, Register rtmp, Register rcx) { + // Following code is line by line assembly translation rounding algorithm. + // Please refer to java.lang.Math.round(float) algorithm for details. 
+ const int32_t FloatConsts_EXP_BIT_MASK = 0x7F800000; + const int32_t FloatConsts_SIGNIFICAND_WIDTH = 24; + const int32_t FloatConsts_EXP_BIAS = 127; + const int32_t FloatConsts_SIGNIF_BIT_MASK = 0x007FFFFF; + const int32_t MINUS_32 = 0xFFFFFFE0; + Label L_special_case, L_block1, L_exit; + movl(rtmp, FloatConsts_EXP_BIT_MASK); + movdl(dst, src); + andl(dst, rtmp); + sarl(dst, FloatConsts_SIGNIFICAND_WIDTH - 1); + movl(rtmp, FloatConsts_SIGNIFICAND_WIDTH - 2 + FloatConsts_EXP_BIAS); + subl(rtmp, dst); + movl(rcx, rtmp); + movl(dst, MINUS_32); + testl(rtmp, dst); + jccb(Assembler::notEqual, L_special_case); + movdl(dst, src); + andl(dst, FloatConsts_SIGNIF_BIT_MASK); + orl(dst, FloatConsts_SIGNIF_BIT_MASK + 1); + movdl(rtmp, src); + testl(rtmp, rtmp); + jccb(Assembler::greaterEqual, L_block1); + negl(dst); + bind(L_block1); + sarl(dst); + addl(dst, 0x1); + sarl(dst, 0x1); + jmp(L_exit); + bind(L_special_case); + convert_f2i(dst, src); + bind(L_exit); +} + +void MacroAssembler::round_double(Register dst, XMMRegister src, Register rtmp, Register rcx) { + // Following code is line by line assembly translation rounding algorithm. + // Please refer to java.lang.Math.round(double) algorithm for details. 
+ const int64_t DoubleConsts_EXP_BIT_MASK = 0x7FF0000000000000L; + const int64_t DoubleConsts_SIGNIFICAND_WIDTH = 53; + const int64_t DoubleConsts_EXP_BIAS = 1023; + const int64_t DoubleConsts_SIGNIF_BIT_MASK = 0x000FFFFFFFFFFFFFL; + const int64_t MINUS_64 = 0xFFFFFFFFFFFFFFC0L; + Label L_special_case, L_block1, L_exit; + mov64(rtmp, DoubleConsts_EXP_BIT_MASK); + movq(dst, src); + andq(dst, rtmp); + sarq(dst, DoubleConsts_SIGNIFICAND_WIDTH - 1); + mov64(rtmp, DoubleConsts_SIGNIFICAND_WIDTH - 2 + DoubleConsts_EXP_BIAS); + subq(rtmp, dst); + movq(rcx, rtmp); + mov64(dst, MINUS_64); + testq(rtmp, dst); + jccb(Assembler::notEqual, L_special_case); + movq(dst, src); + mov64(rtmp, DoubleConsts_SIGNIF_BIT_MASK); + andq(dst, rtmp); + mov64(rtmp, DoubleConsts_SIGNIF_BIT_MASK + 1); + orq(dst, rtmp); + movq(rtmp, src); + testq(rtmp, rtmp); + jccb(Assembler::greaterEqual, L_block1); + negq(dst); + bind(L_block1); + sarq(dst); + addq(dst, 0x1); + sarq(dst, 0x1); + jmp(L_exit); + bind(L_special_case); + convert_d2l(dst, src); + bind(L_exit); +} + void MacroAssembler::convert_d2l(Register dst, XMMRegister src) { Label done; cvttsd2siq(dst, src); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 9b3da9d5de1..303b6b0c83c 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -906,7 +906,7 @@ public: void fld_x(AddressLiteral src); void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } - void ldmxcsr(AddressLiteral src); + void ldmxcsr(AddressLiteral src, Register scratchReg = rscratch1); #ifdef _LP64 private: @@ -1994,6 +1994,8 @@ public: void convert_d2i(Register dst, XMMRegister src); void convert_f2l(Register dst, XMMRegister src); void convert_d2l(Register dst, XMMRegister src); + void round_double(Register dst, XMMRegister src, Register rtmp, Register rcx); + void round_float(Register dst, XMMRegister src, Register rtmp, Register rcx); void cache_wb(Address line); void 
cache_wbsync(bool is_pre); diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index ab28ebd5ca5..cf182d9880d 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1468,6 +1468,16 @@ const bool Matcher::match_rule_supported(int opcode) { return false; } break; + case Op_RoundVF: + if (UseAVX < 2) { // enabled for AVX2 only + return false; + } + break; + case Op_RoundVD: + if (UseAVX < 3) { + return false; // enabled for AVX3 only + } + break; case Op_CompareAndSwapL: #ifdef _LP64 case Op_CompareAndSwapP: @@ -1572,6 +1582,12 @@ const bool Matcher::match_rule_supported(int opcode) { return false; } break; + case Op_RoundF: + case Op_RoundD: + if (!is_LP64) { + return false; + } + break; case Op_CopySignD: case Op_CopySignF: if (UseAVX < 3 || !is_LP64) { @@ -1817,6 +1833,11 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType return false; } break; + case Op_RoundVD: + if (!VM_Version::supports_avx512dq()) { + return false; + } + break; case Op_VectorCastF2X: if (is_subword_type(bt) || bt == T_LONG) { return false; @@ -7173,13 +7194,14 @@ instruct vcastFtoD_reg(vec dst, vec src) %{ ins_pipe( pipe_slow ); %} -instruct vcastFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{ + +instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{ predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) < 64 && Matcher::vector_element_basic_type(n) == T_INT); match(Set dst (VectorCastF2X src)); effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr); - format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} + format %{ "vector_cast_f2i $dst,$src\t! 
using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); __ vector_castF2I_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, @@ -7189,13 +7211,13 @@ instruct vcastFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, ve ins_pipe( pipe_slow ); %} -instruct vcastFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ +instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && Matcher::vector_element_basic_type(n) == T_INT); match(Set dst (VectorCastF2X src)); effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); - format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} + format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); __ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, @@ -7216,11 +7238,11 @@ instruct vcastDtoF_reg(vec dst, vec src) %{ ins_pipe( pipe_slow ); %} -instruct vcastDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ +instruct castDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst (VectorCastD2X src)); effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); - format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} + format %{ "vector_cast_d2l $dst,$src\t! 
using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} ins_encode %{ int vlen_enc = vector_length_encoding(this); __ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, @@ -7246,6 +7268,56 @@ instruct vucast(vec dst, vec src) %{ ins_pipe( pipe_slow ); %} +#ifdef _LP64 +instruct vround_float_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{ + predicate(!VM_Version::supports_avx512vl() && + Matcher::vector_length_in_bytes(n) < 64 && + Matcher::vector_element_basic_type(n) == T_INT); + match(Set dst (RoundVF src)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr); + format %{ "vector_round_float $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); + __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, + $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, + ExternalAddress(vector_float_signflip()), new_mxcsr, $scratch$$Register, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vround_float_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ + predicate((VM_Version::supports_avx512vl() || + Matcher::vector_length_in_bytes(n) == 64) && + Matcher::vector_element_basic_type(n) == T_INT); + match(Set dst (RoundVF src)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); + format %{ "vector_round_float $dst,$src\t! 
using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); + __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, + $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, + ExternalAddress(vector_float_signflip()), new_mxcsr, $scratch$$Register, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} + +instruct vround_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ + predicate(Matcher::vector_element_basic_type(n) == T_LONG); + match(Set dst (RoundVD src)); + effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); + format %{ "vector_round_long $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} + ins_encode %{ + int vlen_enc = vector_length_encoding(this); + InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); + __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, + $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, + ExternalAddress(vector_double_signflip()), new_mxcsr, $scratch$$Register, vlen_enc); + %} + ins_pipe( pipe_slow ); +%} +#endif // --------------------------------- VectorMaskCmp -------------------------------------- instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 09ff7075994..62132ea930f 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -10821,6 +10821,28 @@ instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr) ins_pipe(pipe_slow); %} +instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) +%{ + match(Set dst (RoundD src)); + effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr); + format %{ "round_double $dst,$src \t! 
using $rtmp and $rcx as TEMP"%} + ins_encode %{ + __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr) +%{ + match(Set dst (RoundF src)); + effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr); + format %{ "round_float $dst,$src" %} + ins_encode %{ + __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register); + %} + ins_pipe(pipe_slow); +%} + instruct convI2F_reg_reg(regF dst, rRegI src) %{ predicate(!UseXmmI2F); diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index 7ae8d123642..ba65dd706fb 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -4239,6 +4239,7 @@ bool MatchRule::is_vector() const { "FmaVD","FmaVF","PopCountVI", "PopCountVL", "VectorLongToMask", // Next are vector mask ops. "MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast", + "RoundVF", "RoundVD", // Next are not supported currently. "PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D", "ExtractB","ExtractUB","ExtractC","ExtractS","ExtractI","ExtractL","ExtractF","ExtractD" diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp index 5b2c6a9ce56..b2757ca86bc 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.hpp +++ b/src/hotspot/share/classfile/vmIntrinsics.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -134,6 +134,7 @@ class methodHandle; do_name(log_name,"log") do_name(log10_name,"log10") do_name(pow_name,"pow") \ do_name(exp_name,"exp") do_name(min_name,"min") do_name(max_name,"max") \ do_name(floor_name, "floor") do_name(ceil_name, "ceil") do_name(rint_name, "rint") \ + do_name(round_name, "round") \ \ do_name(addExact_name,"addExact") \ do_name(decrementExact_name,"decrementExact") \ @@ -185,6 +186,8 @@ class methodHandle; do_intrinsic(_minF, java_lang_Math, min_name, float2_float_signature, F_S) \ do_intrinsic(_maxD, java_lang_Math, max_name, double2_double_signature, F_S) \ do_intrinsic(_minD, java_lang_Math, min_name, double2_double_signature, F_S) \ + do_intrinsic(_roundD, java_lang_Math, round_name, double_long_signature, F_S) \ + do_intrinsic(_roundF, java_lang_Math, round_name, float_int_signature, F_S) \ do_intrinsic(_dcopySign, java_lang_Math, copySign_name, double2_double_signature, F_S) \ do_intrinsic(_fcopySign, java_lang_Math, copySign_name, float2_float_signature, F_S) \ do_intrinsic(_dsignum, java_lang_Math, signum_name, double_double_signature, F_S) \ diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp index 94e7fb4f4a0..dfa2a86e602 100644 --- a/src/hotspot/share/opto/c2compiler.cpp +++ b/src/hotspot/share/opto/c2compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -523,6 +523,8 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: case vmIntrinsics::_dpow: + case vmIntrinsics::_roundD: + case vmIntrinsics::_roundF: case vmIntrinsics::_min: case vmIntrinsics::_max: case vmIntrinsics::_min_strict: diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp index 5cedd71ec56..a268399f1b9 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -311,6 +311,8 @@ macro(SignumD) macro(SignumF) macro(SqrtD) macro(SqrtF) +macro(RoundF) +macro(RoundD) macro(Start) macro(StartOSR) macro(StoreB) @@ -446,6 +448,8 @@ macro(ReplicateI) macro(ReplicateL) macro(ReplicateF) macro(ReplicateD) +macro(RoundVF) +macro(RoundVD) macro(Extract) macro(ExtractB) macro(ExtractUB) diff --git a/src/hotspot/share/opto/convertnode.hpp b/src/hotspot/share/opto/convertnode.hpp index fb670191fc3..87fca38d43f 100644 --- a/src/hotspot/share/opto/convertnode.hpp +++ b/src/hotspot/share/opto/convertnode.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -81,6 +81,14 @@ class ConvD2LNode : public Node { virtual uint ideal_reg() const { return Op_RegL; } }; +class RoundDNode : public Node { + public: + RoundDNode( Node *dbl ) : Node(0,dbl) {} + virtual int Opcode() const; + virtual const Type *bottom_type() const { return TypeLong::LONG; } + virtual uint ideal_reg() const { return Op_RegL; } +}; + //------------------------------ConvF2DNode------------------------------------ // Convert Float to a Double. 
class ConvF2DNode : public Node { @@ -105,6 +113,7 @@ class ConvF2INode : public Node { virtual uint ideal_reg() const { return Op_RegI; } }; + //------------------------------ConvF2LNode------------------------------------ // Convert float to long class ConvF2LNode : public Node { @@ -141,6 +150,14 @@ class ConvI2FNode : public Node { virtual uint ideal_reg() const { return Op_RegF; } }; +class RoundFNode : public Node { + public: + RoundFNode( Node *in1 ) : Node(0,in1) {} + virtual int Opcode() const; + virtual const Type *bottom_type() const { return TypeInt::INT; } + virtual uint ideal_reg() const { return Op_RegI; } +}; + //------------------------------ConvI2LNode------------------------------------ // Convert integer to long class ConvI2LNode : public TypeNode { diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index 2f7c7fdc84c..fd23e272c71 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -269,6 +269,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { case vmIntrinsics::_dcopySign: case vmIntrinsics::_fcopySign: case vmIntrinsics::_dsignum: + case vmIntrinsics::_roundF: + case vmIntrinsics::_roundD: case vmIntrinsics::_fsignum: return inline_math_native(intrinsic_id()); case vmIntrinsics::_notify: @@ -1605,6 +1607,7 @@ Node* LibraryCallKit::round_double_node(Node* n) { // public static double Math.sqrt(double) // public static double Math.log(double) // public static double Math.log10(double) +// public static double Math.round(double) bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) { Node* arg = round_double_node(argument(0)); Node* n = NULL; @@ -1616,6 +1619,7 @@ bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) { case vmIntrinsics::_ceil: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_ceil); break; case vmIntrinsics::_floor: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_floor); break; case vmIntrinsics::_rint: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_rint); break; + case vmIntrinsics::_roundD: n = new RoundDNode(arg); break; case vmIntrinsics::_dcopySign: n = CopySignDNode::make(_gvn, arg, round_double_node(argument(2))); break; case vmIntrinsics::_dsignum: n = SignumDNode::make(_gvn, arg); break; default: fatal_unexpected_iid(id); break; @@ -1637,6 +1641,7 @@ bool LibraryCallKit::inline_math(vmIntrinsics::ID id) { case vmIntrinsics::_labs: n = new AbsLNode( arg); break; case vmIntrinsics::_fcopySign: n = new CopySignFNode(arg, argument(1)); break; case vmIntrinsics::_fsignum: n = SignumFNode::make(_gvn, arg); break; + case vmIntrinsics::_roundF: n = new RoundFNode(arg); break; default: fatal_unexpected_iid(id); break; } set_result(_gvn.transform(n)); @@ -1752,9 +1757,11 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) { 
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10"); // These intrinsics are supported on all hardware + case vmIntrinsics::_roundD: return Matcher::match_rule_supported(Op_RoundD) ? inline_double_math(id) : false; case vmIntrinsics::_ceil: case vmIntrinsics::_floor: case vmIntrinsics::_rint: return Matcher::match_rule_supported(Op_RoundDoubleMode) ? inline_double_math(id) : false; + case vmIntrinsics::_dsqrt: case vmIntrinsics::_dsqrt_strict: return Matcher::match_rule_supported(Op_SqrtD) ? inline_double_math(id) : false; @@ -1774,6 +1781,7 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) { case vmIntrinsics::_fcopySign: return inline_math(id); case vmIntrinsics::_dsignum: return Matcher::match_rule_supported(Op_SignumD) ? inline_double_math(id) : false; case vmIntrinsics::_fsignum: return Matcher::match_rule_supported(Op_SignumF) ? inline_math(id) : false; + case vmIntrinsics::_roundF: return Matcher::match_rule_supported(Op_RoundF) ? inline_math(id) : false; // These intrinsics are not yet correctly implemented case vmIntrinsics::_datan2: diff --git a/src/hotspot/share/opto/loopTransform.cpp b/src/hotspot/share/opto/loopTransform.cpp index 253340a6be5..a9839941669 100644 --- a/src/hotspot/share/opto/loopTransform.cpp +++ b/src/hotspot/share/opto/loopTransform.cpp @@ -970,6 +970,10 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) { case Op_ModL: body_size += 30; break; case Op_DivL: body_size += 30; break; case Op_MulL: body_size += 10; break; + case Op_RoundF: body_size += 30; break; + case Op_RoundD: body_size += 30; break; + case Op_RoundVF: body_size += 30; break; + case Op_RoundVD: body_size += 30; break; case Op_PopCountVI: case Op_PopCountVL: { const TypeVect* vt = n->bottom_type()->is_vect(); diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index 8a116212783..1459e3b7541 100644 --- a/src/hotspot/share/opto/superword.cpp +++ 
b/src/hotspot/share/opto/superword.cpp @@ -2563,6 +2563,7 @@ bool SuperWord::output() { opc == Op_AbsF || opc == Op_AbsD || opc == Op_AbsI || opc == Op_AbsL || opc == Op_NegF || opc == Op_NegD || + opc == Op_RoundF || opc == Op_RoundD || opc == Op_PopCountI || opc == Op_PopCountL) { assert(n->req() == 2, "only one input expected"); Node* in = vector_opd(p, 1); diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index ae9bb537959..8b385f28f40 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -157,6 +157,10 @@ int VectorNode::opcode(int sopc, BasicType bt) { return (bt == T_FLOAT ? Op_SqrtVF : 0); case Op_SqrtD: return (bt == T_DOUBLE ? Op_SqrtVD : 0); + case Op_RoundF: + return (bt == T_INT ? Op_RoundVF : 0); + case Op_RoundD: + return (bt == T_LONG ? Op_RoundVD : 0); case Op_PopCountI: // Unimplemented for subword types since bit count changes // depending on size of lane (and sign bit). @@ -585,6 +589,9 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b case Op_SqrtVF: return new SqrtVFNode(n1, vt); case Op_SqrtVD: return new SqrtVDNode(n1, vt); + case Op_RoundVF: return new RoundVFNode(n1, vt); + case Op_RoundVD: return new RoundVDNode(n1, vt); + case Op_PopCountVI: return new PopCountVINode(n1, vt); case Op_PopCountVL: return new PopCountVLNode(n1, vt); case Op_RotateLeftV: return new RotateLeftVNode(n1, n2, vt); diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index d853a71f8cc..74fd1f63991 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -1544,6 +1544,14 @@ class VectorCastD2XNode : public VectorCastNode { virtual int Opcode() const; }; +class RoundVFNode : public VectorNode { + public: + RoundVFNode(Node* in, const TypeVect* vt) :VectorNode(in, vt) { + assert(in->bottom_type()->is_vect()->element_basic_type() == T_FLOAT, "must be float"); + } + virtual 
int Opcode() const; +}; + class VectorUCastB2XNode : public VectorCastNode { public: VectorUCastB2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) { @@ -1552,6 +1560,14 @@ class VectorUCastB2XNode : public VectorCastNode { virtual int Opcode() const; }; +class RoundVDNode : public VectorNode { + public: + RoundVDNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) { + assert(in->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE, "must be double"); + } + virtual int Opcode() const; +}; + class VectorUCastS2XNode : public VectorCastNode { public: VectorUCastS2XNode(Node* in, const TypeVect* vt) : VectorCastNode(in, vt) { diff --git a/src/java.base/share/classes/java/lang/Math.java b/src/java.base/share/classes/java/lang/Math.java index 6d8fa48c976..4155e8616f5 100644 --- a/src/java.base/share/classes/java/lang/Math.java +++ b/src/java.base/share/classes/java/lang/Math.java @@ -753,6 +753,7 @@ public final class Math { * @see java.lang.Integer#MAX_VALUE * @see java.lang.Integer#MIN_VALUE */ + @IntrinsicCandidate public static int round(float a) { int intBits = Float.floatToRawIntBits(a); int biasedExp = (intBits & FloatConsts.EXP_BIT_MASK) @@ -802,6 +803,7 @@ public final class Math { * @see java.lang.Long#MAX_VALUE * @see java.lang.Long#MIN_VALUE */ + @IntrinsicCandidate public static long round(double a) { long longBits = Double.doubleToRawLongBits(a); long biasedExp = (longBits & DoubleConsts.EXP_BIT_MASK) diff --git a/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java b/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java index 8822b38a99a..cc6f860a6a9 100644 --- a/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java +++ b/test/hotspot/jtreg/compiler/c2/cr6340864/TestDoubleVect.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2022, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -44,14 +44,16 @@ public class TestDoubleVect { System.out.println("Testing Double vectors"); int errn = test(); if (errn > 0) { - System.err.println("FAILED: " + errn + " errors"); - System.exit(97); + System.err.println("FAILED: " + errn + " errors"); + System.exit(97); } System.out.println("PASSED"); } static int test() { double[] a0 = new double[ARRLEN]; + long [] l0 = new long[ARRLEN]; + double[] a1 = new double[ARRLEN]; double[] a2 = new double[ARRLEN]; double[] a3 = new double[ARRLEN]; @@ -91,6 +93,7 @@ public class TestDoubleVect { test_ceil(a0, a1); test_floor(a0, a1); test_sqrt(a0, a1); + test_round(l0, a1); } // Test and verify results System.out.println("Verification"); @@ -355,6 +358,7 @@ public class TestDoubleVect { errn += verify("test_negc: ", i, a0[i], (double)(-((double)(ADD_INIT+i)))); } + // To test -ve and +ve Zero scenarios. double [] other_corner_cases = { -0.0, 0.0, 9.007199254740992E15 }; double [] other_corner_cases_res = new double[3]; @@ -421,6 +425,35 @@ public class TestDoubleVect { for (int i=8; i 0) @@ -564,6 +597,12 @@ public class TestDoubleVect { end = System.currentTimeMillis(); System.out.println("test_sqrt_n: " + (end - start)); + start = System.currentTimeMillis(); + for (int i=0; i 0) @@ -512,6 +544,12 @@ public class TestFloatVect { end = System.currentTimeMillis(); System.out.println("test_sqrt_n: " + (end - start)); + start = System.currentTimeMillis(); + for (int i=0; i 0 "}) + public void test_round_double(long[] lout, double[] dinp) { + for (int i = 0; i < lout.length; i+=1) { + lout[i] = Math.round(dinp[i]); + } + } + + @Run(test = {"test_round_double"}, mode = RunMode.STANDALONE) + public void kernel_test_round_double() { + dinp = new double[ARRLEN]; + lout = new long[ARRLEN]; + for(int i = 0 ; i < ARRLEN; i++) { + dinp[i] = (double)i*1.4; + } + for (int i = 0; i < ITERS; i++) { + 
test_round_double(lout , dinp); + } + } +} diff --git a/test/hotspot/jtreg/compiler/vectorization/TestRoundVectFloat.java b/test/hotspot/jtreg/compiler/vectorization/TestRoundVectFloat.java new file mode 100644 index 00000000000..78e8d7b55cc --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorization/TestRoundVectFloat.java @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * @test + * @bug 8279508 + * @summary Auto-vectorize Math.round API + * @requires vm.compiler2.enabled + * @requires vm.cpu.features ~= ".*avx.*" + * @requires os.simpleArch == "x64" + * @library /test/lib / + * @run driver compiler.vectorization.TestRoundVectFloat + */ + +package compiler.vectorization; + +import compiler.lib.ir_framework.*; + +public class TestRoundVectFloat { + private static final int ARRLEN = 1024; + private static final int ITERS = 11000; + private static float [] finp; + private static int [] iout; + + public static void main(String args[]) { + TestFramework.runWithFlags("-XX:-TieredCompilation", + "-XX:UseAVX=1", + "-XX:CompileThresholdScaling=0.3"); + System.out.println("PASSED"); + } + + @Test + @IR(applyIf = {"UseAVX", " > 1"}, counts = {"RoundVF" , " > 0 "}) + public void test_round_float(int[] iout, float[] finp) { + for (int i = 0; i < finp.length; i+=1) { + iout[i] = Math.round(finp[i]); + } + } + + @Run(test = {"test_round_float"}, mode = RunMode.STANDALONE) + public void kernel_test_round() { + finp = new float[ARRLEN]; + iout = new int[ARRLEN]; + for(int i = 0 ; i < ARRLEN; i++) { + finp[i] = (float)i*1.4f; + } + for (int i = 0; i < ITERS; i++) { + test_round_float(iout , finp); + } + } +} diff --git a/test/jdk/java/lang/Math/RoundTests.java b/test/jdk/java/lang/Math/RoundTests.java index 0a51b426386..e8f4e817a66 100644 --- a/test/jdk/java/lang/Math/RoundTests.java +++ b/test/jdk/java/lang/Math/RoundTests.java @@ -25,20 +25,22 @@ * @test * @bug 6430675 8010430 * @summary Check for correct implementation of {Math, StrictMath}.round + * @run main/othervm -XX:Tier3CompileThreshold=50 -XX:CompileThresholdScaling=0.01 -XX:+TieredCompilation RoundTests */ public class RoundTests { public static void main(String... 
args) { int failures = 0; + for (int i = 0; i < 500; i++) { + failures += testNearFloatHalfCases(); + failures += testNearDoubleHalfCases(); + failures += testUnityULPCases(); + failures += testSpecialCases(); - failures += testNearFloatHalfCases(); - failures += testNearDoubleHalfCases(); - failures += testUnityULPCases(); - failures += testSpecialCases(); - - if (failures > 0) { - System.err.println("Testing {Math, StrictMath}.round incurred " - + failures + " failures."); - throw new RuntimeException(); + if (failures > 0) { + System.err.println("Testing {Math, StrictMath}.round incurred " + + failures + " failures."); + throw new RuntimeException(); + } } } diff --git a/test/micro/org/openjdk/bench/java/math/FpRoundingBenchmark.java b/test/micro/org/openjdk/bench/java/math/FpRoundingBenchmark.java index cf0eed32e07..8381673028e 100644 --- a/test/micro/org/openjdk/bench/java/math/FpRoundingBenchmark.java +++ b/test/micro/org/openjdk/bench/java/math/FpRoundingBenchmark.java @@ -1,5 +1,5 @@ // -// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
// // This code is free software; you can redistribute it and/or modify it @@ -26,53 +26,97 @@ package org.openjdk.bench.java.math; import java.util.Random; import java.util.concurrent.TimeUnit; import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.infra.Blackhole; @OutputTimeUnit(TimeUnit.MILLISECONDS) @State(Scope.Thread) public class FpRoundingBenchmark { - @Param({"1024"}) + @Param({"1024", "2048"}) public int TESTSIZE; public double[] DargV1; - - public double[] Res; + public double[] ResD; + public long[] ResL; + public float[] FargV1; + public float[] ResF; + public int[] ResI; public final double[] DspecialVals = { - 0.0, -0.0, Double.NaN, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY}; + 0.0, -0.0, Double.NaN, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, + Double.MAX_VALUE, -Double.MAX_VALUE, Double.MIN_VALUE, -Double.MIN_VALUE, + Double.MIN_NORMAL + }; + + public final float[] FspecialVals = { + 0.0f, -0.0f, Float.NaN, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY, + Float.MAX_VALUE, -Float.MAX_VALUE, Float.MIN_VALUE, -Float.MIN_VALUE, + Float.MIN_NORMAL + }; @Setup(Level.Trial) public void BmSetup() { - int i = 0; - Random r = new Random(1024); - DargV1 = new double[TESTSIZE]; - Res = new double[TESTSIZE]; + int i = 0; + Random r = new Random(1024); - for (; i < DspecialVals.length; i++) { - DargV1[i] = DspecialVals[i]; - } + DargV1 = new double[TESTSIZE]; + ResD = new double[TESTSIZE]; - for (; i < TESTSIZE; i++) { - DargV1[i] = r.nextDouble()*TESTSIZE; - } + for (; i < DspecialVals.length; i++) { + DargV1[i] = DspecialVals[i]; + } + + for (; i < TESTSIZE; i++) { + DargV1[i] = Double.longBitsToDouble(r.nextLong());; + } + + FargV1 = new float[TESTSIZE]; + ResF = new float[TESTSIZE]; + + i = 0; + for (; i < FspecialVals.length; i++) { + FargV1[i] = FspecialVals[i]; + } + + for (; i < TESTSIZE; i++) { + FargV1[i] = Float.intBitsToFloat(r.nextInt()); + } + + ResI = new int[TESTSIZE]; + ResL = new long[TESTSIZE]; } @Benchmark - public 
void testceil(Blackhole bh) { - for (int i = 0; i < TESTSIZE; i++) - Res[i] = Math.ceil(DargV1[i]); + public void test_ceil() { + for (int i = 0; i < TESTSIZE; i++) { + ResD[i] = Math.ceil(DargV1[i]); + } } @Benchmark - public void testfloor(Blackhole bh) { - for (int i = 0; i < TESTSIZE; i++) - Res[i] = Math.floor(DargV1[i]); + public void test_floor() { + for (int i = 0; i < TESTSIZE; i++) { + ResD[i] = Math.floor(DargV1[i]); + } } @Benchmark - public void testrint(Blackhole bh) { - for (int i = 0; i < TESTSIZE; i++) - Res[i] = Math.rint(DargV1[i]); + public void test_rint() { + for (int i = 0; i < TESTSIZE; i++) { + ResD[i] = Math.rint(DargV1[i]); + } + } + + @Benchmark + public void test_round_double() { + for (int i = 0; i < TESTSIZE; i++) { + ResL[i] = Math.round(DargV1[i]); + } + } + + @Benchmark + public void test_round_float() { + for (int i = 0; i < TESTSIZE; i++) { + ResI[i] = Math.round(FargV1[i]); + } } }