From 5d82d67a9e1303e235f475c199eb1435c3d69006 Mon Sep 17 00:00:00 2001
From: Jatin Bhateja <jbhateja@openjdk.org>
Date: Thu, 28 Jul 2022 04:43:01 +0000
Subject: [PATCH] 8290034: Auto vectorize reverse bit operations.

Reviewed-by: xgong, kvn
---
 src/hotspot/cpu/x86/assembler_x86.cpp         |   8 +
 src/hotspot/cpu/x86/assembler_x86.hpp         |   1 +
 src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp |  84 +++++++++
 src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp |   4 +
 src/hotspot/cpu/x86/x86_64.ad                 |  44 +++++
 src/hotspot/share/classfile/vmIntrinsics.hpp  |   3 +
 src/hotspot/share/opto/c2compiler.cpp         |   6 +
 src/hotspot/share/opto/library_call.cpp       |   4 +
 src/hotspot/share/opto/subnode.cpp            |  47 +++++
 src/hotspot/share/opto/subnode.hpp            |   4 +
 src/hotspot/share/opto/superword.cpp          |   1 +
 .../share/classes/java/lang/Integer.java      |   1 +
 .../share/classes/java/lang/Long.java         |   1 +
 .../compiler/c2/cr6340864/TestIntVect.java    |  17 ++
 .../compiler/c2/cr6340864/TestLongVect.java   |  17 ++
 .../vectorization/TestReverseBitsVector.java  | 169 ++++++++++++++++++
 .../org/openjdk/bench/java/lang/Integers.java |   7 +
 .../org/openjdk/bench/java/lang/Longs.java    |   7 +
 18 files changed, 425 insertions(+)
 create mode 100644 test/hotspot/jtreg/compiler/vectorization/TestReverseBitsVector.java

diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index a2ef0aad79b..d1e4e24b010 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -10115,6 +10115,14 @@ void Assembler::evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegist
   emit_int8(imm8);
 }
 
+void Assembler::gf2p8affineqb(XMMRegister dst, XMMRegister src, int imm8) {  // two-operand form; see vgf2p8affineqb for the three-operand one
+  assert(VM_Version::supports_gfni(), "requires GFNI support");
+  assert(VM_Version::supports_sse(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);  // dst is also passed as the first source
+  emit_int24((unsigned char)0xCE, (unsigned char)(0xC0 | encode), imm8);  // opcode byte 0xCE, register-form ModRM byte, then the immediate
+}
+
 void Assembler::vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len) {
   assert(VM_Version::supports_gfni(), "requires GFNI support");
   assert(VM_Version::supports_sse(), "");
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index f37387c55d0..737c0a697cf 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -2801,6 +2801,7 @@ private:
   void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
 
   // Galois field affine transformation instructions.
+  void gf2p8affineqb(XMMRegister dst, XMMRegister src, int imm8);
   void vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len);
 
  protected:
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index 2b34d16c5f6..8284cd071c2 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -5484,6 +5484,90 @@ void C2_MacroAssembler::udivmodI(Register rax, Register divisor, Register rdx, R
 }
 
 #ifdef _LP64
+void C2_MacroAssembler::reverseI(Register dst, Register src, XMMRegister xtmp1,
+                                 XMMRegister xtmp2, Register rtmp) {  // emits code leaving the bit-reversed 32-bit value of src in dst
+  if(VM_Version::supports_gfni()) {
+    // GFNI path: reverse the bits with one Galois field affine instruction, following
+    // http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html
+    mov64(rtmp, 0x8040201008040201L);  // constant that becomes the second operand of gf2p8affineqb
+    movq(xtmp1, src);                  // value to transform
+    movq(xtmp2, rtmp);
+    gf2p8affineqb(xtmp1, xtmp2, 0);
+    movq(dst, xtmp1);
+  } else {
+    // Scalar path (uses only dst and rtmp). Step 1: swap even and odd numbered bits.
+    movl(rtmp, src);
+    andl(rtmp, 0x55555555);  // even bits ...
+    shll(rtmp, 1);           // ... move up by one
+    movl(dst, src);
+    andl(dst, 0xAAAAAAAA);   // odd bits ...
+    shrl(dst, 1);            // ... move down by one
+    orl(dst, rtmp);
+
+    // Step 2: swap the low and high 2 bits of each nibble.
+    movl(rtmp, dst);
+    andl(rtmp, 0x33333333);
+    shll(rtmp, 2);
+    andl(dst, 0xCCCCCCCC);
+    shrl(dst, 2);
+    orl(dst, rtmp);
+
+    // Step 3: swap the low and high 4 bits (nibbles) of each byte.
+    movl(rtmp, dst);
+    andl(rtmp, 0x0F0F0F0F);
+    shll(rtmp, 4);
+    andl(dst, 0xF0F0F0F0);
+    shrl(dst, 4);
+    orl(dst, rtmp);
+  }
+  bswapl(dst);  // common tail for both paths: reverse the byte order of the 32-bit result
+}
+
+void C2_MacroAssembler::reverseL(Register dst, Register src, XMMRegister xtmp1,
+                                 XMMRegister xtmp2, Register rtmp1, Register rtmp2) {  // emits code leaving the bit-reversed 64-bit value of src in dst
+  if(VM_Version::supports_gfni()) {
+    // GFNI path (rtmp2 is not referenced here): reverse the bits with one Galois field
+    // affine instruction, see http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html
+    mov64(rtmp1, 0x8040201008040201L);  // constant that becomes the second operand of gf2p8affineqb
+    movq(xtmp1, src);                   // value to transform
+    movq(xtmp2, rtmp1);
+    gf2p8affineqb(xtmp1, xtmp2, 0);
+    movq(dst, xtmp1);
+  } else {
+    // Scalar path (XMM temps are not referenced here). Step 1: swap even and odd numbered bits.
+    movq(rtmp1, src);
+    mov64(rtmp2, 0x5555555555555555L);  // mask is materialized in a register, then used by andq
+    andq(rtmp1, rtmp2);
+    shlq(rtmp1, 1);
+    movq(dst, src);
+    notq(rtmp2);                        // complement of the mask selects the other half of the bits
+    andq(dst, rtmp2);
+    shrq(dst, 1);
+    orq(dst, rtmp1);
+
+    // Step 2: swap the low and high 2 bits of each nibble.
+    movq(rtmp1, dst);
+    mov64(rtmp2, 0x3333333333333333L);
+    andq(rtmp1, rtmp2);
+    shlq(rtmp1, 2);
+    notq(rtmp2);
+    andq(dst, rtmp2);
+    shrq(dst, 2);
+    orq(dst, rtmp1);
+
+    // Step 3: swap the low and high 4 bits (nibbles) of each byte.
+    movq(rtmp1, dst);
+    mov64(rtmp2, 0x0F0F0F0F0F0F0F0FL);
+    andq(rtmp1, rtmp2);
+    shlq(rtmp1, 4);
+    notq(rtmp2);
+    andq(dst, rtmp2);
+    shrq(dst, 4);
+    orq(dst, rtmp1);
+  }
+  bswapq(dst);  // common tail for both paths: reverse the byte order of the 64-bit result
+}
+
 void C2_MacroAssembler::udivL(Register rax, Register divisor, Register rdx) {
   Label done;
   Label neg_divisor_fastpath;
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
index ba2684915a0..21b294a7ff7 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@@ -368,6 +368,10 @@ public:
   void udivmodI(Register rax, Register divisor, Register rdx, Register tmp);
 
 #ifdef _LP64
+  void reverseI(Register dst, Register src, XMMRegister xtmp1,
+                XMMRegister xtmp2, Register rtmp);
+  void reverseL(Register dst, Register src, XMMRegister xtmp1,
+                XMMRegister xtmp2, Register rtmp1, Register rtmp2);
   void udivL(Register rax, Register divisor, Register rdx);
   void umodL(Register rax, Register divisor, Register rdx);
   void udivmodL(Register rax, Register divisor, Register rdx, Register tmp);
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
index bc9f215b35a..b522c072a9f 100644
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -6721,6 +6721,50 @@ instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
   ins_pipe(ialu_reg);
 %}
 
+//--------------- Reverse Operation Instructions ----------------
+instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
+  predicate(!VM_Version::supports_gfni());  // ReverseI rule used when GFNI is not available
+  match(Set dst (ReverseI src));
+  effect(TEMP dst, TEMP rtmp, KILL cr);
+  format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
+  ins_encode %{
+    __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);  // xnoreg: no XMM temporaries are supplied by this rule
+  %}
+  ins_pipe( ialu_reg );
+%}
+
+instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
+  predicate(VM_Version::supports_gfni());  // ReverseI rule used when GFNI is available
+  match(Set dst (ReverseI src));
+  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
+  format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
+  ins_encode %{
+    __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);  // rtmp is a long register: reverseI loads a 64-bit constant into it
+  %}
+  ins_pipe( ialu_reg );
+%}
+
+instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
+  predicate(!VM_Version::supports_gfni());  // ReverseL rule used when GFNI is not available
+  match(Set dst (ReverseL src));
+  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
+  format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
+  ins_encode %{
+    __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);  // xnoreg: no XMM temporaries are supplied by this rule
+  %}
+  ins_pipe( ialu_reg );
+%}
+
+instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
+  predicate(VM_Version::supports_gfni());  // ReverseL rule used when GFNI is available
+  match(Set dst (ReverseL src));
+  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
+  format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
+  ins_encode %{
+    __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);  // noreg: reverseL's GFNI branch does not reference rtmp2
+  %}
+  ins_pipe( ialu_reg );
+%}
 
 //---------- Population Count Instructions -------------------------------------
 
diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp
index 036d09a3c87..8f9a1b3a4ec 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.hpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -246,6 +246,9 @@ class methodHandle;
   do_intrinsic(_expand_i,                 java_lang_Integer,      expand_name,              int2_int_signature,   F_S)  \
   do_intrinsic(_expand_l,                 java_lang_Long,         expand_name,              long2_long_signature, F_S)  \
                                                                                                                         \
+  do_intrinsic(_reverse_i,                java_lang_Integer,      reverse_name,             int_int_signature,   F_S)   \
+   do_name(     reverse_name,                                    "reverse")                                             \
+  do_intrinsic(_reverse_l,                java_lang_Long,         reverse_name,             long_long_signature, F_S)   \
   do_intrinsic(_reverseBytes_i,           java_lang_Integer,      reverseBytes_name,        int_int_signature,   F_S)   \
    do_name(     reverseBytes_name,                               "reverseBytes")                                        \
   do_intrinsic(_reverseBytes_l,           java_lang_Long,         reverseBytes_name,        long_long_signature, F_S)   \
diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp
index 1e3c2e59c82..890ef39fafd 100644
--- a/src/hotspot/share/opto/c2compiler.cpp
+++ b/src/hotspot/share/opto/c2compiler.cpp
@@ -263,6 +263,12 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
   case vmIntrinsics::_numberOfTrailingZeros_l:
     if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false;
     break;
+  case vmIntrinsics::_reverse_i:
+    if (!Matcher::match_rule_supported(Op_ReverseI)) return false;
+    break;
+  case vmIntrinsics::_reverse_l:
+    if (!Matcher::match_rule_supported(Op_ReverseL)) return false;
+    break;
   case vmIntrinsics::_reverseBytes_c:
     if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return false;
     break;
diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp
index 87b546b94bb..9126351a19a 100644
--- a/src/hotspot/share/opto/library_call.cpp
+++ b/src/hotspot/share/opto/library_call.cpp
@@ -525,6 +525,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
   case vmIntrinsics::_numberOfTrailingZeros_l:
   case vmIntrinsics::_bitCount_i:
   case vmIntrinsics::_bitCount_l:
+  case vmIntrinsics::_reverse_i:
+  case vmIntrinsics::_reverse_l:
   case vmIntrinsics::_reverseBytes_i:
   case vmIntrinsics::_reverseBytes_l:
   case vmIntrinsics::_reverseBytes_s:
@@ -2060,6 +2062,8 @@ bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) {
   case vmIntrinsics::_reverseBytes_s:           n = new ReverseBytesSNode( 0,   arg);  break;
   case vmIntrinsics::_reverseBytes_i:           n = new ReverseBytesINode( 0,   arg);  break;
   case vmIntrinsics::_reverseBytes_l:           n = new ReverseBytesLNode( 0,   arg);  break;
+  case vmIntrinsics::_reverse_i:                n = new ReverseINode(0, arg); break;
+  case vmIntrinsics::_reverse_l:                n = new ReverseLNode(0, arg); break;
   default:  fatal_unexpected_iid(id);  break;
   }
   set_result(_gvn.transform(n));
diff --git a/src/hotspot/share/opto/subnode.cpp b/src/hotspot/share/opto/subnode.cpp
index 73ee223bb7c..a687ea2b6cb 100644
--- a/src/hotspot/share/opto/subnode.cpp
+++ b/src/hotspot/share/opto/subnode.cpp
@@ -1899,3 +1899,50 @@ const Type* SqrtFNode::Value(PhaseGVN* phase) const {
   if( f < 0.0f ) return Type::FLOAT;
   return TypeF::make( (float)sqrt( (double)f ) );
 }
+
+// Reverses all 64 bits of val: the first three steps reverse the bits inside
+// each byte, the last three steps reverse the order of the eight bytes.
+static jlong reverse_bits(jlong val) {
+  uint64_t res = (((uint64_t)val & 0xF0F0F0F0F0F0F0F0ULL) >> 4) | (((uint64_t)val & 0x0F0F0F0F0F0F0F0FULL) << 4);
+  res = ((res & 0xCCCCCCCCCCCCCCCCULL) >> 2) | ((res & 0x3333333333333333ULL) << 2);
+  res = ((res & 0xAAAAAAAAAAAAAAAAULL) >> 1) | ((res & 0x5555555555555555ULL) << 1);
+  res = ((res & 0xFF00FF00FF00FF00ULL) >> 8) | ((res & 0x00FF00FF00FF00FFULL) << 8);
+  res = ((res & 0xFFFF0000FFFF0000ULL) >> 16) | ((res & 0x0000FFFF0000FFFFULL) << 16);
+  return (jlong)((res >> 32) | (res << 32));
+}
+
+const Type* ReverseINode::Value(PhaseGVN* phase) const {
+  const Type* t1 = phase->type(in(1));
+  if (t1 == Type::TOP) return Type::TOP;
+  const TypeInt* t1int = t1->isa_int();
+  if (t1int && t1int->is_con()) {
+    // The int sits in the low half of the jlong; the 64-bit reversal mirrors it into the high half.
+    return TypeInt::make((jint)((uint64_t)reverse_bits(t1int->get_con()) >> 32));
+  }
+  return bottom_type();  // the input's range says nothing about the range of its reversal
+}
+
+const Type* ReverseLNode::Value(PhaseGVN* phase) const {
+  const Type* t1 = phase->type(in(1));
+  if (t1 == Type::TOP) return Type::TOP;
+  const TypeLong* t1long = t1->isa_long();
+  if (t1long && t1long->is_con()) {
+    // Fold to the full 64-bit reversal; the result must not be narrowed to jint.
+    return TypeLong::make(reverse_bits(t1long->get_con()));
+  }
+  return bottom_type();  // the input's range says nothing about the range of its reversal
+}
+
+// Two bit reversals cancel out: when the operand is itself a ReverseI, hand
+// back that inner node's operand; otherwise this node is kept as it is.
+Node* ReverseINode::Identity(PhaseGVN* phase) {
+  Node* operand = in(1);
+  return (operand->Opcode() == Op_ReverseI) ? operand->in(1) : this;
+}
+
+// Two bit reversals cancel out: when the operand is itself a ReverseL, hand
+// back that inner node's operand; otherwise this node is kept as it is.
+Node* ReverseLNode::Identity(PhaseGVN* phase) {
+  Node* operand = in(1);
+  return (operand->Opcode() == Op_ReverseL) ? operand->in(1) : this;
+}
diff --git a/src/hotspot/share/opto/subnode.hpp b/src/hotspot/share/opto/subnode.hpp
index cc2d58692ee..5e13556dfa8 100644
--- a/src/hotspot/share/opto/subnode.hpp
+++ b/src/hotspot/share/opto/subnode.hpp
@@ -580,6 +580,8 @@ public:
   virtual int Opcode() const;
   const Type *bottom_type() const { return TypeInt::INT; }
   virtual uint ideal_reg() const { return Op_RegI; }
+  virtual Node* Identity(PhaseGVN* phase);
+  virtual const Type* Value(PhaseGVN* phase) const;
 };
 
 //-------------------------------ReverseLNode--------------------------------
@@ -590,6 +592,8 @@ public:
   virtual int Opcode() const;
   const Type *bottom_type() const { return TypeLong::LONG; }
   virtual uint ideal_reg() const { return Op_RegL; }
+  virtual Node* Identity(PhaseGVN* phase);
+  virtual const Type* Value(PhaseGVN* phase) const;
 };
 
 #endif // SHARE_OPTO_SUBNODE_HPP
diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index ef66840628f..4ff567e97cb 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -2645,6 +2645,7 @@ bool SuperWord::output() {
                  opc == Op_PopCountI || opc == Op_PopCountL ||
                  opc == Op_ReverseBytesI || opc == Op_ReverseBytesL ||
                  opc == Op_ReverseBytesUS || opc == Op_ReverseBytesS ||
+                 opc == Op_ReverseI || opc == Op_ReverseL ||
                  opc == Op_CountLeadingZerosI || opc == Op_CountLeadingZerosL ||
                  opc == Op_CountTrailingZerosI || opc == Op_CountTrailingZerosL) {
         assert(n->req() == 2, "only one input expected");
diff --git a/src/java.base/share/classes/java/lang/Integer.java b/src/java.base/share/classes/java/lang/Integer.java
index f327b2eb8b5..183e3bb8804 100644
--- a/src/java.base/share/classes/java/lang/Integer.java
+++ b/src/java.base/share/classes/java/lang/Integer.java
@@ -1762,6 +1762,7 @@ public final class Integer extends Number
      *     specified {@code int} value.
      * @since 1.5
      */
+    @IntrinsicCandidate
     public static int reverse(int i) {
         // HD, Figure 7-1
         i = (i & 0x55555555) << 1 | (i >>> 1) & 0x55555555;
diff --git a/src/java.base/share/classes/java/lang/Long.java b/src/java.base/share/classes/java/lang/Long.java
index 5b2f20bbb9f..7c57f7435a1 100644
--- a/src/java.base/share/classes/java/lang/Long.java
+++ b/src/java.base/share/classes/java/lang/Long.java
@@ -1901,6 +1901,7 @@ public final class Long extends Number
      *     specified {@code long} value.
      * @since 1.5
      */
+    @IntrinsicCandidate
     public static long reverse(long i) {
         // HD, Figure 7-1
         i = (i & 0x5555555555555555L) << 1 | (i >>> 1) & 0x5555555555555555L;
diff --git a/test/hotspot/jtreg/compiler/c2/cr6340864/TestIntVect.java b/test/hotspot/jtreg/compiler/c2/cr6340864/TestIntVect.java
index 73d85290019..6519196f319 100644
--- a/test/hotspot/jtreg/compiler/c2/cr6340864/TestIntVect.java
+++ b/test/hotspot/jtreg/compiler/c2/cr6340864/TestIntVect.java
@@ -461,6 +461,10 @@ public class TestIntVect {
       for (int i=0; i<ARRLEN; i++) {
         errn += verify("test_reverse_bytes: ", i, a0[i], Integer.reverseBytes(a1[i]));
       }
+      test_reverse(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_reverse: ", i, a0[i], Integer.reverse(a1[i]));
+      }
 
       test_pack2(p2, a1);
       for (int i=0; i<ARRLEN/2; i++) {
@@ -934,6 +938,13 @@ public class TestIntVect {
     end = System.currentTimeMillis();
     System.out.println("test_reverse_bytes: " + (end - start));
 
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_reverse(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_reverse: " + (end - start));
+
     start = System.currentTimeMillis();
     for (int i=0; i<ITERS; i++) {
       test_pack2(p2, a1);
@@ -1287,6 +1298,12 @@ public class TestIntVect {
     }
   }
 
+  static void test_reverse(int [] a0, int [] a1) {  // kernel under test: stores Integer.reverse of each a1 element into a0
+    for(int i = 0; i < a0.length; i++) {  // bound is a0.length; a1 is read at the same index
+      a0[i] = Integer.reverse(a1[i]);
+    }
+  }
+
   static int verify(String text, int i, int elem, int val) {
     if (elem != val) {
       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
diff --git a/test/hotspot/jtreg/compiler/c2/cr6340864/TestLongVect.java b/test/hotspot/jtreg/compiler/c2/cr6340864/TestLongVect.java
index 21a1f512da0..baee5dff4f5 100644
--- a/test/hotspot/jtreg/compiler/c2/cr6340864/TestLongVect.java
+++ b/test/hotspot/jtreg/compiler/c2/cr6340864/TestLongVect.java
@@ -436,6 +436,10 @@ public class TestLongVect {
       for (int i=0; i<ARRLEN; i++) {
         errn += verify("test_reverse_bytes: ", i, a0[i], Long.reverseBytes(a1[i]));
       }
+      test_reverse(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_reverse: ", i, a0[i], Long.reverse(a1[i]));
+      }
     }
 
     if (errn > 0)
@@ -863,6 +867,12 @@ public class TestLongVect {
     end = System.currentTimeMillis();
     System.out.println("test_reverse_bytes: " + (end - start));
 
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_reverse(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_reverse: " + (end - start));
     return errn;
   }
 
@@ -1133,12 +1143,19 @@ public class TestLongVect {
       a0[i] = (long)((a1[i] & b)>>VALUE);
     }
   }
+
   static void test_reverse_bytes(long[] a0, long[] a1) {
     for(int i = 0; i < a0.length; i++) {
       a0[i] = Long.reverseBytes(a1[i]);
     }
   }
 
+  static void test_reverse(long[] a0, long[] a1) {  // kernel under test: stores Long.reverse of each a1 element into a0
+    for(int i = 0; i < a0.length; i++) {  // bound is a0.length; a1 is read at the same index
+      a0[i] = Long.reverse(a1[i]);
+    }
+  }
+
   static int verify(String text, int i, long elem, long val) {
     if (elem != val) {
       System.err.println(text + "[" + i + "] = " + elem + " != " + val);
diff --git a/test/hotspot/jtreg/compiler/vectorization/TestReverseBitsVector.java b/test/hotspot/jtreg/compiler/vectorization/TestReverseBitsVector.java
new file mode 100644
index 00000000000..c2d4ed3e6f9
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/vectorization/TestReverseBitsVector.java
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+/**
+ * @test
+ * @bug 8290034
+ * @summary Auto-vectorization of Reverse bit operation.
+ * @requires vm.compiler2.enabled
+ * @library /test/lib /
+ * @run driver compiler.vectorization.TestReverseBitsVector
+ */
+
+package compiler.vectorization;
+
+import compiler.lib.ir_framework.*;
+import java.util.Random;
+
+public class TestReverseBitsVector {  // IR-shape tests for Long.reverse / Integer.reverse loops; results are not compared against expected values here
+  private static final int ARRLEN = 1024;   // length of every array allocated in setup()
+  private static final int ITERS  = 11000;  // number of times each @Run kernel calls its test method
+
+  private static long [] linp;
+  private static long [] lout;
+  private static int  [] iinp;
+  private static int  [] iout;
+  private static short [] sinp;  // the short and char arrays are allocated and filled in setup(),
+  private static short [] sout;  // but no test method in this class reads them
+  private static char [] cinp;
+  private static char [] cout;
+
+  public static void setup() {  // (re)allocates all arrays and fills the inputs with pseudo-random values
+      Random r = new Random(1024);  // fixed seed, so every call produces the same inputs
+      linp = new long[ARRLEN];
+      lout = new long[ARRLEN];
+      iinp = new int[ARRLEN];
+      iout = new int[ARRLEN];
+      sinp = new short[ARRLEN];
+      sout = new short[ARRLEN];
+      cinp = new char[ARRLEN];
+      cout = new char[ARRLEN];
+      for(int i = 0; i < ARRLEN; i++) {
+          linp[i] = r.nextLong();
+          iinp[i] = r.nextInt();
+          sinp[i] = (short)r.nextInt();
+          cinp[i] = (char)r.nextInt();
+      }
+  }
+
+  public static void main(String args[]) {
+      setup();
+      TestFramework.runWithFlags("-XX:-TieredCompilation");  // hands control to the IR test framework with tiered compilation disabled
+      System.out.println("PASSED");
+  }
+
+  @Test
+  @IR(applyIfCPUFeature={"avx2", "true"}, counts = {"ReverseV" , " > 0 "})  // with avx2: at least one ReverseV node is expected
+  public void test_reverse_long1(long[] lout, long[] linp) {  // parameters shadow the static fields of the same names
+      for (int i = 0; i < lout.length; i+=1) {
+          lout[i] = Long.reverse(linp[i]);
+      }
+  }
+
+  @Run(test = {"test_reverse_long1"}, mode = RunMode.STANDALONE)
+  public void kernel_test_reverse_long1() {
+      setup();
+      for (int i = 0; i < ITERS; i++) {
+          test_reverse_long1(lout , linp);
+      }
+  }
+
+  @Test
+  @IR(applyIfCPUFeature={"avx2", "true"}, failOn = {"ReverseV" , "ReverseL"})  // with avx2: no reverse node may remain
+  public void test_reverse_long2(long[] lout, long[] linp) {
+      for (int i = 0; i < lout.length; i+=1) {
+          lout[i] = Long.reverse(Long.reverse(linp[i]));  // double reversal yields the original value
+      }
+  }
+
+  @Run(test = {"test_reverse_long2"}, mode = RunMode.STANDALONE)
+  public void kernel_test_reverse_long2() {
+      setup();
+      for (int i = 0; i < ITERS; i++) {
+          test_reverse_long2(lout , linp);
+      }
+  }
+
+  @Test
+  @IR(applyIfCPUFeature={"avx2", "true"}, failOn = {"ReverseV" , "ReverseL"})  // with avx2: no reverse node may remain
+  public void test_reverse_long3(long[] lout, long[] linp) {
+      for (int i = 0; i < lout.length; i+=1) {
+          lout[i] = Long.reverse(linp[i] ^ linp[i]);  // x ^ x is always 0, so the argument is a constant
+      }
+  }
+
+  @Run(test = {"test_reverse_long3"}, mode = RunMode.STANDALONE)
+  public void kernel_test_reverse_long3() {
+      setup();
+      for (int i = 0; i < ITERS; i++) {
+          test_reverse_long3(lout , linp);
+      }
+  }
+
+  @Test
+  @IR(applyIfCPUFeature={"avx2", "true"}, counts = {"ReverseV" , " > 0 "})  // with avx2: at least one ReverseV node is expected
+  public void test_reverse_int1(int[] iout, int[] iinp) {  // parameters shadow the static fields of the same names
+      for (int i = 0; i < iout.length; i+=1) {
+          iout[i] = Integer.reverse(iinp[i]);
+      }
+  }
+
+  @Run(test = {"test_reverse_int1"}, mode = RunMode.STANDALONE)
+  public void kernel_test_reverse_int1() {
+      setup();
+      for (int i = 0; i < ITERS; i++) {
+          test_reverse_int1(iout , iinp);
+      }
+  }
+
+  @Test
+  @IR(applyIfCPUFeature={"avx2", "true"}, failOn = {"ReverseV" , "ReverseI"})  // with avx2: no reverse node may remain
+  public void test_reverse_int2(int[] iout, int[] iinp) {
+      for (int i = 0; i < iout.length; i+=1) {
+          iout[i] = Integer.reverse(Integer.reverse(iinp[i]));  // double reversal yields the original value
+      }
+  }
+
+  @Run(test = {"test_reverse_int2"}, mode = RunMode.STANDALONE)
+  public void kernel_test_reverse_int2() {
+      setup();
+      for (int i = 0; i < ITERS; i++) {
+          test_reverse_int2(iout , iinp);
+      }
+  }
+
+  @Test
+  @IR(applyIfCPUFeature={"avx2", "true"}, failOn = {"ReverseV" , "ReverseI"})  // with avx2: no reverse node may remain
+  public void test_reverse_int3(int[] iout, int[] iinp) {
+      for (int i = 0; i < iout.length; i+=1) {
+          iout[i] = Integer.reverse(iinp[i] ^ iinp[i]);  // x ^ x is always 0, so the argument is a constant
+      }
+  }
+
+  @Run(test = {"test_reverse_int3"}, mode = RunMode.STANDALONE)
+  public void kernel_test_reverse_int3() {
+      setup();
+      for (int i = 0; i < ITERS; i++) {
+          test_reverse_int3(iout , iinp);
+      }
+  }
+}
diff --git a/test/micro/org/openjdk/bench/java/lang/Integers.java b/test/micro/org/openjdk/bench/java/lang/Integers.java
index c375215070a..43ceb5d18d2 100644
--- a/test/micro/org/openjdk/bench/java/lang/Integers.java
+++ b/test/micro/org/openjdk/bench/java/lang/Integers.java
@@ -173,4 +173,11 @@ public class Integers {
             res[i] = Integer.reverseBytes(intsSmall[i]);
         }
     }
+
+    @Benchmark
+    public void reverse() {  // benchmark body: Integer.reverse applied element-wise
+        for (int i = 0; i < size; i++) {  // covers indices [0, size)
+            res[i] = Integer.reverse(intsSmall[i]);  // reads intsSmall, writes res
+        }
+    }
 }
diff --git a/test/micro/org/openjdk/bench/java/lang/Longs.java b/test/micro/org/openjdk/bench/java/lang/Longs.java
index 0b5ee87dafa..765d00e9fb9 100644
--- a/test/micro/org/openjdk/bench/java/lang/Longs.java
+++ b/test/micro/org/openjdk/bench/java/lang/Longs.java
@@ -168,4 +168,11 @@ public class Longs {
             res[i] = Long.reverseBytes(longArraySmall[i]);
         }
     }
+
+    @Benchmark
+    public void reverse() {  // benchmark body: Long.reverse applied element-wise
+        for (int i = 0; i < size; i++) {  // covers indices [0, size)
+            res[i] = Long.reverse(longArraySmall[i]);  // reads longArraySmall, writes res
+        }
+    }
 }