8297172: Fix some issues of auto-vectorization of Long.bitCount/numberOfTrailingZeros/numberOfLeadingZeros()

Reviewed-by: kvn, thartmann
2022-12-06 09:35:27 +00:00 · 2022-12-06 09:35:27 +00:00 · 4458de95f8
commit 4458de95f8
parent a61399854a
11 changed files with 172 additions and 142 deletions
--- a/src/hotspot/cpu/aarch64/aarch64_vector.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad
@ -132,6 +132,8 @@ source %{
      // Vector API intrinsics.
      if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
          (opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
          (opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
          (opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
          opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
          opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
          opcode == Op_MulVL) {
@ -5672,7 +5674,6 @@ instruct vpopcountI(vReg dst, vReg src) %{
 // vector popcount - LONG
 instruct vpopcountL(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (PopCountVL src));
  format %{ "vpopcountL $dst, $src" %}
  ins_encode %{
@ -5688,32 +5689,6 @@ instruct vpopcountL(vReg dst, vReg src) %{
  ins_pipe(pipe_slow);
 %}
 // If the PopCountVL is generated by auto-vectorization, the dst basic
 // type is T_INT. And once we have unified the type definition for
 // Vector API and auto-vectorization, this rule can be merged with
 // "vpopcountL" rule.
 //
 // Rule summary: matches (PopCountVL src) only when the result element type
 // is T_INT, i.e. long lanes in, int lanes out. dst is declared TEMP_DEF and
 // tmp is a scratch vector register (see the effect clause); tmp is only
 // passed to the SVE narrowing helper below.
 instruct vpopcountL_I(vReg dst, vReg src, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (PopCountVL src));
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "vpopcountL_I $dst, $src\t# KILL $tmp" %}
  ins_encode %{
    if (UseSVE == 0) {
      // NEON path: count the set bits of every byte, then fold the byte
      // counts together with three pairwise widening adds
      // (byte -> halfword -> word -> doubleword) so each 64-bit lane holds
      // its own population count.
      __ cnt($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
      __ uaddlp($dst$$FloatRegister, __ T16B, $dst$$FloatRegister);
      __ uaddlp($dst$$FloatRegister, __ T8H, $dst$$FloatRegister);
      __ uaddlp($dst$$FloatRegister, __ T4S, $dst$$FloatRegister);
      // Narrow the two 64-bit counts to two 32-bit lanes (T2D -> T2S).
      __ xtn($dst$$FloatRegister, __ T2S, $dst$$FloatRegister, __ T2D);
    } else {
      // SVE path: per-doubleword bit count under an all-true predicate,
      // followed by a D -> S narrowing of the result.
      // NOTE(review): sve_vector_narrow is a macro-assembler helper not
      // visible here; presumably it uses $tmp as scratch - confirm.
      __ sve_cnt($dst$$FloatRegister, __ D, ptrue, $src$$FloatRegister);
      __ sve_vector_narrow($dst$$FloatRegister, __ S,
                           $dst$$FloatRegister, __ D, $tmp$$FloatRegister);
    }
  %}
  ins_pipe(pipe_slow);
 %}
 // vector popcount - predicated
 instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
@ -5729,7 +5704,7 @@ instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{
 %}
 instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
-  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG);
+  predicate(UseSVE > 0);
  match(Set dst_src (PopCountVL dst_src pg));
  format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
  ins_encode %{
--- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4
@ -122,6 +122,8 @@ source %{
      // Vector API intrinsics.
      if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
          (opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
          (opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
          (opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
          opcode == Op_AddReductionVD || opcode == Op_AddReductionVF ||
          opcode == Op_MulReductionVD || opcode == Op_MulReductionVF ||
          opcode == Op_MulVL) {
@ -4055,7 +4057,6 @@ instruct vpopcountI(vReg dst, vReg src) %{
 // vector popcount - LONG
 instruct vpopcountL(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (PopCountVL src));
  format %{ "vpopcountL $dst, $src" %}
  ins_encode %{
@ -4071,37 +4072,11 @@ instruct vpopcountL(vReg dst, vReg src) %{
  ins_pipe(pipe_slow);
 %}
 // If the PopCountVL is generated by auto-vectorization, the dst basic
 // type is T_INT. And once we have unified the type definition for
 // Vector API and auto-vectorization, this rule can be merged with
 // "vpopcountL" rule.
 //
 // Rule summary: matches (PopCountVL src) only when the result element type
 // is T_INT, i.e. long lanes in, int lanes out. dst is declared TEMP_DEF and
 // tmp is a scratch vector register (see the effect clause); tmp is only
 // passed to the SVE narrowing helper below.
 instruct vpopcountL_I(vReg dst, vReg src, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (PopCountVL src));
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "vpopcountL_I $dst, $src\t# KILL $tmp" %}
  ins_encode %{
    if (UseSVE == 0) {
      // NEON path: count the set bits of every byte, then fold the byte
      // counts together with three pairwise widening adds
      // (byte -> halfword -> word -> doubleword) so each 64-bit lane holds
      // its own population count.
      __ cnt($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
      __ uaddlp($dst$$FloatRegister, __ T16B, $dst$$FloatRegister);
      __ uaddlp($dst$$FloatRegister, __ T8H, $dst$$FloatRegister);
      __ uaddlp($dst$$FloatRegister, __ T4S, $dst$$FloatRegister);
      // Narrow the two 64-bit counts to two 32-bit lanes (T2D -> T2S).
      __ xtn($dst$$FloatRegister, __ T2S, $dst$$FloatRegister, __ T2D);
    } else {
      // SVE path: per-doubleword bit count under an all-true predicate,
      // followed by a D -> S narrowing of the result.
      // NOTE(review): sve_vector_narrow is a macro-assembler helper not
      // visible here; presumably it uses $tmp as scratch - confirm.
      __ sve_cnt($dst$$FloatRegister, __ D, ptrue, $src$$FloatRegister);
      __ sve_vector_narrow($dst$$FloatRegister, __ S,
                           $dst$$FloatRegister, __ D, $tmp$$FloatRegister);
    }
  %}
  ins_pipe(pipe_slow);
 %}
 // vector popcount - predicated
 UNARY_OP_PREDICATE(vpopcountI, PopCountVI, sve_cnt)
 instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{
-  predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG);
+  predicate(UseSVE > 0);
  match(Set dst_src (PopCountVL dst_src pg));
  format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %}
  ins_encode %{
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@ -8875,12 +8875,6 @@ instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
    // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
    // should be succeeded by its corresponding vector IR and following
    // special handling should be removed.
    if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
      __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
 %}
@ -8911,18 +8905,6 @@ instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
    // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
    // should be succeeded by its corresponding vector IR and following
    // special handling should be removed.
    if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
      if (VM_Version::supports_avx512vl()) {
        __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      } else {
        assert(VM_Version::supports_avx2(), "");
        __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
        __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
 %}
@ -8939,15 +8921,8 @@ instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp)
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    BasicType rbt = Matcher::vector_element_basic_type(this);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
    // TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
    // should be succeeded by its corresponding vector IR and following
    // special handling should be removed.
    if (bt == T_LONG && rbt == T_INT) {
      __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
 %}
@ -8993,17 +8968,8 @@ instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, v
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    BasicType rbt = Matcher::vector_element_basic_type(this);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
    // TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
    // should be succeeded by its corresponding vector IR and following
    // special handling should be removed.
    if (bt == T_LONG && rbt == T_INT) {
      assert(VM_Version::supports_avx2(), "");
      __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
      __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
 %}
@ -9408,15 +9374,8 @@ instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  ins_encode %{
     int vlen_enc = vector_length_encoding(this, $src);
     BasicType bt = Matcher::vector_element_basic_type(this, $src);
     BasicType rbt = Matcher::vector_element_basic_type(this);
     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, k0, noreg, true, vlen_enc);
     // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
     // should be succeeded by its corresponding vector IR and following
     // special handling should be removed.
     if (rbt == T_INT && bt == T_LONG) {
       __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
     }
  %}
  ins_pipe( pipe_slow );
 %}
@ -9491,15 +9450,8 @@ instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, ve
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    BasicType rbt = Matcher::vector_element_basic_type(this);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
    // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
    // should be succeeded by its corresponding vector IR and following
    // special handling should be removed.
    if (rbt == T_INT && bt == T_LONG) {
      __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
 %}
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@ -2079,6 +2079,14 @@ bool SuperWord::implemented(Node_List* p) {
    } else if (is_cmove_fp_opcode(opc)) {
      retValue = is_cmov_pack(p) && VectorNode::implemented(opc, size, velt_basic_type(p0));
      NOT_PRODUCT(if(retValue && is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmove pack"); print_pack(p);})
    } else if (requires_long_to_int_conversion(opc)) {
      // Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros
      // returns int type, but Vector API for them returns long type. To unify
      // the implementation in backend, superword splits the vector implementation
      // for Java API into an execution node with long type plus another node
      // converting long to int.
      retValue = VectorNode::implemented(opc, size, T_LONG) &&
                 VectorCastNode::implemented(Op_ConvL2I, size, T_LONG, T_INT);
    } else {
      // Vector unsigned right shift for signed subword types behaves differently
      // from Java Spec. But when the shift amount is a constant not greater than
@ -2096,6 +2104,18 @@ bool SuperWord::implemented(Node_List* p) {
 // Reports whether _cmovev_kit yields a non-NULL pack for the first node of p.
 bool SuperWord::is_cmov_pack(Node_List* p) {
  if (_cmovev_kit.pack(p->at(0)) == NULL) {
    return false;
  }
  return true;
 }
 // Returns true for the scalar opcodes PopCountL, CountLeadingZerosL and
 // CountTrailingZerosL. Callers use this to decide that the vectorized form
 // must be built as a long-typed vector node followed by a long-to-int cast.
 bool SuperWord::requires_long_to_int_conversion(int opc) {
  return opc == Op_PopCountL ||
         opc == Op_CountLeadingZerosL ||
         opc == Op_CountTrailingZerosL;
 }
 //------------------------------same_inputs--------------------------
 // For pack p, are all idx operands the same?
 bool SuperWord::same_inputs(Node_List* p, int idx) {
@ -2666,16 +2686,28 @@ bool SuperWord::output() {
                 opc == Op_AbsI || opc == Op_AbsL ||
                 opc == Op_NegF || opc == Op_NegD ||
                 opc == Op_RoundF || opc == Op_RoundD ||
                 opc == Op_PopCountI || opc == Op_PopCountL ||
                 opc == Op_ReverseBytesI || opc == Op_ReverseBytesL ||
                 opc == Op_ReverseBytesUS || opc == Op_ReverseBytesS ||
                 opc == Op_ReverseI || opc == Op_ReverseL ||
-                 opc == Op_CountLeadingZerosI || opc == Op_CountLeadingZerosL ||
+                 opc == Op_PopCountI || opc == Op_CountLeadingZerosI ||
-                 opc == Op_CountTrailingZerosI || opc == Op_CountTrailingZerosL) {
+                 opc == Op_CountTrailingZerosI) {
        assert(n->req() == 2, "only one input expected");
        Node* in = vector_opd(p, 1);
        vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
      } else if (requires_long_to_int_conversion(opc)) {
        // Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros
        // returns int type, but Vector API for them returns long type. To unify
        // the implementation in backend, superword splits the vector implementation
        // for Java API into an execution node with long type plus another node
        // converting long to int.
        assert(n->req() == 2, "only one input expected");
        Node* in = vector_opd(p, 1);
        Node* longval = VectorNode::make(opc, in, NULL, vlen, T_LONG);
        _igvn.register_new_node_with_optimizer(longval);
        _phase->set_ctrl(longval, _phase->get_ctrl(p->at(0)));
        vn = VectorCastNode::make(Op_VectorCastL2X, longval, T_INT, vlen);
        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
      } else if (VectorNode::is_convert_opcode(opc)) {
        assert(n->req() == 2, "only one input expected");
        BasicType bt = velt_basic_type(n);
@ -3198,27 +3230,11 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
    return true;
  }
  if (VectorNode::is_type_transition_long_to_int(use)) {
    // PopCountL/CountLeadingZerosL/CountTrailingZerosL takes long and produces
    // int - hence the special checks on alignment and size.
    if (u_pk->size() != d_pk->size()) {
      return false;
    }
    for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
      Node* ui = u_pk->at(i);
      Node* di = d_pk->at(i);
      if (alignment(ui) * 2 != alignment(di)) {
        return false;
      }
    }
    return true;
  }
  if (u_pk->size() != d_pk->size())
    return false;
  if (longer_type_for_conversion(use) != T_ILLEGAL) {
-    // type conversion takes a type of a kind of size and produces a type of
+    // These opcodes take elements of one size and produce elements of
    // another size - hence the special checks on alignment and size.
    for (uint i = 0; i < u_pk->size(); i++) {
      Node* ui = u_pk->at(i);
@ -3467,7 +3483,8 @@ void SuperWord::compute_max_depth() {
 }
 BasicType SuperWord::longer_type_for_conversion(Node* n) {
-  if (!VectorNode::is_convert_opcode(n->Opcode()) ||
+  if (!(VectorNode::is_convert_opcode(n->Opcode()) ||
        requires_long_to_int_conversion(n->Opcode())) ||
      !in_bb(n->in(1))) {
    return T_ILLEGAL;
  }
--- a/src/hotspot/share/opto/superword.hpp
+++ b/src/hotspot/share/opto/superword.hpp
@ -457,6 +457,7 @@ class SuperWord : public ResourceObj {
  bool is_cmov_pack(Node_List* p);
  bool is_cmov_pack_internal_node(Node_List* p, Node* nd) { return is_cmov_pack(p) && !nd->is_CMove(); }
  static bool is_cmove_fp_opcode(int opc) { return (opc == Op_CMoveF || opc == Op_CMoveD); }
  static bool requires_long_to_int_conversion(int opc);
  // For pack p, are all idx operands the same?
  bool same_inputs(Node_List* p, int idx);
  // CloneMap utilities
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@ -343,17 +343,6 @@ bool VectorNode::is_muladds2i(Node* n) {
  return false;
 }
 // Returns true when n's opcode is PopCountL, CountLeadingZerosL or
 // CountTrailingZerosL; false for every other opcode.
 bool VectorNode::is_type_transition_long_to_int(Node* n) {
  const int opc = n->Opcode();
  return opc == Op_PopCountL ||
         opc == Op_CountLeadingZerosL ||
         opc == Op_CountTrailingZerosL;
 }
 bool VectorNode::is_roundopD(Node* n) {
  if (n->Opcode() == Op_RoundDoubleMode) {
    return true;
--- a/src/hotspot/share/opto/vectornode.hpp
+++ b/src/hotspot/share/opto/vectornode.hpp
@ -99,7 +99,6 @@ class VectorNode : public TypeNode {
  static bool is_type_transition_short_to_int(Node* n);
  static bool is_type_transition_to_int(Node* n);
  static bool is_muladds2i(Node* n);
  static bool is_type_transition_long_to_int(Node* n);
  static bool is_roundopD(Node* n);
  static bool is_scalar_rotate(Node* n);
  static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
@ -551,7 +550,9 @@ class PopCountVINode : public VectorNode {
 // Vector popcount long bits
 class PopCountVLNode : public VectorNode {
 public:
-  PopCountVLNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
+  PopCountVLNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {
    assert(vt->element_basic_type() == T_LONG, "must be long");
  }
  virtual int Opcode() const;
 };
@ -1732,7 +1733,10 @@ public:
 class CountLeadingZerosVNode : public VectorNode {
 public:
  CountLeadingZerosVNode(Node* in, const TypeVect* vt)
-  : VectorNode(in, vt) {}
+  : VectorNode(in, vt) {
    assert(in->bottom_type()->is_vect()->element_basic_type() == vt->element_basic_type(),
           "must be the same");
  }
  virtual int Opcode() const;
 };
@ -1740,7 +1744,10 @@ class CountLeadingZerosVNode : public VectorNode {
 class CountTrailingZerosVNode : public VectorNode {
 public:
  CountTrailingZerosVNode(Node* in, const TypeVect* vt)
-  : VectorNode(in, vt) {}
+  : VectorNode(in, vt) {
    assert(in->bottom_type()->is_vect()->element_basic_type() == vt->element_basic_type(),
           "must be the same");
  }
  virtual int Opcode() const;
 };
--- a/test/hotspot/jtreg/compiler/c2/irTests/TestDisableAutoVectOpcodes.java
+++ b/test/hotspot/jtreg/compiler/c2/irTests/TestDisableAutoVectOpcodes.java
@ -116,4 +116,20 @@ public class TestDisableAutoVectOpcodes {
        dresult += result;
    }
    /**
     * Stores {@code Long.numberOfTrailingZeros(longa[i])} into {@code inta[i]}
     * for the first {@code SIZE} elements.
     *
     * <p>The IR rule fails the test if the compiled method contains a node
     * matching {@code IRNode.COUNTTRAILINGZEROS_VL}.
     * NOTE(review): presumably this test class runs with VM flags that disable
     * the relevant vector opcodes; they are not visible here - confirm.
     */
    @Test
    @IR(failOn = {IRNode.COUNTTRAILINGZEROS_VL})
    public void testNumberOfTrailingZeros() {
        // Keep this loop shape unchanged: the compiled IR of this exact loop
        // is what the @IR rule inspects.
        for (int i = 0; i < SIZE; ++i) {
            inta[i] = Long.numberOfTrailingZeros(longa[i]);
        }
    }
    /**
     * Stores {@code Long.numberOfLeadingZeros(longa[i])} into {@code inta[i]}
     * for the first {@code SIZE} elements.
     *
     * <p>The IR rule fails the test if the compiled method contains a node
     * matching {@code IRNode.COUNTLEADINGZEROS_VL}.
     * NOTE(review): presumably this test class runs with VM flags that disable
     * the relevant vector opcodes; they are not visible here - confirm.
     */
    @Test
    @IR(failOn = {IRNode.COUNTLEADINGZEROS_VL})
    public void testNumberOfLeadingZeros() {
        // Keep this loop shape unchanged: the compiled IR of this exact loop
        // is what the @IR rule inspects.
        for (int i = 0; i < SIZE; ++i) {
            inta[i] = Long.numberOfLeadingZeros(longa[i]);
        }
    }
 }
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@ -714,6 +714,16 @@ public class IRNode {
        superWordNodes(POPCOUNT_VL, "PopCountVL");
    }
    /**
     * IR placeholder for trailing-zero-count vector nodes, registered through
     * {@code superWordNodes} under the node name {@code "CountTrailingZerosV"}.
     * Note that the constant carries a {@code _VL} suffix while the registered
     * node name has no element-type suffix.
     */
    public static final String COUNTTRAILINGZEROS_VL = PREFIX + "COUNTTRAILINGZEROS_VL" + POSTFIX;
    static {
        superWordNodes(COUNTTRAILINGZEROS_VL, "CountTrailingZerosV");
    }
    /**
     * IR placeholder for leading-zero-count vector nodes, registered through
     * {@code superWordNodes} under the node name {@code "CountLeadingZerosV"}.
     * Note that the constant carries a {@code _VL} suffix while the registered
     * node name has no element-type suffix.
     */
    public static final String COUNTLEADINGZEROS_VL = PREFIX + "COUNTLEADINGZEROS_VL" + POSTFIX;
    static {
        superWordNodes(COUNTLEADINGZEROS_VL, "CountLeadingZerosV");
    }
    public static final String POPULATE_INDEX = PREFIX + "POPULATE_INDEX" + POSTFIX;
    static {
        String regex = START + "PopulateIndex" + MID + END;
--- a/test/hotspot/jtreg/compiler/vectorization/TestNumberOfContinuousZeros.java
+++ b/test/hotspot/jtreg/compiler/vectorization/TestNumberOfContinuousZeros.java
@ -0,0 +1,88 @@
 /*
 * Copyright (c) 2022, Arm Limited. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 /**
 * @test
 * @key randomness
 * @summary Test vectorization of numberOfTrailingZeros/numberOfLeadingZeros for Long
 * @requires vm.compiler2.enabled
 * @requires (os.simpleArch == "x64" & vm.cpu.features ~= ".*avx2.*") |
 *           (os.simpleArch == "aarch64" & vm.cpu.features ~= ".*sve.*" & (vm.opt.UseSVE == "null" | vm.opt.UseSVE > 0))
 * @library /test/lib /
 * @run driver compiler.vectorization.TestNumberOfContinuousZeros
 */
 package compiler.vectorization;
 import compiler.lib.ir_framework.*;
 import java.util.Random;
 import jdk.test.lib.Asserts;
 /**
  * Checks that loops calling {@link Long#numberOfTrailingZeros(long)} and
  * {@link Long#numberOfLeadingZeros(long)} are vectorized (via the {@code @IR}
  * rules) and that the vectorized code produces the same values as the scalar
  * library calls (via {@link #checkResult()}).
  */
 public class TestNumberOfContinuousZeros {
    private static final int LEN = 1024;
    /** Boundary inputs that random 64-bit values practically never hit. */
    private static final long[] SPECIAL_VALUES = {0L, -1L, Long.MAX_VALUE};
    private final long[] input;
    private final int[] output;
    public static void main(String[] args) {
        TestFramework.run();
    }
    /**
     * Fills {@code input} with reproducible pseudo-random values (fixed seed)
     * and then overwrites a prefix with boundary cases: zero, all ones,
     * {@code Long.MAX_VALUE} and every single-bit value {@code 1L << k}.
     * Random longs alone almost never have long runs of leading or trailing
     * zeros, so without these the interesting results (e.g. 64 for an input
     * of zero) would never be checked.
     */
    public TestNumberOfContinuousZeros() {
        input = new long[LEN];
        output = new int[LEN];
        Random rng = new Random(42);
        for (int i = 0; i < LEN; ++i) {
            input[i] = rng.nextLong();
        }
        // SPECIAL_VALUES.length + Long.SIZE is far below LEN, so the prefix fits.
        int next = 0;
        for (long special : SPECIAL_VALUES) {
            input[next++] = special;
        }
        for (int bit = 0; bit < Long.SIZE; ++bit) {
            input[next++] = 1L << bit;
        }
    }
    /** Loop expected to compile to at least one trailing-zero-count vector node. */
    @Test
    @IR(counts = {IRNode.COUNTTRAILINGZEROS_VL, "> 0"})
    public void vectorizeNumberOfTrailingZeros() {
        for (int i = 0; i < LEN; ++i) {
            output[i] = Long.numberOfTrailingZeros(input[i]);
        }
    }
    /** Loop expected to compile to at least one leading-zero-count vector node. */
    @Test
    @IR(counts = {IRNode.COUNTLEADINGZEROS_VL, "> 0"})
    public void vectorizeNumberOfLeadingZeros() {
        for (int i = 0; i < LEN; ++i) {
            output[i] = Long.numberOfLeadingZeros(input[i]);
        }
    }
    /**
     * Runs both test methods and compares every element of {@code output}
     * against the result of the corresponding scalar {@code Long} call.
     */
    @Run(test = {"vectorizeNumberOfTrailingZeros", "vectorizeNumberOfLeadingZeros"})
    public void checkResult() {
        vectorizeNumberOfTrailingZeros();
        for (int i = 0; i < LEN; ++i) {
            Asserts.assertEquals(output[i], Long.numberOfTrailingZeros(input[i]));
        }
        vectorizeNumberOfLeadingZeros();
        for (int i = 0; i < LEN; ++i) {
            Asserts.assertEquals(output[i], Long.numberOfLeadingZeros(input[i]));
        }
    }
 }
--- a/test/hotspot/jtreg/compiler/vectorization/TestPopCountVectorLong.java
+++ b/test/hotspot/jtreg/compiler/vectorization/TestPopCountVectorLong.java
@ -25,8 +25,8 @@
 * @test
 * @summary Test vectorization of popcount for Long
 * @requires vm.compiler2.enabled
-* @requires vm.cpu.features ~= ".*avx512bw.*" | (vm.cpu.features ~= ".*sve.*" & (vm.opt.UseSVE == "null" | vm.opt.UseSVE > 0))
+* @requires ((os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") & vm.cpu.features ~= ".*avx512bw.*") |
-* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
+*           os.simpleArch == "aarch64"
 * @library /test/lib /
 * @run driver compiler.vectorization.TestPopCountVectorLong
 */