From 4458de95f845c036c1c8e28df7043e989beaee98 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Tue, 6 Dec 2022 09:35:27 +0000 Subject: [PATCH] 8297172: Fix some issues of auto-vectorization of `Long.bitCount/numberOfTrailingZeros/numberOfLeadingZeros()` Reviewed-by: kvn, thartmann --- src/hotspot/cpu/aarch64/aarch64_vector.ad | 31 +------ src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 | 31 +------ src/hotspot/cpu/x86/x86.ad | 48 ---------- src/hotspot/share/opto/superword.cpp | 59 ++++++++----- src/hotspot/share/opto/superword.hpp | 1 + src/hotspot/share/opto/vectornode.cpp | 11 --- src/hotspot/share/opto/vectornode.hpp | 15 +++- .../irTests/TestDisableAutoVectOpcodes.java | 16 ++++ .../compiler/lib/ir_framework/IRNode.java | 10 +++ .../TestNumberOfContinuousZeros.java | 88 +++++++++++++++++++ .../vectorization/TestPopCountVectorLong.java | 4 +- 11 files changed, 172 insertions(+), 142 deletions(-) create mode 100644 test/hotspot/jtreg/compiler/vectorization/TestNumberOfContinuousZeros.java diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index 7868d7cf46b..0a64d35dd47 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -132,6 +132,8 @@ source %{ // Vector API intrinsics. if ((opcode == Op_VectorCastD2X && bt == T_INT) || (opcode == Op_VectorCastL2X && bt == T_FLOAT) || + (opcode == Op_CountLeadingZerosV && bt == T_LONG) || + (opcode == Op_CountTrailingZerosV && bt == T_LONG) || opcode == Op_AddReductionVD || opcode == Op_AddReductionVF || opcode == Op_MulReductionVD || opcode == Op_MulReductionVF || opcode == Op_MulVL) { @@ -5672,7 +5674,6 @@ instruct vpopcountI(vReg dst, vReg src) %{ // vector popcount - LONG instruct vpopcountL(vReg dst, vReg src) %{ - predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst (PopCountVL src)); format %{ "vpopcountL $dst, $src" %} ins_encode %{ @@ -5688,32 +5689,6 @@ instruct vpopcountL(vReg dst, vReg src) %{ ins_pipe(pipe_slow); %} -// If the PopCountVL is generated by auto-vectorization, the dst basic -// type is T_INT. And once we have unified the type definition for -// Vector API and auto-vectorization, this rule can be merged with -// "vpopcountL" rule. - -instruct vpopcountL_I(vReg dst, vReg src, vReg tmp) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT); - match(Set dst (PopCountVL src)); - effect(TEMP_DEF dst, TEMP tmp); - format %{ "vpopcountL_I $dst, $src\t# KILL $tmp" %} - ins_encode %{ - if (UseSVE == 0) { - __ cnt($dst$$FloatRegister, __ T16B, $src$$FloatRegister); - __ uaddlp($dst$$FloatRegister, __ T16B, $dst$$FloatRegister); - __ uaddlp($dst$$FloatRegister, __ T8H, $dst$$FloatRegister); - __ uaddlp($dst$$FloatRegister, __ T4S, $dst$$FloatRegister); - __ xtn($dst$$FloatRegister, __ T2S, $dst$$FloatRegister, __ T2D); - } else { - __ sve_cnt($dst$$FloatRegister, __ D, ptrue, $src$$FloatRegister); - __ sve_vector_narrow($dst$$FloatRegister, __ S, - $dst$$FloatRegister, __ D, $tmp$$FloatRegister); - } - %} - ins_pipe(pipe_slow); -%} - // vector popcount - predicated instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{ @@ -5729,7 +5704,7 @@ instruct vpopcountI_masked(vReg dst_src, pRegGov pg) %{ %} instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{ - predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG); + predicate(UseSVE > 0); match(Set dst_src (PopCountVL dst_src pg)); format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %} ins_encode %{ diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index 118611b1675..65adf7d6ee0 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -122,6 +122,8 @@ source %{ // Vector API intrinsics. if ((opcode == Op_VectorCastD2X && bt == T_INT) || (opcode == Op_VectorCastL2X && bt == T_FLOAT) || + (opcode == Op_CountLeadingZerosV && bt == T_LONG) || + (opcode == Op_CountTrailingZerosV && bt == T_LONG) || opcode == Op_AddReductionVD || opcode == Op_AddReductionVF || opcode == Op_MulReductionVD || opcode == Op_MulReductionVF || opcode == Op_MulVL) { @@ -4055,7 +4057,6 @@ instruct vpopcountI(vReg dst, vReg src) %{ // vector popcount - LONG instruct vpopcountL(vReg dst, vReg src) %{ - predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst (PopCountVL src)); format %{ "vpopcountL $dst, $src" %} ins_encode %{ @@ -4071,37 +4072,11 @@ instruct vpopcountL(vReg dst, vReg src) %{ ins_pipe(pipe_slow); %} -// If the PopCountVL is generated by auto-vectorization, the dst basic -// type is T_INT. And once we have unified the type definition for -// Vector API and auto-vectorization, this rule can be merged with -// "vpopcountL" rule. - -instruct vpopcountL_I(vReg dst, vReg src, vReg tmp) %{ - predicate(Matcher::vector_element_basic_type(n) == T_INT); - match(Set dst (PopCountVL src)); - effect(TEMP_DEF dst, TEMP tmp); - format %{ "vpopcountL_I $dst, $src\t# KILL $tmp" %} - ins_encode %{ - if (UseSVE == 0) { - __ cnt($dst$$FloatRegister, __ T16B, $src$$FloatRegister); - __ uaddlp($dst$$FloatRegister, __ T16B, $dst$$FloatRegister); - __ uaddlp($dst$$FloatRegister, __ T8H, $dst$$FloatRegister); - __ uaddlp($dst$$FloatRegister, __ T4S, $dst$$FloatRegister); - __ xtn($dst$$FloatRegister, __ T2S, $dst$$FloatRegister, __ T2D); - } else { - __ sve_cnt($dst$$FloatRegister, __ D, ptrue, $src$$FloatRegister); - __ sve_vector_narrow($dst$$FloatRegister, __ S, - $dst$$FloatRegister, __ D, $tmp$$FloatRegister); - } - %} - ins_pipe(pipe_slow); -%} - // vector popcount - predicated UNARY_OP_PREDICATE(vpopcountI, PopCountVI, sve_cnt) instruct vpopcountL_masked(vReg dst_src, pRegGov pg) %{ - predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_LONG); + predicate(UseSVE > 0); match(Set dst_src (PopCountVL dst_src pg)); format %{ "vpopcountL_masked $dst_src, $pg, $dst_src" %} ins_encode %{ diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index a6f2adabb28..629ae77567d 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -8875,12 +8875,6 @@ instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ int vlen_enc = vector_length_encoding(this, $src); BasicType bt = Matcher::vector_element_basic_type(this, $src); __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); - // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL - // should be succeeded by its corresponding vector IR and following - // special handling should be removed. - if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) { - __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); - } %} ins_pipe( pipe_slow ); %} @@ -8911,18 +8905,6 @@ instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) % BasicType bt = Matcher::vector_element_basic_type(this, $src); __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); - // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL - // should be succeeded by its corresponding vector IR and following - // special handling should be removed. - if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) { - if (VM_Version::supports_avx512vl()) { - __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); - } else { - assert(VM_Version::supports_avx2(), ""); - __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); - __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); - } - } %} ins_pipe( pipe_slow ); %} @@ -8939,15 +8921,8 @@ instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) ins_encode %{ int vlen_enc = vector_length_encoding(this, $src); BasicType bt = Matcher::vector_element_basic_type(this, $src); - BasicType rbt = Matcher::vector_element_basic_type(this); __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); - // TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV - // should be succeeded by its corresponding vector IR and following - // special handling should be removed. - if (bt == T_LONG && rbt == T_INT) { - __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); - } %} ins_pipe( pipe_slow ); %} @@ -8993,17 +8968,8 @@ instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, v ins_encode %{ int vlen_enc = vector_length_encoding(this, $src); BasicType bt = Matcher::vector_element_basic_type(this, $src); - BasicType rbt = Matcher::vector_element_basic_type(this); __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); - // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL - // should be succeeded by its corresponding vector IR and following - // special handling should be removed. - if (bt == T_LONG && rbt == T_INT) { - assert(VM_Version::supports_avx2(), ""); - __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); - __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); - } %} ins_pipe( pipe_slow ); %} @@ -9408,15 +9374,8 @@ instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ ins_encode %{ int vlen_enc = vector_length_encoding(this, $src); BasicType bt = Matcher::vector_element_basic_type(this, $src); - BasicType rbt = Matcher::vector_element_basic_type(this); __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, k0, noreg, true, vlen_enc); - // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV - // should be succeeded by its corresponding vector IR and following - // special handling should be removed. - if (rbt == T_INT && bt == T_LONG) { - __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); - } %} ins_pipe( pipe_slow ); %} @@ -9491,15 +9450,8 @@ instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, ve ins_encode %{ int vlen_enc = vector_length_encoding(this, $src); BasicType bt = Matcher::vector_element_basic_type(this, $src); - BasicType rbt = Matcher::vector_element_basic_type(this); __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); - // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV - // should be succeeded by its corresponding vector IR and following - // special handling should be removed. - if (rbt == T_INT && bt == T_LONG) { - __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); - } %} ins_pipe( pipe_slow ); %} diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp index fc4051aaeaf..35dce686ec5 100644 --- a/src/hotspot/share/opto/superword.cpp +++ b/src/hotspot/share/opto/superword.cpp @@ -2079,6 +2079,14 @@ bool SuperWord::implemented(Node_List* p) { } else if (is_cmove_fp_opcode(opc)) { retValue = is_cmov_pack(p) && VectorNode::implemented(opc, size, velt_basic_type(p0)); NOT_PRODUCT(if(retValue && is_trace_cmov()) {tty->print_cr("SWPointer::implemented: found cmove pack"); print_pack(p);}) + } else if (requires_long_to_int_conversion(opc)) { + // Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros + // returns int type, but Vector API for them returns long type. To unify + // the implementation in backend, superword splits the vector implementation + // for Java API into an execution node with long type plus another node + // converting long to int. + retValue = VectorNode::implemented(opc, size, T_LONG) && + VectorCastNode::implemented(Op_ConvL2I, size, T_LONG, T_INT); } else { // Vector unsigned right shift for signed subword types behaves differently // from Java Spec. But when the shift amount is a constant not greater than @@ -2096,6 +2104,18 @@ bool SuperWord::implemented(Node_List* p) { bool SuperWord::is_cmov_pack(Node_List* p) { return _cmovev_kit.pack(p->at(0)) != NULL; } + +bool SuperWord::requires_long_to_int_conversion(int opc) { + switch(opc) { + case Op_PopCountL: + case Op_CountLeadingZerosL: + case Op_CountTrailingZerosL: + return true; + default: + return false; + } +} + //------------------------------same_inputs-------------------------- // For pack p, are all idx operands the same? bool SuperWord::same_inputs(Node_List* p, int idx) { @@ -2666,16 +2686,28 @@ bool SuperWord::output() { opc == Op_AbsI || opc == Op_AbsL || opc == Op_NegF || opc == Op_NegD || opc == Op_RoundF || opc == Op_RoundD || - opc == Op_PopCountI || opc == Op_PopCountL || opc == Op_ReverseBytesI || opc == Op_ReverseBytesL || opc == Op_ReverseBytesUS || opc == Op_ReverseBytesS || opc == Op_ReverseI || opc == Op_ReverseL || - opc == Op_CountLeadingZerosI || opc == Op_CountLeadingZerosL || - opc == Op_CountTrailingZerosI || opc == Op_CountTrailingZerosL) { + opc == Op_PopCountI || opc == Op_CountLeadingZerosI || + opc == Op_CountTrailingZerosI) { assert(n->req() == 2, "only one input expected"); Node* in = vector_opd(p, 1); vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n)); vlen_in_bytes = vn->as_Vector()->length_in_bytes(); + } else if (requires_long_to_int_conversion(opc)) { + // Java API for Long.bitCount/numberOfLeadingZeros/numberOfTrailingZeros + // returns int type, but Vector API for them returns long type. To unify + // the implementation in backend, superword splits the vector implementation + // for Java API into an execution node with long type plus another node + // converting long to int. + assert(n->req() == 2, "only one input expected"); + Node* in = vector_opd(p, 1); + Node* longval = VectorNode::make(opc, in, NULL, vlen, T_LONG); + _igvn.register_new_node_with_optimizer(longval); + _phase->set_ctrl(longval, _phase->get_ctrl(p->at(0))); + vn = VectorCastNode::make(Op_VectorCastL2X, longval, T_INT, vlen); + vlen_in_bytes = vn->as_Vector()->length_in_bytes(); } else if (VectorNode::is_convert_opcode(opc)) { assert(n->req() == 2, "only one input expected"); BasicType bt = velt_basic_type(n); @@ -3198,27 +3230,11 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) { return true; } - if (VectorNode::is_type_transition_long_to_int(use)) { - // PopCountL/CountLeadingZerosL/CountTrailingZerosL takes long and produces - // int - hence the special checks on alignment and size. - if (u_pk->size() != d_pk->size()) { - return false; - } - for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) { - Node* ui = u_pk->at(i); - Node* di = d_pk->at(i); - if (alignment(ui) * 2 != alignment(di)) { - return false; - } - } - return true; - } - if (u_pk->size() != d_pk->size()) return false; if (longer_type_for_conversion(use) != T_ILLEGAL) { - // type conversion takes a type of a kind of size and produces a type of + // These opcodes take a type of a kind of size and produce a type of // another size - hence the special checks on alignment and size. for (uint i = 0; i < u_pk->size(); i++) { Node* ui = u_pk->at(i); @@ -3467,7 +3483,8 @@ void SuperWord::compute_max_depth() { } BasicType SuperWord::longer_type_for_conversion(Node* n) { - if (!VectorNode::is_convert_opcode(n->Opcode()) || + if (!(VectorNode::is_convert_opcode(n->Opcode()) || + requires_long_to_int_conversion(n->Opcode())) || !in_bb(n->in(1))) { return T_ILLEGAL; } diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp index 3fac2871e76..5da1d7b21c2 100644 --- a/src/hotspot/share/opto/superword.hpp +++ b/src/hotspot/share/opto/superword.hpp @@ -457,6 +457,7 @@ class SuperWord : public ResourceObj { bool is_cmov_pack(Node_List* p); bool is_cmov_pack_internal_node(Node_List* p, Node* nd) { return is_cmov_pack(p) && !nd->is_CMove(); } static bool is_cmove_fp_opcode(int opc) { return (opc == Op_CMoveF || opc == Op_CMoveD); } + static bool requires_long_to_int_conversion(int opc); // For pack p, are all idx operands the same? bool same_inputs(Node_List* p, int idx); // CloneMap utilities diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index 85dbf9e42cf..92ec6d80cfe 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -343,17 +343,6 @@ bool VectorNode::is_muladds2i(Node* n) { return false; } -bool VectorNode::is_type_transition_long_to_int(Node* n) { - switch(n->Opcode()) { - case Op_PopCountL: - case Op_CountLeadingZerosL: - case Op_CountTrailingZerosL: - return true; - default: - return false; - } -} - bool VectorNode::is_roundopD(Node* n) { if (n->Opcode() == Op_RoundDoubleMode) { return true; diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index 590fe106616..aa672ca983e 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -99,7 +99,6 @@ class VectorNode : public TypeNode { static bool is_type_transition_short_to_int(Node* n); static bool is_type_transition_to_int(Node* n); static bool is_muladds2i(Node* n); - static bool is_type_transition_long_to_int(Node* n); static bool is_roundopD(Node* n); static bool is_scalar_rotate(Node* n); static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt); @@ -551,7 +550,9 @@ class PopCountVINode : public VectorNode { // Vector popcount long bits class PopCountVLNode : public VectorNode { public: - PopCountVLNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {} + PopCountVLNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) { + assert(vt->element_basic_type() == T_LONG, "must be long"); + } virtual int Opcode() const; }; @@ -1732,7 +1733,10 @@ public: class CountLeadingZerosVNode : public VectorNode { public: CountLeadingZerosVNode(Node* in, const TypeVect* vt) - : VectorNode(in, vt) {} + : VectorNode(in, vt) { + assert(in->bottom_type()->is_vect()->element_basic_type() == vt->element_basic_type(), + "must be the same"); + } virtual int Opcode() const; }; @@ -1740,7 +1744,10 @@ class CountLeadingZerosVNode : public VectorNode { class CountTrailingZerosVNode : public VectorNode { public: CountTrailingZerosVNode(Node* in, const TypeVect* vt) - : VectorNode(in, vt) {} + : VectorNode(in, vt) { + assert(in->bottom_type()->is_vect()->element_basic_type() == vt->element_basic_type(), + "must be the same"); + } virtual int Opcode() const; }; diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestDisableAutoVectOpcodes.java b/test/hotspot/jtreg/compiler/c2/irTests/TestDisableAutoVectOpcodes.java index d60625d722e..f493b3fc307 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/TestDisableAutoVectOpcodes.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestDisableAutoVectOpcodes.java @@ -116,4 +116,20 @@ public class TestDisableAutoVectOpcodes { dresult += result; } + @Test + @IR(failOn = {IRNode.COUNTTRAILINGZEROS_VL}) + public void testNumberOfTrailingZeros() { + for (int i = 0; i < SIZE; ++i) { + inta[i] = Long.numberOfTrailingZeros(longa[i]); + } + } + + @Test + @IR(failOn = {IRNode.COUNTLEADINGZEROS_VL}) + public void testNumberOfLeadingZeros() { + for (int i = 0; i < SIZE; ++i) { + inta[i] = Long.numberOfLeadingZeros(longa[i]); + } + } + } diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index 5997435a748..6f30d9f83be 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -714,6 +714,16 @@ public class IRNode { superWordNodes(POPCOUNT_VL, "PopCountVL"); } + public static final String COUNTTRAILINGZEROS_VL = PREFIX + "COUNTTRAILINGZEROS_VL" + POSTFIX; + static { + superWordNodes(COUNTTRAILINGZEROS_VL, "CountTrailingZerosV"); + } + + public static final String COUNTLEADINGZEROS_VL = PREFIX + "COUNTLEADINGZEROS_VL" + POSTFIX; + static { + superWordNodes(COUNTLEADINGZEROS_VL, "CountLeadingZerosV"); + } + public static final String POPULATE_INDEX = PREFIX + "POPULATE_INDEX" + POSTFIX; static { String regex = START + "PopulateIndex" + MID + END; diff --git a/test/hotspot/jtreg/compiler/vectorization/TestNumberOfContinuousZeros.java b/test/hotspot/jtreg/compiler/vectorization/TestNumberOfContinuousZeros.java new file mode 100644 index 00000000000..c64b851096d --- /dev/null +++ b/test/hotspot/jtreg/compiler/vectorization/TestNumberOfContinuousZeros.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2022, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** +* @test +* @key randomness +* @summary Test vectorization of numberOfTrailingZeros/numberOfLeadingZeros for Long +* @requires vm.compiler2.enabled +* @requires (os.simpleArch == "x64" & vm.cpu.features ~= ".*avx2.*") | +* (os.simpleArch == "aarch64" & vm.cpu.features ~= ".*sve.*" & (vm.opt.UseSVE == "null" | vm.opt.UseSVE > 0)) +* @library /test/lib / +* @run driver compiler.vectorization.TestNumberOfContinuousZeros +*/ + +package compiler.vectorization; + +import compiler.lib.ir_framework.*; +import java.util.Random; +import jdk.test.lib.Asserts; + +public class TestNumberOfContinuousZeros { + private long[] input; + private int[] output; + private static final int LEN = 1024; + private Random rng; + + public static void main(String args[]) { + TestFramework.run(); + } + + public TestNumberOfContinuousZeros() { + input = new long[LEN]; + output = new int[LEN]; + rng = new Random(42); + for (int i = 0; i < LEN; ++i) { + input[i] = rng.nextLong(); + } + } + + @Test + @IR(counts = {IRNode.COUNTTRAILINGZEROS_VL, "> 0"}) + public void vectorizeNumberOfTrailingZeros() { + for (int i = 0; i < LEN; ++i) { + output[i] = Long.numberOfTrailingZeros(input[i]); + } + } + + @Test + @IR(counts = {IRNode.COUNTLEADINGZEROS_VL, "> 0"}) + public void vectorizeNumberOfLeadingZeros() { + for (int i = 0; i < LEN; ++i) { + output[i] = Long.numberOfLeadingZeros(input[i]); + } + } + + @Run(test = {"vectorizeNumberOfTrailingZeros", "vectorizeNumberOfLeadingZeros"}) + public void checkResult() { + vectorizeNumberOfTrailingZeros(); + for (int i = 0; i < LEN; ++i) { + Asserts.assertEquals(output[i], Long.numberOfTrailingZeros(input[i])); + } + vectorizeNumberOfLeadingZeros(); + for (int i = 0; i < LEN; ++i) { + Asserts.assertEquals(output[i], Long.numberOfLeadingZeros(input[i])); + } + } +} + diff --git a/test/hotspot/jtreg/compiler/vectorization/TestPopCountVectorLong.java b/test/hotspot/jtreg/compiler/vectorization/TestPopCountVectorLong.java index b6b2e42ce9b..20912582b7f 100644 --- a/test/hotspot/jtreg/compiler/vectorization/TestPopCountVectorLong.java +++ b/test/hotspot/jtreg/compiler/vectorization/TestPopCountVectorLong.java @@ -25,8 +25,8 @@ * @test * @summary Test vectorization of popcount for Long * @requires vm.compiler2.enabled -* @requires vm.cpu.features ~= ".*avx512bw.*" | (vm.cpu.features ~= ".*sve.*" & (vm.opt.UseSVE == "null" | vm.opt.UseSVE > 0)) -* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64" +* @requires ((os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") & vm.cpu.features ~= ".*avx512bw.*") | +* os.simpleArch == "aarch64" * @library /test/lib / * @run driver compiler.vectorization.TestPopCountVectorLong */