8278868: Add x86 vectorization support for Long.bitCount()

Reviewed-by: jbhateja, sviswanathan, kvn
This commit is contained in:
Vamsi Parasa 2022-01-11 18:47:42 +00:00 committed by Sandhya Viswanathan
parent 67141849d9
commit c4518e257c
11 changed files with 237 additions and 6 deletions

View File

@ -4829,6 +4829,14 @@ void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
emit_int16(0x55, (0xC0 | encode)); emit_int16(0x55, (0xC0 | encode));
} }
// Emits the register-to-register vpopcntq instruction: an EVEX-only encoding
// (vex_w set, 66 prefix, 0F 38 opcode map) with opcode byte 0x55.
//   dst, src   - XMM register operands
//   vector_len - vector length passed through to InstructionAttr
// Asserts that the CPU reports the avx512_vpopcntdq feature.
void Assembler::vpopcntq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");
  InstructionAttr attributes(vector_len,
                             /* vex_w */ true,
                             /* legacy_mode */ false,
                             /* no_mask_reg */ true,
                             /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  // The attributes must be fully configured before the prefix is encoded.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  // Second emitted byte is 0xC0 combined with the value returned by the prefix encoder.
  const int operand_byte = 0xC0 | reg_bits;
  emit_int16(0x55, operand_byte);
}
void Assembler::popf() { void Assembler::popf() {
emit_int8((unsigned char)0x9D); emit_int8((unsigned char)0x9D);
} }

View File

@ -1869,6 +1869,7 @@ private:
void popcntl(Register dst, Register src); void popcntl(Register dst, Register src);
void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len); void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
// Vector popcount, "q" variant of vpopcntd above; the definition asserts
// VM_Version::supports_avx512_vpopcntdq() and emits an EVEX-encoded instruction.
void vpopcntq(XMMRegister dst, XMMRegister src, int vector_len);
#ifdef _LP64 #ifdef _LP64
void popcntq(Register dst, Address src); void popcntq(Register dst, Address src);

View File

@ -1405,6 +1405,7 @@ const bool Matcher::match_rule_supported(int opcode) {
} }
break; break;
case Op_PopCountVI: case Op_PopCountVI:
case Op_PopCountVL:
if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) { if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) {
return false; return false;
} }
@ -8590,6 +8591,20 @@ instruct vpopcountI(vec dst, vec src) %{
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
// Vector popcount for long elements: matches PopCountVL and emits vpopcntq
// followed by evpmovqd on the destination register.
// Matcher::match_rule_supported only accepts Op_PopCountVL when
// UsePopCountInstruction is set and VM_Version::supports_avx512_vpopcntdq() holds.
instruct vpopcountL(vec dst, vec src) %{
match(Set dst (PopCountVL src));
format %{ "vpopcntq $dst,$src\t! vector popcount packedL" %}
ins_encode %{
assert(UsePopCountInstruction, "not enabled");
// The length encoding is derived from the source operand and reused for both instructions.
int vlen_enc = vector_length_encoding(this, $src);
__ vpopcntq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
// NOTE(review): evpmovqd presumably narrows each quadword count to a doubleword,
// because the popcount of a long is consumed as an int -- confirm against the
// assembler definition of evpmovqd.
__ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// --------------------------------- Bitwise Ternary Logic ---------------------------------- // --------------------------------- Bitwise Ternary Logic ----------------------------------
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -4235,7 +4235,7 @@ bool MatchRule::is_vector() const {
"VectorCastB2X", "VectorCastS2X", "VectorCastI2X", "VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X", "VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
"VectorMaskWrapper","VectorMaskCmp","VectorReinterpret","LoadVectorMasked","StoreVectorMasked", "VectorMaskWrapper","VectorMaskCmp","VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
"FmaVD","FmaVF","PopCountVI","VectorLongToMask", "FmaVD","FmaVF","PopCountVI", "PopCountVL", "VectorLongToMask",
// Next are vector mask ops. // Next are vector mask ops.
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast", "MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
// Next are not supported currently. // Next are not supported currently.

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -272,6 +272,7 @@ macro(Phi)
macro(PopCountI) macro(PopCountI)
macro(PopCountL) macro(PopCountL)
macro(PopCountVI) macro(PopCountVI)
macro(PopCountVL)
macro(PrefetchAllocation) macro(PrefetchAllocation)
macro(Proj) macro(Proj)
macro(RShiftI) macro(RShiftI)

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -2553,7 +2553,7 @@ void SuperWord::output() {
opc == Op_AbsF || opc == Op_AbsD || opc == Op_AbsF || opc == Op_AbsD ||
opc == Op_AbsI || opc == Op_AbsL || opc == Op_AbsI || opc == Op_AbsL ||
opc == Op_NegF || opc == Op_NegD || opc == Op_NegF || opc == Op_NegD ||
opc == Op_PopCountI) { opc == Op_PopCountI || opc == Op_PopCountL) {
assert(n->req() == 2, "only one input expected"); assert(n->req() == 2, "only one input expected");
Node* in = vector_opd(p, 1); Node* in = vector_opd(p, 1);
vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n)); vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
@ -2928,6 +2928,7 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
} }
return true; return true;
} }
if (VectorNode::is_muladds2i(use)) { if (VectorNode::is_muladds2i(use)) {
// MulAddS2I takes shorts and produces ints - hence the special checks // MulAddS2I takes shorts and produces ints - hence the special checks
// on alignment and size. // on alignment and size.
@ -2943,6 +2944,24 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
} }
return true; return true;
} }
if (VectorNode::is_vpopcnt_long(use)) {
// VPOPCNT_LONG takes long and produces int - hence the special checks
// on alignment and size.
if (u_pk->size() != d_pk->size()) {
return false;
}
for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
Node* ui = u_pk->at(i);
Node* di = d_pk->at(i);
if (alignment(ui) * 2 != alignment(di)) {
return false;
}
}
return true;
}
if (u_pk->size() != d_pk->size()) if (u_pk->size() != d_pk->size())
return false; return false;
for (uint i = 0; i < u_pk->size(); i++) { for (uint i = 0; i < u_pk->size(); i++) {

View File

@ -154,6 +154,8 @@ int VectorNode::opcode(int sopc, BasicType bt) {
// Unimplemented for subword types since bit count changes // Unimplemented for subword types since bit count changes
// depending on size of lane (and sign bit). // depending on size of lane (and sign bit).
return (bt == T_INT ? Op_PopCountVI : 0); return (bt == T_INT ? Op_PopCountVI : 0);
case Op_PopCountL:
return Op_PopCountVL;
case Op_LShiftI: case Op_LShiftI:
switch (bt) { switch (bt) {
case T_BOOLEAN: case T_BOOLEAN:
@ -297,6 +299,16 @@ bool VectorNode::is_muladds2i(Node* n) {
return false; return false;
} }
// Returns true exactly when the opcode of n is Op_PopCountL.
bool VectorNode::is_vpopcnt_long(Node* n) {
  return n->Opcode() == Op_PopCountL;
}
bool VectorNode::is_roundopD(Node* n) { bool VectorNode::is_roundopD(Node* n) {
if (n->Opcode() == Op_RoundDoubleMode) { if (n->Opcode() == Op_RoundDoubleMode) {
return true; return true;
@ -531,6 +543,7 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
case Op_SqrtVD: return new SqrtVDNode(n1, vt); case Op_SqrtVD: return new SqrtVDNode(n1, vt);
case Op_PopCountVI: return new PopCountVINode(n1, vt); case Op_PopCountVI: return new PopCountVINode(n1, vt);
case Op_PopCountVL: return new PopCountVLNode(n1, vt);
case Op_RotateLeftV: return new RotateLeftVNode(n1, n2, vt); case Op_RotateLeftV: return new RotateLeftVNode(n1, n2, vt);
case Op_RotateRightV: return new RotateRightVNode(n1, n2, vt); case Op_RotateRightV: return new RotateRightVNode(n1, n2, vt);

View File

@ -93,6 +93,7 @@ class VectorNode : public TypeNode {
static bool is_type_transition_short_to_int(Node* n); static bool is_type_transition_short_to_int(Node* n);
static bool is_type_transition_to_int(Node* n); static bool is_type_transition_to_int(Node* n);
static bool is_muladds2i(Node* n); static bool is_muladds2i(Node* n);
static bool is_vpopcnt_long(Node* n);
static bool is_roundopD(Node* n); static bool is_roundopD(Node* n);
static bool is_scalar_rotate(Node* n); static bool is_scalar_rotate(Node* n);
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt); static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
@ -505,6 +506,14 @@ class PopCountVINode : public VectorNode {
virtual int Opcode() const; virtual int Opcode() const;
}; };
//------------------------------PopCountVLNode---------------------------------
// Vector popcount of long elements.
// Takes a single input node and the vector type, both forwarded to VectorNode.
class PopCountVLNode : public VectorNode {
 public:
  PopCountVLNode(Node* src, const TypeVect* vect_type)
    : VectorNode(src, vect_type) {}

  virtual int Opcode() const;
};
//------------------------------SqrtVFNode-------------------------------------- //------------------------------SqrtVFNode--------------------------------------
// Vector Sqrt float // Vector Sqrt float
class SqrtVFNode : public VectorNode { class SqrtVFNode : public VectorNode {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -1767,6 +1767,7 @@
declare_c2_type(DivVFNode, VectorNode) \ declare_c2_type(DivVFNode, VectorNode) \
declare_c2_type(DivVDNode, VectorNode) \ declare_c2_type(DivVDNode, VectorNode) \
declare_c2_type(PopCountVINode, VectorNode) \ declare_c2_type(PopCountVINode, VectorNode) \
declare_c2_type(PopCountVLNode, VectorNode) \
declare_c2_type(LShiftVBNode, VectorNode) \ declare_c2_type(LShiftVBNode, VectorNode) \
declare_c2_type(LShiftVSNode, VectorNode) \ declare_c2_type(LShiftVSNode, VectorNode) \
declare_c2_type(LShiftVINode, VectorNode) \ declare_c2_type(LShiftVINode, VectorNode) \

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @summary Test vectorization of popcount for Long
* @requires vm.cpu.features ~= ".*avx512_vpopcntdq.*"
* @requires vm.compiler2.enabled
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64"
* @library /test/lib /
* @run driver compiler.vectorization.TestPopCountVectorLong
*/
package compiler.vectorization;
import compiler.lib.ir_framework.*;
import java.util.Random;
/**
 * IR-framework test: a counted loop calling Long.bitCount() on a long[] and
 * storing into an int[] must compile to at least one PopCountVL node, and the
 * stored values must equal the Long.bitCount() results recomputed in checkResult().
 */
public class TestPopCountVectorLong {
    private static final int LEN = 1024;

    private long[] input;
    private int[] output;
    private Random rng;

    public static void main(String args[]) {
        TestFramework.run(TestPopCountVectorLong.class);
    }

    /** Allocates both arrays and fills the input from a Random seeded with 42. */
    public TestPopCountVectorLong() {
        input = new long[LEN];
        output = new int[LEN];
        rng = new Random(42);
        int idx = 0;
        while (idx < LEN) {
            input[idx] = rng.nextLong();
            idx++;
        }
    }

    // Needs to be run in (fast) debug mode.
    // At least one PopCountVL node is generated if vectorization is successful.
    @Test
    @Warmup(10000)
    @IR(counts = {"PopCountVL", ">= 1"})
    public void vectorizeBitCount() {
        for (int idx = 0; idx < LEN; ++idx) {
            output[idx] = Long.bitCount(input[idx]);
        }
        checkResult();
    }

    /** Recomputes every bit count and throws on the first mismatch. */
    public void checkResult() {
        for (int idx = 0; idx < LEN; ++idx) {
            final int expected = Long.bitCount(input[idx]);
            if (output[idx] == expected) {
                continue;
            }
            throw new RuntimeException("Invalid result: output[" + idx + "] = " + output[idx] + " != " + expected);
        }
    }
}

View File

@ -0,0 +1,87 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.*;
import java.util.concurrent.TimeUnit;
import java.util.random.RandomGenerator;
import java.util.random.RandomGeneratorFactory;
/**
 * JMH benchmark for Integer.bitCount() and Long.bitCount() applied element-wise
 * to arrays of SIZE random values. The nested subclasses run the same benchmarks
 * in forked JVMs with -XX:+UseSuperWord and -XX:-UseSuperWord respectively.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
public abstract class VectorBitCount {
    /** Number of elements in each input and output array. */
    @Param({"1024"})
    public int SIZE;

    /** Seed for the random input data. */
    @Param("0")
    private int seed;

    // Created in init(), not in a field initializer: an initializer runs when the
    // state object is constructed, before JMH has injected the @Param value of
    // 'seed', so it would always see the default 0 and ignore the parameter.
    private RandomGenerator rng;

    private int[] bufferRandInts;
    private long[] bufferRandLongs;
    private int[] bitCounts;

    /** Allocates the buffers and fills both inputs from a generator seeded with 'seed'. */
    @Setup
    public void init() {
        rng = RandomGeneratorFactory.getDefault().create(seed);
        bufferRandInts = new int[SIZE];
        bufferRandLongs = new long[SIZE];
        bitCounts = new int[SIZE];

        for (int i = 0; i < SIZE; i++) {
            bufferRandInts[i] = rng.nextInt();
            bufferRandLongs[i] = rng.nextLong();
        }
    }

    /** Stores Integer.bitCount() of every int input; returns the result array. */
    @Benchmark
    public int[] intBitCount() {
        for (int i = 0; i < SIZE; i++) {
            bitCounts[i] = Integer.bitCount(bufferRandInts[i]);
        }
        return bitCounts;
    }

    /** Stores Long.bitCount() of every long input; returns the result array. */
    @Benchmark
    public int[] longBitCount() {
        for (int i = 0; i < SIZE; i++) {
            bitCounts[i] = Long.bitCount(bufferRandLongs[i]);
        }
        return bitCounts;
    }

    @Fork(value = 1, jvmArgsPrepend = {
        "-XX:+UseSuperWord"
    })
    public static class WithSuperword extends VectorBitCount {
    }

    @Fork(value = 1, jvmArgsPrepend = {
        "-XX:-UseSuperWord"
    })
    public static class NoSuperword extends VectorBitCount {
    }
}