8278868: Add x86 vectorization support for Long.bitCount()
Reviewed-by: jbhateja, sviswanathan, kvn
This commit is contained in:
parent
67141849d9
commit
c4518e257c
@ -4829,6 +4829,14 @@ void Assembler::vpopcntd(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
emit_int16(0x55, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the register-to-register form of VPOPCNTQ as an EVEX instruction
// (66 prefix, 0F 38 opcode map, W bit set, opcode byte 0x55).
//
//   dst        - destination vector register
//   src        - source vector register
//   vector_len - vector length encoding, forwarded to InstructionAttr
//
// Asserts that the CPU reports the AVX512_VPOPCNTDQ feature.
void Assembler::vpopcntq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_vpopcntdq(), "must support vpopcntdq feature");

  InstructionAttr attrs(vector_len,
                        /* vex_w */ true,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_is_evex_instruction();

  const int dst_enc = dst->encoding();
  const int src_enc = src->encoding();
  // No second source operand, hence the 0 in the middle position.
  const int reg_bits = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);

  // Opcode byte followed by a ModRM byte in register-direct form (0xC0).
  emit_int16(0x55, (0xC0 | reg_bits));
}
|
||||
|
||||
// Emits the single opcode byte 0x9D (POPF).
void Assembler::popf() {
  const unsigned char popf_opcode = 0x9D;
  emit_int8(popf_opcode);
}
|
||||
|
@ -1869,6 +1869,7 @@ private:
|
||||
void popcntl(Register dst, Register src);
|
||||
|
||||
void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vpopcntq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
|
||||
#ifdef _LP64
|
||||
void popcntq(Register dst, Address src);
|
||||
|
@ -1405,6 +1405,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
}
|
||||
break;
|
||||
case Op_PopCountVI:
|
||||
case Op_PopCountVL:
|
||||
if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) {
|
||||
return false;
|
||||
}
|
||||
@ -8590,6 +8591,20 @@ instruct vpopcountI(vec dst, vec src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Matches PopCountVL and emits vpopcntq into dst, followed by an in-place
// evpmovqd on dst (quadword lanes converted to doubleword lanes).
instruct vpopcountL(vec dst, vec src) %{
  match(Set dst (PopCountVL src));
  format %{ "vpopcntq $dst,$src\t! vector popcount packedL" %}
  ins_encode %{
    assert(UsePopCountInstruction, "not enabled");

    // Both instructions use the length encoding derived from the source operand.
    const int src_vlen_enc = vector_length_encoding(this, $src);

    // Step 1: population count of src into dst.
    __ vpopcntq($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    // Step 2: dst -> dst conversion of the counts from quadword to doubleword.
    __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, src_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// --------------------------------- Bitwise Ternary Logic ----------------------------------
|
||||
|
||||
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -4235,7 +4235,7 @@ bool MatchRule::is_vector() const {
|
||||
"VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
|
||||
"VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
|
||||
"VectorMaskWrapper","VectorMaskCmp","VectorReinterpret","LoadVectorMasked","StoreVectorMasked",
|
||||
"FmaVD","FmaVF","PopCountVI","VectorLongToMask",
|
||||
"FmaVD","FmaVF","PopCountVI", "PopCountVL", "VectorLongToMask",
|
||||
// Next are vector mask ops.
|
||||
"MaskAll", "AndVMask", "OrVMask", "XorVMask", "VectorMaskCast",
|
||||
// Next are not supported currently.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -272,6 +272,7 @@ macro(Phi)
|
||||
macro(PopCountI)
|
||||
macro(PopCountL)
|
||||
macro(PopCountVI)
|
||||
macro(PopCountVL)
|
||||
macro(PrefetchAllocation)
|
||||
macro(Proj)
|
||||
macro(RShiftI)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -2553,7 +2553,7 @@ void SuperWord::output() {
|
||||
opc == Op_AbsF || opc == Op_AbsD ||
|
||||
opc == Op_AbsI || opc == Op_AbsL ||
|
||||
opc == Op_NegF || opc == Op_NegD ||
|
||||
opc == Op_PopCountI) {
|
||||
opc == Op_PopCountI || opc == Op_PopCountL) {
|
||||
assert(n->req() == 2, "only one input expected");
|
||||
Node* in = vector_opd(p, 1);
|
||||
vn = VectorNode::make(opc, in, NULL, vlen, velt_basic_type(n));
|
||||
@ -2928,6 +2928,7 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (VectorNode::is_muladds2i(use)) {
|
||||
// MulAddS2I takes shorts and produces ints - hence the special checks
|
||||
// on alignment and size.
|
||||
@ -2943,6 +2944,24 @@ bool SuperWord::is_vector_use(Node* use, int u_idx) {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
if (VectorNode::is_vpopcnt_long(use)) {
|
||||
// VPOPCNT_LONG takes long and produces int - hence the special checks
|
||||
// on alignment and size.
|
||||
if (u_pk->size() != d_pk->size()) {
|
||||
return false;
|
||||
}
|
||||
for (uint i = 0; i < MIN2(d_pk->size(), u_pk->size()); i++) {
|
||||
Node* ui = u_pk->at(i);
|
||||
Node* di = d_pk->at(i);
|
||||
if (alignment(ui) * 2 != alignment(di)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
if (u_pk->size() != d_pk->size())
|
||||
return false;
|
||||
for (uint i = 0; i < u_pk->size(); i++) {
|
||||
|
@ -154,6 +154,8 @@ int VectorNode::opcode(int sopc, BasicType bt) {
|
||||
// Unimplemented for subword types since bit count changes
|
||||
// depending on size of lane (and sign bit).
|
||||
return (bt == T_INT ? Op_PopCountVI : 0);
|
||||
case Op_PopCountL:
|
||||
return Op_PopCountVL;
|
||||
case Op_LShiftI:
|
||||
switch (bt) {
|
||||
case T_BOOLEAN:
|
||||
@ -297,6 +299,16 @@ bool VectorNode::is_muladds2i(Node* n) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true exactly when the opcode of n is Op_PopCountL.
bool VectorNode::is_vpopcnt_long(Node* n) {
  return n->Opcode() == Op_PopCountL;
}
|
||||
|
||||
|
||||
|
||||
|
||||
bool VectorNode::is_roundopD(Node* n) {
|
||||
if (n->Opcode() == Op_RoundDoubleMode) {
|
||||
return true;
|
||||
@ -531,6 +543,7 @@ VectorNode* VectorNode::make(int vopc, Node* n1, Node* n2, const TypeVect* vt, b
|
||||
case Op_SqrtVD: return new SqrtVDNode(n1, vt);
|
||||
|
||||
case Op_PopCountVI: return new PopCountVINode(n1, vt);
|
||||
case Op_PopCountVL: return new PopCountVLNode(n1, vt);
|
||||
case Op_RotateLeftV: return new RotateLeftVNode(n1, n2, vt);
|
||||
case Op_RotateRightV: return new RotateRightVNode(n1, n2, vt);
|
||||
|
||||
|
@ -93,6 +93,7 @@ class VectorNode : public TypeNode {
|
||||
static bool is_type_transition_short_to_int(Node* n);
|
||||
static bool is_type_transition_to_int(Node* n);
|
||||
static bool is_muladds2i(Node* n);
|
||||
static bool is_vpopcnt_long(Node* n);
|
||||
static bool is_roundopD(Node* n);
|
||||
static bool is_scalar_rotate(Node* n);
|
||||
static bool is_vector_rotate_supported(int opc, uint vlen, BasicType bt);
|
||||
@ -505,6 +506,14 @@ class PopCountVINode : public VectorNode {
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------PopCountVLNode---------------------------------
|
||||
// Vector popcount long bits
|
||||
class PopCountVLNode : public VectorNode {
|
||||
public:
|
||||
PopCountVLNode(Node* in, const TypeVect* vt) : VectorNode(in,vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------SqrtVFNode--------------------------------------
|
||||
// Vector Sqrt float
|
||||
class SqrtVFNode : public VectorNode {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -1767,6 +1767,7 @@
|
||||
declare_c2_type(DivVFNode, VectorNode) \
|
||||
declare_c2_type(DivVDNode, VectorNode) \
|
||||
declare_c2_type(PopCountVINode, VectorNode) \
|
||||
declare_c2_type(PopCountVLNode, VectorNode) \
|
||||
declare_c2_type(LShiftVBNode, VectorNode) \
|
||||
declare_c2_type(LShiftVSNode, VectorNode) \
|
||||
declare_c2_type(LShiftVINode, VectorNode) \
|
||||
|
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @summary Test vectorization of popcount for Long
|
||||
* @requires vm.cpu.features ~= ".*avx512dq.*"
|
||||
* @requires vm.compiler2.enabled
|
||||
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64"
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.vectorization.TestPopCountVectorLong
|
||||
*/
|
||||
|
||||
package compiler.vectorization;
|
||||
import compiler.lib.ir_framework.*;
|
||||
import java.util.Random;
|
||||
|
||||
|
||||
public class TestPopCountVectorLong {
|
||||
private long[] input;
|
||||
private int[] output;
|
||||
private static final int LEN = 1024;
|
||||
private Random rng;
|
||||
|
||||
public static void main(String args[]) {
|
||||
TestFramework.run(TestPopCountVectorLong.class);
|
||||
}
|
||||
|
||||
public TestPopCountVectorLong() {
|
||||
input = new long[LEN];
|
||||
output = new int[LEN];
|
||||
rng = new Random(42);
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
input[i] = rng.nextLong();
|
||||
}
|
||||
}
|
||||
|
||||
@Test // needs to be run in (fast) debug mode
|
||||
@Warmup(10000)
|
||||
@IR(counts = {"PopCountVL", ">= 1"}) // Atleast one PopCountVL node is generated if vectorization is successful
|
||||
public void vectorizeBitCount() {
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
output[i] = Long.bitCount(input[i]);
|
||||
}
|
||||
checkResult();
|
||||
}
|
||||
|
||||
public void checkResult() {
|
||||
for (int i = 0; i < LEN; ++i) {
|
||||
int expected = Long.bitCount(input[i]);
|
||||
if (output[i] != expected) {
|
||||
throw new RuntimeException("Invalid result: output[" + i + "] = " + output[i] + " != " + expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
87
test/micro/org/openjdk/bench/vm/compiler/VectorBitCount.java
Normal file
87
test/micro/org/openjdk/bench/vm/compiler/VectorBitCount.java
Normal file
@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
import org.openjdk.jmh.infra.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.random.RandomGenerator;
|
||||
import java.util.random.RandomGeneratorFactory;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
public abstract class VectorBitCount {
|
||||
@Param({"1024"})
|
||||
public int SIZE;
|
||||
|
||||
@Param("0")
|
||||
private int seed;
|
||||
private RandomGenerator rng = RandomGeneratorFactory.getDefault().create(seed);
|
||||
private int[] bufferRandInts;
|
||||
private long[] bufferRandLongs;
|
||||
private int[] bitCounts;
|
||||
@Setup
|
||||
public void init() {
|
||||
bufferRandInts = new int[SIZE];
|
||||
bufferRandLongs = new long[SIZE];
|
||||
bitCounts = new int[SIZE];
|
||||
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
bufferRandInts[i] = rng.nextInt();
|
||||
bufferRandLongs[i] = rng.nextLong();
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int[] intBitCount() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
bitCounts[i] = Integer.bitCount(bufferRandInts[i]);
|
||||
}
|
||||
return bitCounts;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int[] longBitCount() {
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
bitCounts[i] = Long.bitCount(bufferRandLongs[i]);
|
||||
}
|
||||
return bitCounts;
|
||||
}
|
||||
|
||||
|
||||
@Fork(value = 1, jvmArgsPrepend = {
|
||||
"-XX:+UseSuperWord"
|
||||
})
|
||||
public static class WithSuperword extends VectorBitCount {
|
||||
|
||||
}
|
||||
|
||||
@Fork(value = 1, jvmArgsPrepend = {
|
||||
"-XX:-UseSuperWord"
|
||||
})
|
||||
public static class NoSuperword extends VectorBitCount {
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user