8214239: Missing x86_64.ad patterns for clearing and setting long vector bits

Reviewed-by: kvn, vlivanov, jrose, sviswanathan
This commit is contained in:
Bernard Blaser 2019-11-13 11:21:15 +01:00
parent 0a5f074a77
commit b8b88853a5
5 changed files with 269 additions and 0 deletions

View File

@ -9163,6 +9163,26 @@ void Assembler::notq(Register dst) {
emit_int8((unsigned char)(0xD0 | encode));
}
// Emits the quadword bit-test-and-set instruction with an immediate bit index
// on a memory operand (opcode 0F BA /5 ib).
//   dst  - memory operand whose bit is set
//   imm8 - bit index; must fit in a byte
void Assembler::btsq(Address dst, int imm8) {
  assert(isByte(imm8), "not a byte");
  InstructionMark mark(this);
  // Prefix for the 64-bit operand form (presumably REX.W plus any address
  // extension bits -- confirm against prefixq).
  prefixq(dst);
  // Two-byte opcode 0F BA.
  emit_int8(static_cast<unsigned char>(0x0F));
  emit_int8(static_cast<unsigned char>(0xBA));
  // rbp is passed only for its register number (5), which acts as the opcode
  // extension selecting BTS. The trailing 1 presumably accounts for the one
  // immediate byte emitted after the operand -- confirm against emit_operand.
  emit_operand(rbp /* 5 */, dst, 1);
  emit_int8(imm8);
}
// Emits the quadword bit-test-and-reset instruction with an immediate bit
// index on a memory operand (opcode 0F BA /6 ib).
//   dst  - memory operand whose bit is cleared
//   imm8 - bit index; must fit in a byte
void Assembler::btrq(Address dst, int imm8) {
  assert(isByte(imm8), "not a byte");
  InstructionMark mark(this);
  // Prefix for the 64-bit operand form (presumably REX.W plus any address
  // extension bits -- confirm against prefixq).
  prefixq(dst);
  // Two-byte opcode 0F BA.
  emit_int8(static_cast<unsigned char>(0x0F));
  emit_int8(static_cast<unsigned char>(0xBA));
  // rsi is passed only for its register number (6), which acts as the opcode
  // extension selecting BTR. The trailing 1 presumably accounts for the one
  // immediate byte emitted after the operand -- confirm against emit_operand.
  emit_operand(rsi /* 6 */, dst, 1);
  emit_int8(imm8);
}
void Assembler::orq(Address dst, int32_t imm32) {
InstructionMark im(this);
prefixq(dst);

View File

@ -1592,6 +1592,9 @@ private:
#ifdef _LP64
void notq(Register dst);
// Bit test and set (0F BA /5 ib): sets the bit of the quadword at dst selected by imm8.
void btsq(Address dst, int imm8);
// Bit test and reset (0F BA /6 ib): clears the bit of the quadword at dst selected by imm8.
void btrq(Address dst, int imm8);
#endif
void orl(Address dst, int32_t imm32);

View File

@ -3116,6 +3116,26 @@ operand immL32()
interface(CONST_INTER);
%}
// Long immediate accepted only when is_power_of_2_long() holds for its value
// (presumably: exactly one bit set). Used by btsL_mem_imm to set a single bit.
operand immL_Pow2()
%{
predicate(is_power_of_2_long(n->get_long()));
match(ConL);
op_cost(15);
format %{ %}
interface(CONST_INTER);
%}
// Long immediate whose bitwise complement satisfies is_power_of_2_long()
// (presumably: a mask with exactly one bit cleared). Despite the name, this is
// "NOT(value) is a power of 2", not "value is not a power of 2".
// Used by btrL_mem_imm to clear a single bit.
operand immL_NotPow2()
%{
predicate(is_power_of_2_long(~n->get_long()));
match(ConL);
op_cost(15);
format %{ %}
interface(CONST_INTER);
%}
// Long Immediate zero
operand immL0()
%{
@ -9841,6 +9861,23 @@ instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
ins_pipe(ialu_mem_imm);
%}
// Clear one bit of a long in memory: matches dst = dst & con where ~con is a
// power of 2, and emits btrq with the index of the cleared bit.
instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
// con should be a pure 64-bit immediate given that not(con) is a power of 2
// because AND/OR works well enough for 8/32-bit values.
// The predicate limits this rule to cleared-bit indices above 30.
// n->in(3)->in(2) is expected to be the con input of the AndL feeding the
// StoreL -- NOTE(review): confirm the input indices against the node layout.
predicate(log2_long(~n->in(3)->in(2)->get_long()) > 30);
match(Set dst (StoreL dst (AndL (LoadL dst) con)));
// The condition-code register is declared as killed by this instruction.
effect(KILL cr);
ins_cost(125);
format %{ "btrq $dst, log2(not($con))\t# long" %}
ins_encode %{
// The bit index handed to btrq is the position of the single zero bit in con.
__ btrq($dst$$Address, log2_long(~$con$$constant));
%}
ins_pipe(ialu_mem_imm);
%}
// BMI1 instructions
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
@ -10034,6 +10071,23 @@ instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
ins_pipe(ialu_mem_imm);
%}
// Set one bit of a long in memory: matches dst = dst | con where con is a
// power of 2, and emits btsq with the index of that bit.
instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
// con should be a pure 64-bit power of 2 immediate
// because AND/OR works well enough for 8/32-bit values.
// The predicate limits this rule to bit indices above 31.
// NOTE(review): btrL_mem_imm uses "> 30". Bit 31 (0x80000000) does not look
// representable as a sign-extended 32-bit immediate either, so it may be
// worth confirming whether "> 31" is intentional here.
// n->in(3)->in(2) is expected to be the con input of the OrL feeding the
// StoreL -- NOTE(review): confirm the input indices against the node layout.
predicate(log2_long(n->in(3)->in(2)->get_long()) > 31);
match(Set dst (StoreL dst (OrL (LoadL dst) con)));
// The condition-code register is declared as killed by this instruction.
effect(KILL cr);
ins_cost(125);
format %{ "btsq $dst, log2($con)\t# long" %}
ins_encode %{
// The bit index handed to btsq is the position of the single set bit in con.
__ btsq($dst$$Address, log2_long($con$$constant));
%}
ins_pipe(ialu_mem_imm);
%}
// Xor Instructions
// Xor Register with Register
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)

View File

@ -0,0 +1,87 @@
/*
* Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8214239
* @summary Missing x86_64.ad patterns for clearing and setting long vector bits
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions
* -XX:-TieredCompilation -XX:CompileThreshold=1000
* -XX:CompileCommand=print,compiler/c2/TestBitSetAndReset.test*
* -XX:CompileCommand=compileonly,compiler/c2/TestBitSetAndReset.test*
* -XX:CompileCommand=dontinline,compiler/c2/TestBitSetAndReset.test*
* compiler.c2.TestBitSetAndReset
*/
package compiler.c2;
/**
 * Checks that clearing and setting single bits of a long field with constant
 * masks gives the expected result for bit positions 63, 32, 31, 15 and 0.
 *
 * Each testNN() method clears bit NN in {@code andq} and sets bit NN in
 * {@code orq}. The methods are kept separate and trivially small because the
 * jtreg @run line selects them with CompileCommand patterns on "test*".
 */
public class TestBitSetAndReset {
    // Number of rounds; well above the CompileThreshold=1000 given in the @run line.
    private static final int COUNT = 10_000;

    // One mask per tested bit position.
    private static final long MASK63 = 0x8000_0000_0000_0000L;
    private static final long MASK32 = 0x0000_0001_0000_0000L;
    private static final long MASK31 = 0x0000_0000_8000_0000L;
    private static final long MASK15 = 0x0000_0000_0000_8000L;
    private static final long MASK00 = 0x0000_0000_0000_0001L;

    // andq has bits cleared by the testNN() methods, orq has bits set by them.
    private static long andq, orq;

    /**
     * Runs COUNT rounds of all testNN() methods and verifies the fields.
     *
     * @throws AssertionError if any round left andq non-zero or orq without
     *         all tested bits set
     */
    public static void main(String... args) {
        boolean success = true;

        for (int i=0; i<COUNT; i++) {
            // Seed every tested bit, including bit 32, so that each clear in
            // testNN() is observable: a missed clear leaves andq non-zero.
            andq = MASK63 | MASK32 | MASK31 | MASK15 | MASK00;
            orq = 0;
            test63();
            test32();
            test31();
            test15();
            test00();
            success &= andq == 0 && orq == (MASK63 | MASK32 | MASK31 | MASK15 | MASK00);
        }
        if (!success)
            throw new AssertionError("Failure while setting or clearing long vector bits!");
    }

    // Clears bit 63 of andq and sets bit 63 of orq.
    private static void test63() {
        andq &= ~MASK63;
        orq |= MASK63;
    }

    // Clears bit 32 of andq and sets bit 32 of orq.
    private static void test32() {
        andq &= ~MASK32;
        orq |= MASK32;
    }

    // Clears bit 31 of andq and sets bit 31 of orq.
    private static void test31() {
        andq &= ~MASK31;
        orq |= MASK31;
    }

    // Clears bit 15 of andq and sets bit 15 of orq.
    private static void test15() {
        andq &= ~MASK15;
        orq |= MASK15;
    }

    // Clears bit 0 of andq and sets bit 0 of orq.
    private static void test00() {
        andq &= ~MASK00;
        orq |= MASK00;
    }
}

View File

@ -0,0 +1,105 @@
/*
* Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.*;
import java.util.concurrent.TimeUnit;
/**
 * JMH benchmark for setting and clearing single bits of long fields with
 * constant masks. Reports average time in nanoseconds; state is per thread.
 */
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
public class BitSetAndReset {
// Number of inner-loop rounds per benchmark invocation.
private static final int COUNT = 10_000;
// Masks for the bit positions exercised by bitSet().
private static final long MASK63 = 0x8000_0000_0000_0000L;
private static final long MASK31 = 0x0000_0000_8000_0000L;
private static final long MASK15 = 0x0000_0000_0000_8000L;
private static final long MASK00 = 0x0000_0000_0000_0001L;
// andq has bits cleared by the testNN() methods, orq has bits set by them.
private long andq, orq;
// Becomes false (and stays false) once any round of bitSet() sees a wrong result.
private boolean success = true;
/**
 * Runs at iteration teardown and fails if bitSet() recorded a wrong result.
 *
 * @throws AssertionError if success is false
 */
@TearDown(Level.Iteration)
public void finish() {
if (!success)
throw new AssertionError("Failure while setting or clearing long vector bits!");
}
/**
 * Runs COUNT rounds of clearing bits 63, 31, 15 and 0 in andq and setting
 * the same bits in orq, recording in success whether each round ended with
 * andq == 0 and orq holding all four bits.
 *
 * @param bh receives the (constant) return value of each testNN() call
 */
@Benchmark
public void bitSet(Blackhole bh) {
for (int i=0; i<COUNT; i++) {
// Reset both fields so every round starts from the same state.
andq = MASK63 | MASK31 | MASK15 | MASK00;
orq = 0;
bh.consume(test63());
bh.consume(test31());
bh.consume(test15());
bh.consume(test00());
success &= andq == 0 && orq == (MASK63 | MASK31 | MASK15 | MASK00);
}
}
// Clears bit 63 of andq and sets bit 63 of orq; always returns 0.
private long test63() {
andq &= ~MASK63;
orq |= MASK63;
return 0L;
}
// Clears bit 31 of andq and sets bit 31 of orq; always returns 0.
private long test31() {
andq &= ~MASK31;
orq |= MASK31;
return 0L;
}
// Clears bit 15 of andq and sets bit 15 of orq; always returns 0.
private long test15() {
andq &= ~MASK15;
orq |= MASK15;
return 0L;
}
// Clears bit 0 of andq and sets bit 0 of orq; always returns 0.
private long test00() {
andq &= ~MASK00;
orq |= MASK00;
return 0L;
}
// Additional high-bit masks used only by the throughput benchmark.
private static final long MASK62 = 0x4000_0000_0000_0000L;
private static final long MASK61 = 0x2000_0000_0000_0000L;
private static final long MASK60 = 0x1000_0000_0000_0000L;
// One field per mask, so the four updates in testTp() touch separate fields.
private long orq63, orq62, orq61, orq60;
/**
 * Runs COUNT rounds of zeroing the four orqNN fields and then setting one
 * high bit (63, 62, 61, 60) in each of them via testTp().
 *
 * @param bh receives the (constant) return value of each testTp() call
 */
@Benchmark
public void throughput(Blackhole bh) {
for (int i=0; i<COUNT; i++) {
orq63 = orq62 = orq61 = orq60 = 0;
bh.consume(testTp());
}
}
// Sets bit NN in the matching orqNN field for NN = 63, 62, 61, 60; always returns 0.
private long testTp() {
orq63 |= MASK63;
orq62 |= MASK62;
orq61 |= MASK61;
orq60 |= MASK60;
return 0L;
}
}