8051725: Improve expansion of Conv2B nodes in the middle-end

Reviewed-by: thartmann, qamai, sviswanathan
This commit is contained in:
Jasmine Karthikeyan 2023-05-30 14:11:12 +00:00 committed by Tobias Hartmann
parent 3eced01f9e
commit fb0b1f0c23
13 changed files with 277 additions and 152 deletions

@ -15018,42 +15018,6 @@ instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
ins_pipe(ialu_reg);
%}
// Conv2B rule for an iRegIorL2I source: compares $src with zr and sets $dst
// from the NE condition. The flags register operand is declared as killed
// (effect(KILL cr)) since the compare writes the condition flags.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
match(Set dst (Conv2B src));
effect(KILL cr);
format %{
"cmpw $src, zr\n\t"
"cset $dst, ne"
%}
ins_encode %{
// Compare the source with the zero register, then set dst on "not equal".
__ cmpw(as_Register($src$$reg), zr);
__ cset(as_Register($dst$$reg), Assembler::NE);
%}
ins_pipe(ialu_reg);
%}
// Conv2B rule for a pointer (iRegP) source: compares $src with zr using cmp
// and sets $dst from the NE condition. The flags register operand is declared
// as killed (effect(KILL cr)) since the compare writes the condition flags.
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
match(Set dst (Conv2B src));
effect(KILL cr);
format %{
"cmp $src, zr\n\t"
"cset $dst, ne"
%}
ins_encode %{
// Compare the pointer with the zero register, then set dst on "not equal".
__ cmp(as_Register($src$$reg), zr);
__ cset(as_Register($dst$$reg), Assembler::NE);
%}
ins_pipe(ialu_reg);
%}
instruct convD2F_reg(vRegF dst, vRegD src) %{
match(Set dst (ConvD2F src));

@ -7042,39 +7042,6 @@ instruct xorL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{
ins_pipe(ialu_reg_imm);
%}
//----------Convert to Boolean-------------------------------------------------
// Conv2B rule for an int register source. Tests $src against itself, loads 0
// into $dst, then conditionally (ne) overwrites $dst with 1. Declares a fixed
// size of 12 and twice the default cost; the flags operand is killed.
instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{
match(Set dst (Conv2B src));
effect(KILL ccr);
size(12);
ins_cost(DEFAULT_COST*2);
format %{ "TST $src,$src \n\t"
"MOV $dst, 0 \n\t"
"MOV.ne $dst, 1" %}
ins_encode %{ // FIXME: can do better?
__ tst($src$$Register, $src$$Register);
// Default result is 0; the conditional move below replaces it with 1 on ne.
__ mov($dst$$Register, 0);
__ mov($dst$$Register, 1, ne);
%}
ins_pipe(ialu_reg_ialu);
%}
// Conv2B rule for a pointer register source. Tests $src against itself, loads
// 0 into $dst, then conditionally (ne) overwrites $dst with 1. Declares a
// fixed size of 12 and twice the default cost; the flags operand is killed.
instruct convP2B( iRegI dst, iRegP src, flagsReg ccr ) %{
match(Set dst (Conv2B src));
effect(KILL ccr);
size(12);
ins_cost(DEFAULT_COST*2);
format %{ "TST $src,$src \n\t"
"MOV $dst, 0 \n\t"
"MOV.ne $dst, 1" %}
ins_encode %{
__ tst($src$$Register, $src$$Register);
// Default result is 0; the conditional move below replaces it with 1 on ne.
__ mov($dst$$Register, 0);
__ mov($dst$$Register, 1, ne);
%}
ins_pipe(ialu_reg_ialu);
%}
instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{
match(Set dst (CmpLTMask p q));
effect( KILL ccr );

@ -5888,21 +5888,6 @@ void Assembler::setb(Condition cc, Register dst) {
emit_int24(0x0F, (unsigned char)0x90 | cc, (0xC0 | encode));
}
// Emits the sete encoding for dst: the bytes 0x0F, 0x94, then 0xC0 combined
// with the value returned by prefix_and_encode() for dst's register encoding.
void Assembler::sete(Register dst) {
  const int dst_enc = prefix_and_encode(dst->encoding(), true);
  emit_int24(0x0F, (unsigned char)0x94, (0xC0 | dst_enc));
}
// Emits the setl encoding for dst: the bytes 0x0F, 0x9C, then 0xC0 combined
// with the value returned by prefix_and_encode() for dst's register encoding.
void Assembler::setl(Register dst) {
  const int dst_enc = prefix_and_encode(dst->encoding(), true);
  emit_int24(0x0F, (unsigned char)0x9C, (0xC0 | dst_enc));
}
// Emits the setne encoding for dst: the bytes 0x0F, 0x95, then 0xC0 combined
// with the value returned by prefix_and_encode() for dst's register encoding.
void Assembler::setne(Register dst) {
  const int dst_enc = prefix_and_encode(dst->encoding(), true);
  emit_int24(0x0F, (unsigned char)0x95, (0xC0 | dst_enc));
}
void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_ssse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@ -12406,19 +12391,10 @@ void Assembler::pusha() { // 32bit
emit_int8(0x60);
}
// Emits the bytes 0x0F, 0x95, then 0xC0 combined with dst's raw register
// encoding (no prefix handling in this variant).
void Assembler::set_byte_if_not_zero(Register dst) {
  const int dst_enc = dst->encoding();
  emit_int24(0x0F, (unsigned char)0x95, (0xC0 | dst_enc));
}
#else // LP64
// 64bit only pieces of the assembler
// Emits the bytes 0x0F, 0x95, then 0xC0 combined with the value returned by
// prefix_and_encode() for dst's register encoding.
void Assembler::set_byte_if_not_zero(Register dst) {
  const int dst_enc = prefix_and_encode(dst->encoding(), true);
  emit_int24(0x0F, (unsigned char)0x95, (0xC0 | dst_enc));
}
// This should only be used by 64bit instructions that can use rip-relative
// it cannot be used by instructions that want an immediate value.

@ -2087,10 +2087,6 @@ private:
void setb(Condition cc, Register dst);
void sete(Register dst);
void setl(Register dst);
void setne(Register dst);
void palignr(XMMRegister dst, XMMRegister src, int imm8);
void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
@ -2238,8 +2234,6 @@ private:
void xorq(Register dst, int32_t imm32);
void xorq(Address dst, Register src);
void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
// AVX 3-operands scalar instructions (encoded with VEX prefix)
void vaddsd(XMMRegister dst, XMMRegister nds, Address src);

@ -2834,7 +2834,7 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op
__ cmpptr(left->as_register_lo(), right->as_register_lo());
__ movl(dest, -1);
__ jccb(Assembler::less, done);
__ set_byte_if_not_zero(dest);
__ setb(Assembler::notZero, dest);
__ movzbl(dest, dest);
__ bind(done);
#else

@ -8093,7 +8093,7 @@ instruct compareAndSwapP(rRegI res,
ins_encode %{
__ lock();
__ cmpxchgq($newval$$Register, $mem_ptr$$Address);
__ sete($res$$Register);
__ setb(Assembler::equal, $res$$Register);
__ movzbl($res$$Register, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
@ -8116,7 +8116,7 @@ instruct compareAndSwapL(rRegI res,
ins_encode %{
__ lock();
__ cmpxchgq($newval$$Register, $mem_ptr$$Address);
__ sete($res$$Register);
__ setb(Assembler::equal, $res$$Register);
__ movzbl($res$$Register, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
@ -8138,7 +8138,7 @@ instruct compareAndSwapI(rRegI res,
ins_encode %{
__ lock();
__ cmpxchgl($newval$$Register, $mem_ptr$$Address);
__ sete($res$$Register);
__ setb(Assembler::equal, $res$$Register);
__ movzbl($res$$Register, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
@ -8160,7 +8160,7 @@ instruct compareAndSwapB(rRegI res,
ins_encode %{
__ lock();
__ cmpxchgb($newval$$Register, $mem_ptr$$Address);
__ sete($res$$Register);
__ setb(Assembler::equal, $res$$Register);
__ movzbl($res$$Register, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
@ -8182,7 +8182,7 @@ instruct compareAndSwapS(rRegI res,
ins_encode %{
__ lock();
__ cmpxchgw($newval$$Register, $mem_ptr$$Address);
__ sete($res$$Register);
__ setb(Assembler::equal, $res$$Register);
__ movzbl($res$$Register, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
@ -8203,7 +8203,7 @@ instruct compareAndSwapN(rRegI res,
ins_encode %{
__ lock();
__ cmpxchgl($newval$$Register, $mem_ptr$$Address);
__ sete($res$$Register);
__ setb(Assembler::equal, $res$$Register);
__ movzbl($res$$Register, $res$$Register);
%}
ins_pipe( pipe_cmpxchg );
@ -10626,40 +10626,6 @@ instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
ins_pipe(ialu_mem_imm);
%}
// Convert Int to Boolean
// Matches Conv2B on an int register: testl of $src against itself, then
// set_byte_if_not_zero on $dst followed by movzbl of $dst into itself.
// The flags register operand is declared as killed.
instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
%{
match(Set dst (Conv2B src));
effect(KILL cr);
format %{ "testl $src, $src\t# ci2b\n\t"
"setnz $dst\n\t"
"movzbl $dst, $dst" %}
ins_encode %{
__ testl($src$$Register, $src$$Register);
// Only a byte is written by the set; movzbl widens it to the full register.
__ set_byte_if_not_zero($dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
%}
ins_pipe(pipe_slow); // XXX
%}
// Convert Pointer to Boolean
// Matches Conv2B on a pointer register: testq of $src against itself, then
// set_byte_if_not_zero on $dst followed by movzbl of $dst into itself.
// The flags register operand is declared as killed.
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
%{
match(Set dst (Conv2B src));
effect(KILL cr);
format %{ "testq $src, $src\t# cp2b\n\t"
"setnz $dst\n\t"
"movzbl $dst, $dst" %}
ins_encode %{
__ testq($src$$Register, $src$$Register);
// Only a byte is written by the set; movzbl widens it to the full register.
__ set_byte_if_not_zero($dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
%}
ins_pipe(pipe_slow); // XXX
%}
instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
match(Set dst (CmpLTMask p q));
@ -10672,7 +10638,7 @@ instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
"negl $dst" %}
ins_encode %{
__ cmpl($p$$Register, $q$$Register);
__ setl($dst$$Register);
__ setb(Assembler::less, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
__ negl($dst$$Register);
%}
@ -12828,7 +12794,7 @@ instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
__ cmpl($src1$$Register, $src2$$Register);
__ movl($dst$$Register, -1);
__ jccb(Assembler::below, done);
__ setne($dst$$Register);
__ setb(Assembler::notZero, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
__ bind(done);
%}
@ -12854,7 +12820,7 @@ instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
__ cmpq($src1$$Register, $src2$$Register);
__ movl($dst$$Register, -1);
__ jccb(Assembler::less, done);
__ setne($dst$$Register);
__ setb(Assembler::notZero, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
__ bind(done);
%}
@ -12880,7 +12846,7 @@ instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
__ cmpq($src1$$Register, $src2$$Register);
__ movl($dst$$Register, -1);
__ jccb(Assembler::below, done);
__ setne($dst$$Register);
__ setb(Assembler::notZero, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
__ bind(done);
%}

@ -886,6 +886,34 @@ Node* XorINode::Ideal(PhaseGVN* phase, bool can_reshape) {
phase->record_for_igvn(this);
}
}
// Propagate xor through constant cmoves. This pattern can occur after expansion of Conv2B nodes.
const TypeInt* in2_type = phase->type(in2)->isa_int();
if (in1->Opcode() == Op_CMoveI && in2_type != nullptr && in2_type->is_con()) {
int in2_val = in2_type->get_con();
// Get types of both sides of the CMove
const TypeInt* left = phase->type(in1->in(CMoveNode::IfFalse))->isa_int();
const TypeInt* right = phase->type(in1->in(CMoveNode::IfTrue))->isa_int();
// Ensure that both sides are int constants
if (left != nullptr && right != nullptr && left->is_con() && right->is_con()) {
Node* cond = in1->in(CMoveNode::Condition);
// Check that the comparison is a bool and that the cmp node type is correct
if (cond->is_Bool()) {
int cmp_op = cond->in(1)->Opcode();
if (cmp_op == Op_CmpI || cmp_op == Op_CmpP) {
int l_val = left->get_con();
int r_val = right->get_con();
return new CMoveINode(cond, phase->intcon(l_val ^ in2_val), phase->intcon(r_val ^ in2_val), TypeInt::INT);
}
}
}
}
return AddNode::Ideal(phase, can_reshape);
}

@ -1522,6 +1522,12 @@ Node* PhiNode::unique_input(PhaseValues* phase, bool uncast) {
// Convert Phi to a Conv2B.
static Node *is_x2logic( PhaseGVN *phase, PhiNode *phi, int true_path ) {
assert(true_path !=0, "only diamond shape graph expected");
// If we're late in the optimization process, we may have already expanded Conv2B nodes
if (phase->C->post_loop_opts_phase() && !Matcher::match_rule_supported(Op_Conv2B)) {
return nullptr;
}
// Convert the true/false index into an expected 0/1 return.
// Map 2->0 and 1->1.
int flipped = 2-true_path;
@ -1564,9 +1570,10 @@ static Node *is_x2logic( PhaseGVN *phase, PhiNode *phi, int true_path ) {
} else return nullptr;
// Build int->bool conversion
Node *n = new Conv2BNode(cmp->in(1));
if( flipped )
n = new XorINode( phase->transform(n), phase->intcon(1) );
Node* n = new Conv2BNode(cmp->in(1));
if (flipped) {
n = new XorINode(phase->transform(n), phase->intcon(1));
}
return n;
}

@ -25,8 +25,10 @@
#include "precompiled.hpp"
#include "opto/addnode.hpp"
#include "opto/castnode.hpp"
#include "opto/connode.hpp"
#include "opto/convertnode.hpp"
#include "opto/matcher.hpp"
#include "opto/movenode.hpp"
#include "opto/phaseX.hpp"
#include "opto/subnode.hpp"
#include "runtime/stubRoutines.hpp"
@ -61,6 +63,30 @@ const Type* Conv2BNode::Value(PhaseGVN* phase) const {
return TypeInt::BOOL;
}
// Expands a Conv2B into an explicit compare-with-zero feeding a CMoveI when the
// matcher reports no rule for Op_Conv2B.
//
//  - If Matcher::match_rule_supported(Op_Conv2B) is true, nothing is changed.
//  - Before the post-loop-opts phase the node is only registered through
//    record_for_post_loop_opts_igvn(), so it can be revisited later.
//  - In the post-loop-opts phase the input is compared with zero (CmpI for int
//    types, CmpP for pointer types) and the node is replaced by
//    CMoveI(Bool(cmp, eq), 1, 0) typed as TypeInt::BOOL.
//
// Returns the replacement node, or nullptr when the node is left unchanged.
Node* Conv2BNode::Ideal(PhaseGVN* phase, bool can_reshape) {
  if (Matcher::match_rule_supported(Op_Conv2B)) {
    return nullptr;
  }

  if (!phase->C->post_loop_opts_phase()) {
    // Too early to expand: ask to be processed again after loop opts.
    phase->C->record_for_post_loop_opts_igvn(this);
    return nullptr;
  }

  // Get type of comparison to make
  const Type* t = phase->type(in(1));
  Node* cmp = nullptr;
  if (t->isa_int()) {
    cmp = phase->transform(new CmpINode(in(1), phase->intcon(0)));
  } else if (t->isa_ptr()) {
    cmp = phase->transform(new CmpPNode(in(1), phase->zerocon(BasicType::T_OBJECT)));
  } else {
    assert(false, "Unrecognized comparison for Conv2B: %s", NodeClassNames[in(1)->Opcode()]);
    // Where the assert is compiled out, do not build a Bool on a null compare;
    // leave the node untouched instead.
    return nullptr;
  }

  // Replace Conv2B with the cmove
  Node* bol = phase->transform(new BoolNode(cmp, BoolTest::eq));
  return new CMoveINode(bol, phase->intcon(1), phase->intcon(0), TypeInt::BOOL);
}
// The conversion operations are all alpha-sorted. Please keep it that way!
//=============================================================================

@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -33,10 +33,11 @@
// Convert int/pointer to a Boolean. Map zero to zero, all else to 1.
// Node that converts its single int or pointer input to a boolean int.
class Conv2BNode : public Node {
public:
// NOTE(review): the next two lines are two spellings of the same one-argument
// constructor (0 vs. nullptr for the first Node argument); they look like the
// before/after forms of a single changed line -- confirm only one is intended.
Conv2BNode( Node *i ) : Node(0,i) {}
Conv2BNode(Node* i) : Node(nullptr, i) {}
virtual int Opcode() const;
// The result type is always TypeInt::BOOL.
virtual const Type *bottom_type() const { return TypeInt::BOOL; }
virtual Node* Identity(PhaseGVN* phase);
virtual Node* Ideal(PhaseGVN* phase, bool can_reshape);
virtual const Type* Value(PhaseGVN* phase) const;
// The result lives in an int register class.
virtual uint ideal_reg() const { return Op_RegI; }
};

@ -26,6 +26,7 @@
#include "opto/addnode.hpp"
#include "opto/connode.hpp"
#include "opto/convertnode.hpp"
#include "opto/matcher.hpp"
#include "opto/movenode.hpp"
#include "opto/phaseX.hpp"
#include "opto/subnode.hpp"
@ -207,6 +208,11 @@ Node *CMoveINode::Ideal(PhaseGVN *phase, bool can_reshape) {
}
}
// If we're late in the optimization process, we may have already expanded Conv2B nodes
if (phase->C->post_loop_opts_phase() && !Matcher::match_rule_supported(Op_Conv2B)) {
return nullptr;
}
// Now check for booleans
int flip = 0;
@ -238,9 +244,10 @@ Node *CMoveINode::Ideal(PhaseGVN *phase, bool can_reshape) {
// Convert to a bool (flipped)
// Build int->bool conversion
if (PrintOpto) { tty->print_cr("CMOV to I2B"); }
Node *n = new Conv2BNode( cmp->in(1) );
if( flip )
n = new XorINode( phase->transform(n), phase->intcon(1) );
Node* n = new Conv2BNode(cmp->in(1));
if (flip) {
n = new XorINode(phase->transform(n), phase->intcon(1));
}
return n;
}

@ -0,0 +1,92 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.c2.irTests;
import jdk.test.lib.Asserts;
import compiler.lib.ir_framework.*;
import java.util.Random;
import jdk.test.lib.Utils;
/*
* @test
* @summary Test that patterns leading to Conv2B are correctly expanded.
* @bug 8051725
* @library /test/lib /
* @requires vm.compiler2.enabled
* @requires os.arch == "x86_64" | os.arch == "amd64" | os.arch == "aarch64"
* @run driver compiler.c2.irTests.TestConv2BExpansion
*/
/**
 * IR-framework test covering four comparison shapes: int == 0, int != 0,
 * Object == null and Object != null. Each @Test method carries an @IR rule
 * on CMOVE_I; the two "equals" variants additionally list XOR under failOn.
 * The @Run methods check the compiled results against the same expressions
 * evaluated in the non-compiled assertResult helpers.
 */
public class TestConv2BExpansion {
// Entry point: hands control to the IR test framework.
public static void main(String[] args) {
TestFramework.run();
}
// x == 0: IR rule has CMOVE_I with count "1" and XOR in failOn.
@Test
@IR(counts = {IRNode.CMOVE_I, "1"}, failOn = {IRNode.XOR})
public boolean testIntEquals0(int x) {
return x == 0;
}
// x != 0: IR rule has CMOVE_I with count "1".
@Test
@IR(counts = {IRNode.CMOVE_I, "1"})
public boolean testIntNotEquals0(int x) {
return x != 0;
}
// o == null: IR rule has CMOVE_I with count "1" and XOR in failOn.
@Test
@IR(counts = {IRNode.CMOVE_I, "1"}, failOn = {IRNode.XOR})
public boolean testObjEqualsNull(Object o) {
return o == null;
}
// o != null: IR rule has CMOVE_I with count "1".
@Test
@IR(counts = {IRNode.CMOVE_I, "1"})
public boolean testObjNotEqualsNull(Object o) {
return o != null;
}
// Drives both int tests with a zero and a non-zero input.
@Run(test = {"testIntEquals0", "testIntNotEquals0"})
public void runTestInts() {
assertResult(0);
assertResult(1);
}
// Drives both object tests with a non-null and a null input.
@Run(test = {"testObjEqualsNull", "testObjNotEqualsNull"})
public void runTestObjs() {
assertResult(new Object());
assertResult(null);
}
// Marked @DontCompile; compares each int test's result with the same
// expression evaluated here.
@DontCompile
public void assertResult(int x) {
Asserts.assertEQ(x == 0, testIntEquals0(x));
Asserts.assertEQ(x != 0, testIntNotEquals0(x));
}
// Marked @DontCompile; compares each object test's result with the same
// expression evaluated here.
@DontCompile
public void assertResult(Object o) {
Asserts.assertEQ(o == null, testObjEqualsNull(o));
Asserts.assertEQ(o != null, testObjNotEqualsNull(o));
}
}

@ -0,0 +1,97 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler.x86;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
import java.util.concurrent.TimeUnit;
import java.util.Random;
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Measurement(iterations = 4, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
@Warmup(iterations = 3, time = 1000, timeUnit = TimeUnit.MILLISECONDS)
@Fork(3)
/**
 * JMH benchmark of boolean-producing comparisons (int against 0 and 1, Object
 * against null). Every benchmark walks the 128 entries of a BenchState array
 * and passes the comparison result to the Blackhole.
 */
public class Conv2BRules {
// Consumes (j != 0) for each of the 128 ints.
@Benchmark
public void testNotEquals0(Blackhole blackhole, BenchState state) {
for (int i = 0; i < 128; i++) {
int j = state.ints[i];
blackhole.consume(j != 0);
}
}
// Consumes (j == 0) for each of the 128 ints.
@Benchmark
public void testEquals0(Blackhole blackhole, BenchState state) {
for (int i = 0; i < 128; i++) {
int j = state.ints[i];
blackhole.consume(j == 0);
}
}
// Consumes (j == 1) for each of the 128 ints; a comparison against a
// constant other than zero.
@Benchmark
public void testEquals1(Blackhole blackhole, BenchState state) {
for (int i = 0; i < 128; i++) {
int j = state.ints[i];
blackhole.consume(j == 1);
}
}
// Consumes (o != null) for each of the 128 objects.
@Benchmark
public void testNotEqualsNull(Blackhole blackhole, BenchState state) {
for (int i = 0; i < 128; i++) {
Object o = state.objs[i];
blackhole.consume(o != null);
}
}
// Consumes (o == null) for each of the 128 objects.
@Benchmark
public void testEqualsNull(Blackhole blackhole, BenchState state) {
for (int i = 0; i < 128; i++) {
Object o = state.objs[i];
blackhole.consume(o == null);
}
}
// Benchmark-scoped input data: 128 ints and 128 object references.
@State(Scope.Benchmark)
public static class BenchState {
int[] ints;
Object[] objs;
public BenchState() {
}
// Rebuilds both arrays each iteration from a Random seeded with 1000, so
// the contents are the same on every run: ints are drawn from nextInt(3),
// and an object slot is null when its nextInt(3) draw is 0.
@Setup(Level.Iteration)
public void setup() {
Random random = new Random(1000);
ints = new int[128];
objs = new Object[128];
for (int i = 0; i < 128; i++) {
ints[i] = random.nextInt(3);
objs[i] = random.nextInt(3) == 0 ? null : new Object();
}
}
}
}