From 947f14977a4d1ded839712aea020eaa87c23a23f Mon Sep 17 00:00:00 2001 From: Quan Anh Mai Date: Thu, 15 Jun 2023 16:00:18 +0000 Subject: [PATCH] 8308444: LoadStoreNode::result_not_used() is too conservative Reviewed-by: kvn, thartmann --- src/hotspot/cpu/x86/assembler_x86.cpp | 15 +++ src/hotspot/cpu/x86/assembler_x86.hpp | 2 + src/hotspot/cpu/x86/x86_64.ad | 100 ++++++++++++---- src/hotspot/share/opto/memnode.cpp | 16 ++- .../compiler/c2/irTests/TestGetAndAdd.java | 112 ++++++++++++++++++ .../compiler/lib/ir_framework/IRNode.java | 60 ++++++++++ 6 files changed, 279 insertions(+), 26 deletions(-) create mode 100644 test/hotspot/jtreg/compiler/c2/irTests/TestGetAndAdd.java diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index e76a8a49e0b..4e649c6e70c 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -1324,6 +1324,13 @@ void Assembler::addb(Address dst, int imm8) { emit_int8(imm8); } +void Assembler::addb(Address dst, Register src) { + InstructionMark im(this); + prefix(dst, src); + emit_int8(0x00); + emit_operand(src, dst, 0); +} + void Assembler::addw(Register dst, Register src) { emit_int8(0x66); (void)prefix_and_encode(dst->encoding(), src->encoding()); @@ -1339,6 +1346,14 @@ void Assembler::addw(Address dst, int imm16) { emit_int16(imm16); } +void Assembler::addw(Address dst, Register src) { + InstructionMark im(this); + emit_int8(0x66); + prefix(dst, src); + emit_int8(0x01); + emit_operand(src, dst, 0); +} + void Assembler::addl(Address dst, Register src) { InstructionMark im(this); prefix(dst, src); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 60883f13f7a..b25beb38241 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -984,8 +984,10 @@ private: void adcq(Register dst, Register src); void addb(Address dst, int imm8); + void addb(Address dst, Register src); void addw(Register dst, Register src); void addw(Address dst, int imm16); + void addw(Address dst, Register src); void addl(Address dst, int32_t imm32); void addl(Address dst, Register src); diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 3e0a7d6e2ed..abc43081afb 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -8312,96 +8312,148 @@ instruct compareAndExchangeP( ins_pipe( pipe_cmpxchg ); %} -instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ +instruct xaddB_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{ predicate(n->as_LoadStore()->result_not_used()); match(Set dummy (GetAndAddB mem add)); effect(KILL cr); - format %{ "ADDB [$mem],$add" %} + format %{ "addb_lock $mem, $add" %} + ins_encode %{ + __ lock(); + __ addb($mem$$Address, $add$$Register); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct xaddB_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddB mem add)); + effect(KILL cr); + format %{ "addb_lock $mem, $add" %} ins_encode %{ __ lock(); __ addb($mem$$Address, $add$$constant); %} - ins_pipe( pipe_cmpxchg ); + ins_pipe(pipe_cmpxchg); %} -instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{ +instruct xaddB(memory mem, rRegI newval, rFlagsReg cr) %{ + predicate(!n->as_LoadStore()->result_not_used()); match(Set newval (GetAndAddB mem newval)); effect(KILL cr); - format %{ "XADDB [$mem],$newval" %} + format %{ "xaddb_lock $mem, $newval" %} ins_encode %{ __ lock(); __ xaddb($mem$$Address, $newval$$Register); %} - ins_pipe( pipe_cmpxchg ); + ins_pipe(pipe_cmpxchg); %} -instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ +instruct xaddS_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{ predicate(n->as_LoadStore()->result_not_used()); match(Set dummy (GetAndAddS mem add)); effect(KILL cr); - format %{ "ADDW [$mem],$add" %} + format %{ "addw_lock $mem, $add" %} + ins_encode %{ + __ lock(); + __ addw($mem$$Address, $add$$Register); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct xaddS_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{ + predicate(UseStoreImmI16 && n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddS mem add)); + effect(KILL cr); + format %{ "addw_lock $mem, $add" %} ins_encode %{ __ lock(); __ addw($mem$$Address, $add$$constant); %} - ins_pipe( pipe_cmpxchg ); + ins_pipe(pipe_cmpxchg); %} -instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{ +instruct xaddS(memory mem, rRegI newval, rFlagsReg cr) %{ + predicate(!n->as_LoadStore()->result_not_used()); match(Set newval (GetAndAddS mem newval)); effect(KILL cr); - format %{ "XADDW [$mem],$newval" %} + format %{ "xaddw_lock $mem, $newval" %} ins_encode %{ __ lock(); __ xaddw($mem$$Address, $newval$$Register); %} - ins_pipe( pipe_cmpxchg ); + ins_pipe(pipe_cmpxchg); %} -instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ +instruct xaddI_reg_no_res(memory mem, Universe dummy, rRegI add, rFlagsReg cr) %{ predicate(n->as_LoadStore()->result_not_used()); match(Set dummy (GetAndAddI mem add)); effect(KILL cr); - format %{ "ADDL [$mem],$add" %} + format %{ "addl_lock $mem, $add" %} + ins_encode %{ + __ lock(); + __ addl($mem$$Address, $add$$Register); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct xaddI_imm_no_res(memory mem, Universe dummy, immI add, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem add)); + effect(KILL cr); + format %{ "addl_lock $mem, $add" %} ins_encode %{ __ lock(); __ addl($mem$$Address, $add$$constant); %} - ins_pipe( pipe_cmpxchg ); + ins_pipe(pipe_cmpxchg); %} -instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{ +instruct xaddI(memory mem, rRegI newval, rFlagsReg cr) %{ + predicate(!n->as_LoadStore()->result_not_used()); match(Set newval (GetAndAddI mem newval)); effect(KILL cr); - format %{ "XADDL [$mem],$newval" %} + format %{ "xaddl_lock $mem, $newval" %} ins_encode %{ __ lock(); __ xaddl($mem$$Address, $newval$$Register); %} - ins_pipe( pipe_cmpxchg ); + ins_pipe(pipe_cmpxchg); %} -instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{ +instruct xaddL_reg_no_res(memory mem, Universe dummy, rRegL add, rFlagsReg cr) %{ predicate(n->as_LoadStore()->result_not_used()); match(Set dummy (GetAndAddL mem add)); effect(KILL cr); - format %{ "ADDQ [$mem],$add" %} + format %{ "addq_lock $mem, $add" %} + ins_encode %{ + __ lock(); + __ addq($mem$$Address, $add$$Register); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct xaddL_imm_no_res(memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem add)); + effect(KILL cr); + format %{ "addq_lock $mem, $add" %} ins_encode %{ __ lock(); __ addq($mem$$Address, $add$$constant); %} - ins_pipe( pipe_cmpxchg ); + ins_pipe(pipe_cmpxchg); %} -instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{ +instruct xaddL(memory mem, rRegL newval, rFlagsReg cr) %{ + predicate(!n->as_LoadStore()->result_not_used()); match(Set newval (GetAndAddL mem newval)); effect(KILL cr); - format %{ "XADDQ [$mem],$newval" %} + format %{ "xaddq_lock $mem, $newval" %} ins_encode %{ __ lock(); __ xaddq($mem$$Address, $newval$$Register); %} - ins_pipe( pipe_cmpxchg ); + ins_pipe(pipe_cmpxchg); %} instruct xchgB( memory mem, rRegI newval) %{ diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 505b8cab05f..e22170389da 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -3013,10 +3013,22 @@ uint LoadStoreNode::ideal_reg() const { return _type->ideal_reg(); } +// This method conservatively checks if the result of a LoadStoreNode is +// used, that is, if it returns true, then it is definitely the case that +// the result of the node is not needed. +// For example, GetAndAdd can be matched into a lock_add instead of a +// lock_xadd if the result of LoadStoreNode::result_not_used() is true bool LoadStoreNode::result_not_used() const { - for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) { + for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { Node *x = fast_out(i); - if (x->Opcode() == Op_SCMemProj) continue; + if (x->Opcode() == Op_SCMemProj) { + continue; + } + if (x->bottom_type() == TypeTuple::MEMBAR && + !x->is_Call() && + x->Opcode() != Op_Blackhole) { + continue; + } return false; } return true; diff --git a/test/hotspot/jtreg/compiler/c2/irTests/TestGetAndAdd.java b/test/hotspot/jtreg/compiler/c2/irTests/TestGetAndAdd.java new file mode 100644 index 00000000000..905a11eb494 --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/irTests/TestGetAndAdd.java @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package compiler.c2.x86; + +import compiler.lib.ir_framework.*; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; + +/* + * @test + * bug 8308444 + * @summary verify that the correct node is matched for atomic getAndAdd + * @requires os.simpleArch == "x64" + * @requires vm.compiler2.enabled + * @library /test/lib / + * @run driver compiler.c2.x86.TestGetAndAdd + */ +public class TestGetAndAdd { + static final VarHandle B; + static final VarHandle S; + static final VarHandle I; + static final VarHandle L; + + static { + try { + var lookup = MethodHandles.lookup(); + B = lookup.findStaticVarHandle(TestGetAndAdd.class, "b", byte.class); + S = lookup.findStaticVarHandle(TestGetAndAdd.class, "s", short.class); + I = lookup.findStaticVarHandle(TestGetAndAdd.class, "i", int.class); + L = lookup.findStaticVarHandle(TestGetAndAdd.class, "l", long.class); + } catch (Exception e) { + throw new AssertionError(e); + } + } + + static byte b; + static short s; + static int i; + static long l; + + static byte b2; + static short s2; + static int i2; + static long l2; + + public static void main(String[] args) { + new TestFramework() + .addFlags("-XX:+UseStoreImmI16") + .start(); + } + + @Test + @IR(counts = {IRNode.X86_LOCK_ADDB_REG, "1"}, phase = CompilePhase.FINAL_CODE) + @IR(counts = {IRNode.X86_LOCK_ADDB_IMM, "1"}, phase = CompilePhase.FINAL_CODE) + @IR(counts = {IRNode.X86_LOCK_XADDB, "3"}, phase = CompilePhase.FINAL_CODE) + public static void addB() { + B.getAndAdd(b2); + B.getAndAdd((byte)1); + b2 = (byte)B.getAndAdd(b2); + } + + @Test + @IR(counts = {IRNode.X86_LOCK_ADDS_REG, "1"}, phase = CompilePhase.FINAL_CODE) + @IR(counts = {IRNode.X86_LOCK_ADDS_IMM, "1"}, phase = CompilePhase.FINAL_CODE) + @IR(counts = {IRNode.X86_LOCK_XADDS, "3"}, phase = CompilePhase.FINAL_CODE) + public static void addS() { + S.getAndAdd(s2); + S.getAndAdd((short)1); + s2 = (short)S.getAndAdd(s2); + } + + @Test + @IR(counts = {IRNode.X86_LOCK_ADDI_REG, "1"}, phase = CompilePhase.FINAL_CODE) + @IR(counts = {IRNode.X86_LOCK_ADDI_IMM, "1"}, phase = CompilePhase.FINAL_CODE) + @IR(counts = {IRNode.X86_LOCK_XADDI, "3"}, phase = CompilePhase.FINAL_CODE) + public static void addI() { + I.getAndAdd(i2); + I.getAndAdd(1); + i2 = (int)I.getAndAdd(i2); + } + + @Test + @IR(counts = {IRNode.X86_LOCK_ADDL_REG, "1"}, phase = CompilePhase.FINAL_CODE) + @IR(counts = {IRNode.X86_LOCK_ADDL_IMM, "1"}, phase = CompilePhase.FINAL_CODE) + @IR(counts = {IRNode.X86_LOCK_XADDL, "3"}, phase = CompilePhase.FINAL_CODE) + public static void addL() { + L.getAndAdd(l2); + L.getAndAdd(1L); + l2 = (long)L.getAndAdd(l2); + } +} diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index 352ae453144..eaa62c3c6b3 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -1551,6 +1551,66 @@ public class IRNode { machOnly(Z_GET_AND_SET_P_WITH_BARRIER_FLAG, regex); } + public static final String X86_LOCK_ADDB_REG = PREFIX + "X86_LOCK_ADDB_REG" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_ADDB_REG, "xaddB_reg_no_res"); + } + + public static final String X86_LOCK_ADDB_IMM = PREFIX + "X86_LOCK_ADDB_IMM" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_ADDB_IMM, "xaddB_imm_no_res"); + } + + public static final String X86_LOCK_XADDB = PREFIX + "X86_LOCK_XADDB" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_XADDB, "xaddB"); + } + + public static final String X86_LOCK_ADDS_REG = PREFIX + "X86_LOCK_ADDS_REG" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_ADDS_REG, "xaddS_reg_no_res"); + } + + public static final String X86_LOCK_ADDS_IMM = PREFIX + "X86_LOCK_ADDS_IMM" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_ADDS_IMM, "xaddS_imm_no_res"); + } + + public static final String X86_LOCK_XADDS = PREFIX + "X86_LOCK_XADDS" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_XADDS, "xaddS"); + } + + public static final String X86_LOCK_ADDI_REG = PREFIX + "X86_LOCK_ADDI_REG" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_ADDI_REG, "xaddI_reg_no_res"); + } + + public static final String X86_LOCK_ADDI_IMM = PREFIX + "X86_LOCK_ADDI_IMM" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_ADDI_IMM, "xaddI_imm_no_res"); + } + + public static final String X86_LOCK_XADDI = PREFIX + "X86_LOCK_XADDI" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_XADDI, "xaddI"); + } + + public static final String X86_LOCK_ADDL_REG = PREFIX + "X86_LOCK_ADDL_REG" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_ADDL_REG, "xaddL_reg_no_res"); + } + + public static final String X86_LOCK_ADDL_IMM = PREFIX + "X86_LOCK_ADDL_IMM" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_ADDL_IMM, "xaddL_imm_no_res"); + } + + public static final String X86_LOCK_XADDL = PREFIX + "X86_LOCK_XADDL" + POSTFIX; + static { + machOnlyNameRegex(X86_LOCK_XADDL, "xaddL"); + } + /* * Utility methods to set up IR_NODE_MAPPINGS. */