From 0b467e902d591ae9feeec1669918d1588987cd1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roberto=20Casta=C3=B1eda=20Lozano?= Date: Thu, 3 Oct 2024 08:36:33 +0000 Subject: [PATCH] 8334060: Implementation of Late Barrier Expansion for G1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Roberto Castañeda Lozano Co-authored-by: Erik Österlund Co-authored-by: Siyao Liu Co-authored-by: Kim Barrett Co-authored-by: Amit Kumar Co-authored-by: Martin Doerr Co-authored-by: Feilong Jiang Co-authored-by: Sergey Nazarkin Reviewed-by: kvn, tschatzl, fyang, ayang, kbarrett --- make/hotspot/gensrc/GensrcAdlc.gmk | 7 + src/hotspot/cpu/aarch64/aarch64.ad | 24 +- src/hotspot/cpu/aarch64/cas.m4 | 4 + .../gc/g1/g1BarrierSetAssembler_aarch64.cpp | 282 ++-- .../gc/g1/g1BarrierSetAssembler_aarch64.hpp | 23 + src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad | 680 +++++++++ src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4 | 384 ++++++ src/hotspot/cpu/arm/arm.ad | 4 + src/hotspot/cpu/arm/assembler_arm_32.hpp | 23 +- .../arm/gc/g1/g1BarrierSetAssembler_arm.cpp | 302 ++-- .../arm/gc/g1/g1BarrierSetAssembler_arm.hpp | 26 +- src/hotspot/cpu/arm/gc/g1/g1_arm.ad | 201 +++ .../arm/gc/shared/barrierSetAssembler_arm.cpp | 56 +- .../arm/gc/shared/barrierSetAssembler_arm.hpp | 24 + src/hotspot/cpu/arm/register_arm.hpp | 25 + .../ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp | 294 ++-- .../ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp | 25 + src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad | 684 ++++++++++ src/hotspot/cpu/ppc/ppc.ad | 20 +- src/hotspot/cpu/ppc/register_ppc.hpp | 9 + .../gc/g1/g1BarrierSetAssembler_riscv.cpp | 288 ++-- .../gc/g1/g1BarrierSetAssembler_riscv.hpp | 25 +- src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad | 564 ++++++++ src/hotspot/cpu/riscv/riscv.ad | 19 +- .../s390/gc/g1/g1BarrierSetAssembler_s390.cpp | 279 +++- .../s390/gc/g1/g1BarrierSetAssembler_s390.hpp | 28 +- src/hotspot/cpu/s390/gc/g1/g1_s390.ad | 457 +++++++ .../gc/shared/barrierSetAssembler_s390.cpp | 92 +- .../gc/shared/barrierSetAssembler_s390.hpp | 38 +- src/hotspot/cpu/s390/macroAssembler_s390.cpp | 3 +- src/hotspot/cpu/s390/register_s390.hpp | 8 + src/hotspot/cpu/s390/s390.ad | 18 +- .../x86/gc/g1/g1BarrierSetAssembler_x86.cpp | 301 ++-- .../x86/gc/g1/g1BarrierSetAssembler_x86.hpp | 23 + src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad | 371 +++++ src/hotspot/cpu/x86/x86.ad | 4 + src/hotspot/cpu/x86/x86_64.ad | 16 +- src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp | 1214 +++++------------ src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp | 124 +- .../share/gc/g1/g1BarrierSetRuntime.cpp | 8 + .../share/gc/g1/g1BarrierSetRuntime.hpp | 4 + .../share/gc/shared/c2/barrierSetC2.cpp | 4 + .../share/gc/shared/c2/barrierSetC2.hpp | 4 + .../gc/shared/c2/cardTableBarrierSetC2.cpp | 29 - .../gc/shared/c2/cardTableBarrierSetC2.hpp | 2 - src/hotspot/share/opto/buildOopMap.cpp | 7 + src/hotspot/share/opto/lcm.cpp | 8 + src/hotspot/share/opto/matcher.cpp | 20 + src/hotspot/share/opto/matcher.hpp | 2 + src/hotspot/share/opto/memnode.cpp | 5 + src/hotspot/share/opto/output.cpp | 2 + .../compiler/c2/aarch64/TestVolatiles.java | 44 +- .../AllocationMergesTests.java | 9 +- .../gcbarriers/TestG1BarrierGeneration.java | 639 +++++++++ .../compiler/lib/ir_framework/IRNode.java | 102 ++ .../TestMachTempsAcrossSafepoints.java | 98 ++ .../src/sun/hotspot/tools/ctw/CtwRunner.java | 5 +- test/jdk/java/lang/invoke/BigArityTest.java | 2 +- 58 files changed, 6451 insertions(+), 1512 deletions(-) create mode 100644 src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad create mode 
100644 src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4 create mode 100644 src/hotspot/cpu/arm/gc/g1/g1_arm.ad create mode 100644 src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad create mode 100644 src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad create mode 100644 src/hotspot/cpu/s390/gc/g1/g1_s390.ad create mode 100644 src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad create mode 100644 test/hotspot/jtreg/compiler/gcbarriers/TestG1BarrierGeneration.java create mode 100644 test/hotspot/jtreg/compiler/runtime/safepoints/TestMachTempsAcrossSafepoints.java diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk index 8dada3cec0a..ddb2c3e33e5 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -200,6 +200,13 @@ ifeq ($(call check-jvm-feature, compiler2), true) ))) endif + ifeq ($(call check-jvm-feature, g1gc), true) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/g1/g1_$(HOTSPOT_TARGET_CPU).ad \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/g1/g1_$(HOTSPOT_TARGET_CPU_ARCH).ad \ + ))) + endif + SINGLE_AD_SRCFILE := $(ADLC_SUPPORT_DIR)/all-ad-src.ad INSERT_FILENAME_AWK_SCRIPT := \ diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 39eae43a287..7d2a35cefd8 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -2620,7 +2620,8 @@ static bool is_vector_bitwise_not_pattern(Node* n, Node* m) { bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { if (is_vshift_con_pattern(n, m) || is_vector_bitwise_not_pattern(n, m) || - is_valid_sve_arith_imm_pattern(n, m)) { + is_valid_sve_arith_imm_pattern(n, m) || + is_encode_and_store_pattern(n, m)) { mstack.push(m, Visit); return true; } @@ -6410,7 +6411,7 @@ instruct loadP(iRegPNoSp dst, memory mem) instruct loadN(iRegNNoSp dst, memory mem) %{ match(Set dst (LoadN mem)); - predicate(!needs_acquiring_load(n)); + predicate(!needs_acquiring_load(n) && n->as_Load()->barrier_data() == 0); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed ptr" %} @@ -6839,7 +6840,7 @@ instruct storeimmP0(immP0 zero, memory mem) instruct storeN(iRegN src, memory mem) %{ match(Set mem (StoreN mem src)); - predicate(!needs_releasing_store(n)); + predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0); ins_cost(INSN_COST); format %{ "strw $src, $mem\t# compressed ptr" %} @@ -6852,7 +6853,7 @@ instruct storeN(iRegN src, memory mem) instruct storeImmN0(immN0 zero, memory mem) %{ match(Set mem (StoreN mem zero)); - predicate(!needs_releasing_store(n)); + predicate(!needs_releasing_store(n) && n->as_Store()->barrier_data() == 0); ins_cost(INSN_COST); format %{ "strw zr, $mem\t# compressed ptr" %} @@ -7086,6 +7087,7 @@ instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem) instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem) %{ match(Set dst (LoadN mem)); + predicate(n->as_Load()->barrier_data() == 0); ins_cost(VOLATILE_REF_COST); format %{ "ldarw $dst, $mem\t# compressed ptr" %} @@ -7253,6 +7255,7 @@ instruct storeimmP0_volatile(immP0 zero, /* sync_memory*/indirect mem) instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem) %{ match(Set mem (StoreN mem src)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(VOLATILE_REF_COST); format %{ "stlrw $src, $mem\t# compressed ptr" %} @@ -7265,6 +7268,7 @@ instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem) instruct storeimmN0_volatile(immN0 zero, /* 
sync_memory*/indirect mem) %{ match(Set mem (StoreN mem zero)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(VOLATILE_REF_COST); format %{ "stlrw zr, $mem\t# compressed ptr" %} @@ -8061,6 +8065,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ match(Set res (CompareAndSwapN mem (Binary oldval newval))); + predicate(n->as_LoadStore()->barrier_data() == 0); ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); @@ -8175,7 +8180,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndSwapN mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); @@ -8280,6 +8285,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne // This pattern is generated automatically from cas.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr); @@ -8389,7 +8395,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL // This pattern is generated automatically from cas.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); effect(TEMP_DEF res, KILL cr); @@ -8501,6 +8507,7 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne // This pattern is generated automatically from cas.m4. // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); effect(KILL cr); @@ -8620,7 +8627,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL // This pattern is generated automatically from cas.m4. 
// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); effect(KILL cr); @@ -8681,6 +8688,7 @@ instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{ %} instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set prev (GetAndSetN mem newv)); ins_cost(2 * VOLATILE_REF_COST); format %{ "atomic_xchgw $prev, $newv, [$mem]" %} @@ -8724,7 +8732,7 @@ instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{ %} instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ - predicate(needs_acquiring_load_exclusive(n)); + predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0); match(Set prev (GetAndSetN mem newv)); ins_cost(VOLATILE_REF_COST); format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %} diff --git a/src/hotspot/cpu/aarch64/cas.m4 b/src/hotspot/cpu/aarch64/cas.m4 index f8aac0c4939..7e13e153db1 100644 --- a/src/hotspot/cpu/aarch64/cas.m4 +++ b/src/hotspot/cpu/aarch64/cas.m4 @@ -45,7 +45,9 @@ define(`CAS_INSN', // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct compareAndExchange$1$6(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));), + $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);), $1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);), + $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);), $6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));), `dnl') match(Set res (CompareAndExchange$1 mem (Binary oldval newval))); @@ -122,7 +124,9 @@ define(`CAS_INSN3', // DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ ifelse($1$6,PAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && (n->as_LoadStore()->barrier_data() == 0));), + $1$6,NAcq,INDENT(predicate(needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() == 0);), $1,P,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);), + $1,N,INDENT(predicate(n->as_LoadStore()->barrier_data() == 0);), $6,Acq,INDENT(predicate(needs_acquiring_load_exclusive(n));), `dnl') match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval))); diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp index d02038b6e91..b978c350ce1 100644 --- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.cpp @@ -38,7 +38,10 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/g1/c1/g1BarrierSetC1.hpp" -#endif +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> @@ -95,6 +98,54 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas __ pop(saved_regs, sp); } +static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, 
Label& runtime, + const Register thread, const Register value, const Register temp1, const Register temp2) { + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) + __ ldr(temp1, Address(thread, in_bytes(index_offset))); // temp1 := *(index address) + __ cbz(temp1, runtime); // jump to runtime if index == 0 (full buffer) + // The buffer is not full, store value into it. + __ sub(temp1, temp1, wordSize); // temp1 := next index + __ str(temp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index + __ ldr(temp2, Address(thread, in_bytes(buffer_offset))); // temp2 := buffer address + __ str(value, Address(temp2, temp1)); // *(buffer address + next index) := value +} + +static void generate_pre_barrier_fast_path(MacroAssembler* masm, + const Register thread, + const Register tmp1) { + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ldrw(tmp1, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ldrb(tmp1, in_progress); + } +} + +static void generate_pre_barrier_slow_path(MacroAssembler* masm, + const Register obj, + const Register pre_val, + const Register thread, + const Register tmp1, + const Register tmp2, + Label& done, + Label& runtime) { + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + // Is the previous value null? + __ cbz(pre_val, done); + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, + thread, pre_val, tmp1, tmp2); + __ b(done); +} + void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Register obj, Register pre_val, @@ -115,43 +166,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, assert_different_registers(obj, pre_val, tmp1, tmp2); assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); - Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); - Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); - Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ ldrw(tmp1, in_progress); - } else { - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ ldrb(tmp1, in_progress); - } + generate_pre_barrier_fast_path(masm, thread, tmp1); + // If marking is not active (*(mark queue active address) == 0), jump to done __ cbzw(tmp1, done); - - // Do we need to load the previous value? - if (obj != noreg) { - __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); - } - - // Is the previous value null? - __ cbz(pre_val, done); - - // Can we store original value in the thread's buffer? - // Is index == 0? - // (The index field is typed as size_t.) - - __ ldr(tmp1, index); // tmp := *index_adr - __ cbz(tmp1, runtime); // tmp == 0? 
- // If yes, goto runtime - - __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize - __ str(tmp1, index); // *index_adr := tmp - __ ldr(tmp2, buffer); - __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr - - // Record the previous value - __ str(pre_val, Address(tmp1, 0)); - __ b(done); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime); __ bind(runtime); @@ -182,6 +200,50 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, } +static void generate_post_barrier_fast_path(MacroAssembler* masm, + const Register store_addr, + const Register new_val, + const Register tmp1, + const Register tmp2, + Label& done, + bool new_val_may_be_null) { + // Does store cross heap regions? + __ eor(tmp1, store_addr, new_val); // tmp1 := store address ^ new value + __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes) + __ cbz(tmp1, done); + // Crosses regions, storing null? + if (new_val_may_be_null) { + __ cbz(new_val, done); + } + // Storing region crossing non-null, is card young? + __ lsr(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base + __ load_byte_map_base(tmp2); // tmp2 := card table base address + __ add(tmp1, tmp1, tmp2); // tmp1 := card address + __ ldrb(tmp2, Address(tmp1)); // tmp2 := card + __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val()); // tmp2 := card == young_card_val? +} + +static void generate_post_barrier_slow_path(MacroAssembler* masm, + const Register thread, + const Register tmp1, + const Register tmp2, + Label& done, + Label& runtime) { + __ membar(Assembler::StoreLoad); // StoreLoad membar + __ ldrb(tmp2, Address(tmp1)); // tmp2 := card + __ cbzw(tmp2, done); + // Storing a region crossing, non-null oop, card is clean. + // Dirty card and log. + STATIC_ASSERT(CardTable::dirty_card_val() == 0); + __ strb(zr, Address(tmp1)); // *(card address) := dirty_card_val + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, + thread, tmp1, tmp2, rscratch1); + __ b(done); +} + void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, @@ -194,70 +256,116 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); - Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); - Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); - - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); - Label done; Label runtime; - // Does store cross heap regions? - - __ eor(tmp1, store_addr, new_val); - __ lsr(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); - __ cbz(tmp1, done); - - // crosses regions, storing null? - - __ cbz(new_val, done); - - // storing region crossing non-null, is card already dirty? 
- - const Register card_addr = tmp1; - - __ lsr(card_addr, store_addr, CardTable::card_shift()); - - // get the address of the card - __ load_byte_map_base(tmp2); - __ add(card_addr, card_addr, tmp2); - __ ldrb(tmp2, Address(card_addr)); - __ cmpw(tmp2, (int)G1CardTable::g1_young_card_val()); + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */); + // If card is young, jump to done __ br(Assembler::EQ, done); - - assert((int)CardTable::dirty_card_val() == 0, "must be 0"); - - __ membar(Assembler::StoreLoad); - - __ ldrb(tmp2, Address(card_addr)); - __ cbzw(tmp2, done); - - // storing a region crossing, non-null oop, card is clean. - // dirty card and log. - - __ strb(zr, Address(card_addr)); - - __ ldr(rscratch1, queue_index); - __ cbz(rscratch1, runtime); - __ sub(rscratch1, rscratch1, wordSize); - __ str(rscratch1, queue_index); - - __ ldr(tmp2, buffer); - __ str(card_addr, Address(tmp2, rscratch1)); - __ b(done); + generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime); __ bind(runtime); // save the live input values RegSet saved = RegSet::of(store_addr); __ push(saved, sp); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread); __ pop(saved, sp); __ bind(done); } +#if defined(COMPILER2) + +static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) { + SaveLiveRegisters save_registers(masm, stub); + if (c_rarg0 != arg) { + __ mov(c_rarg0, arg); + } + __ mov(c_rarg1, rthread); + __ mov(rscratch1, runtime_path); + __ blr(rscratch1); +} + +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* stub) { + assert(thread == rthread, "must be"); + assert_different_registers(obj, pre_val, tmp1, tmp2); + assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); + + stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2); + + generate_pre_barrier_fast_path(masm, thread, tmp1); + // If marking is active (*(mark queue active address) != 0), jump to stub (slow path) + __ cbnzw(tmp1, *stub->entry()); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); + Register tmp2 = stub->tmp2(); + + __ bind(*stub->entry()); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); + __ b(*stub->continuation()); +} + +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* stub) { + assert(thread == rthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, + rscratch1); + assert(store_addr != noreg && new_val != noreg && tmp1 != noreg + && tmp2 != noreg, 
"expecting a register"); + + stub->initialize_registers(thread, tmp1, tmp2); + + bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0; + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null); + // If card is not young, jump to stub (slow path) + __ br(Assembler::NE, *stub->entry()); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); // tmp1 holds the card address. + Register tmp2 = stub->tmp2(); + assert(stub->tmp3() == noreg, "not needed in this platform"); + + __ bind(*stub->entry()); + generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)); + __ b(*stub->continuation()); +} + +#endif // COMPILER2 + void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp2) { bool on_oop = is_reference_type(type); diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp index 7b4bc8cdc49..4baa18cb945 100644 --- a/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/g1/g1BarrierSetAssembler_aarch64.hpp @@ -33,6 +33,8 @@ class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -69,6 +71,27 @@ public: void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); #endif +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* c2_stub); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif + void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp2); }; diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad new file mode 100644 index 00000000000..081a67d6880 --- /dev/null +++ b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.ad @@ -0,0 +1,680 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+
+source_hpp %{
+
+#include "gc/g1/c2/g1BarrierSetC2.hpp"
+#include "gc/shared/gc_globals.hpp"
+
+%}
+
+source %{
+
+#include "gc/g1/g1BarrierSetAssembler_aarch64.hpp"
+#include "gc/g1/g1BarrierSetRuntime.hpp"
+
+static void write_barrier_pre(MacroAssembler* masm,
+                              const MachNode* node,
+                              Register obj,
+                              Register pre_val,
+                              Register tmp1,
+                              Register tmp2,
+                              RegSet preserve = RegSet(),
+                              RegSet no_preserve = RegSet()) {
+  if (!G1PreBarrierStubC2::needs_barrier(node)) {
+    return;
+  }
+  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+  G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
+  for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) {
+    stub->preserve(*reg);
+  }
+  for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) {
+    stub->dont_preserve(*reg);
+  }
+  g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, rthread, tmp1, tmp2, stub);
+}
+
+static void write_barrier_post(MacroAssembler* masm,
+                               const MachNode* node,
+                               Register store_addr,
+                               Register new_val,
+                               Register tmp1,
+                               Register tmp2) {
+  if (!G1PostBarrierStubC2::needs_barrier(node)) {
+    return;
+  }
+  Assembler::InlineSkippedInstructionsCounter skip_counter(masm);
+  G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler());
+  G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node);
+  g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, rthread, tmp1, tmp2, stub);
+}
+
+%}
+
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+
+// This section is generated from g1_aarch64.m4
+
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE
+instruct g1StoreP(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr)
+%{
+  predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0);
+  match(Set mem (StoreP mem src));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+  ins_cost(INSN_COST);
+  format %{ "str $src, $mem\t# ptr" %}
+  ins_encode %{
+    write_barrier_pre(masm, this,
+                      $mem$$Register /* obj */,
+                      $tmp1$$Register /* pre_val */,
+                      $tmp2$$Register /* tmp1 */,
+                      $tmp3$$Register /* tmp2 */,
+                      RegSet::of($mem$$Register, $src$$Register) /* preserve */);
+    __ str($src$$Register, $mem$$Register);
+    write_barrier_post(masm, this,
+                       $mem$$Register /* store_addr */,
+                       $src$$Register /* new_val */,
+                       $tmp2$$Register /* tmp1 */,
+                       $tmp3$$Register /* tmp2 */);
+  %}
+  ins_pipe(istore_reg_mem);
+%}
+
+// This pattern is generated automatically from g1_aarch64.m4.
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StorePVolatile(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreP mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "stlr $src, $mem\t# ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ stlr($src$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_class_memory); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StoreN(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(INSN_COST); + format %{ "strw $src, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ strw($src$$Register, $mem$$Register); + if ((barrier_data() & G1C2BarrierPost) != 0) { + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ decode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + } + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StoreNVolatile(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "stlrw $src, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ stlrw($src$$Register, $mem$$Register); + if ((barrier_data() & G1C2BarrierPost) != 0) { + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ decode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + } + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_class_memory); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(INSN_COST); + format %{ "encode_heap_oop $tmp1, $src\n\t" + "strw $tmp1, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ encode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + __ strw($tmp1$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1EncodePAndStoreNVolatile(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_releasing_store(n) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "encode_heap_oop $tmp1, $src\n\t" + "stlrw $tmp1, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ encode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + __ stlrw($tmp1$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_class_memory); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $res = $mem, $oldval, $newval\t# ptr" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP and its Acq variant. 
+ write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + false /* acquire */, true /* release */, false /* weak */, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# ptr" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP and its Acq variant. + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + true /* acquire */, true /* release */, false /* weak */, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $res = $mem, $oldval, $newval\t# narrow oop" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word, + false /* acquire */, true /* release */, false /* weak */, $res$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# narrow oop" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word, + true /* acquire */, true /* release */, false /* weak */, $res$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndSwapP(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $mem, $oldval, $newval\t# (ptr)\n\t" + "cset $res, EQ" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + false /* acquire */, true /* release */, false /* weak */, noreg); + __ cset($res$$Register, Assembler::EQ); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr)\n\t" + "cset $res, EQ" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + true /* acquire */, true /* release */, false /* weak */, noreg); + __ cset($res$$Register, Assembler::EQ); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndSwapN(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $mem, $oldval, $newval\t# (narrow oop)\n\t" + "cset $res, EQ" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word, + false /* acquire */, true /* release */, false /* weak */, noreg); + __ cset($res$$Register, Assembler::EQ); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop)\n\t" + "cset $res, EQ" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word, + true /* acquire */, true /* release */, false /* weak */, noreg); + __ cset($res$$Register, Assembler::EQ); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1GetAndSetP(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchg $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchg($preval$$Register, $newval$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1GetAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchg_acq $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgal($preval$$Register, $newval$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1GetAndSetN(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetN mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchgw $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgw($preval$$Register, $newval$$Register, $mem$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1GetAndSetNAcq(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr) +%{ + predicate(UseG1GC && needs_acquiring_load_exclusive(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetN mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchgw_acq $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgalw($preval$$Register, $newval$$Register, $mem$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr) +%{ + // This instruction does not need an acquiring counterpart because it is only + // used for reference loading (Reference::get()). The same holds for g1LoadN. + predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadP mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(4 * INSN_COST); + format %{ "ldr $dst, $mem\t# ptr" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(iload_reg_mem); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadN mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(4 * INSN_COST); + format %{ "ldrw $dst, $mem\t# compressed ptr" %} + ins_encode %{ + __ ldrw($dst$$Register, $mem$$Register); + if ((barrier_data() & G1C2BarrierPre) != 0) { + __ decode_heap_oop($tmp1$$Register, $dst$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + } + %} + ins_pipe(iload_reg_mem); +%} + +// END This section of the file is automatically generated. Do not edit -------------- diff --git a/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4 b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4 new file mode 100644 index 00000000000..8fb1f7e8e42 --- /dev/null +++ b/src/hotspot/cpu/aarch64/gc/g1/g1_aarch64.m4 @@ -0,0 +1,384 @@ +dnl Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +dnl +dnl This code is free software; you can redistribute it and/or modify it +dnl under the terms of the GNU General Public License version 2 only, as +dnl published by the Free Software Foundation. 
+dnl +dnl This code is distributed in the hope that it will be useful, but WITHOUT +dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +dnl version 2 for more details (a copy is included in the LICENSE file that +dnl accompanied this code). +dnl +dnl You should have received a copy of the GNU General Public License version +dnl 2 along with this work; if not, write to the Free Software Foundation, +dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +dnl +dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +dnl or visit www.oracle.com if you need additional information or have any +dnl questions. +dnl +// BEGIN This section of the file is automatically generated. Do not edit -------------- + +// This section is generated from g1_aarch64.m4 + +define(`STOREP_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StoreP$1(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Volatile,'needs_releasing_store(n)`,'!needs_releasing_store(n)`) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreP mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST)); + format %{ "$2 $src, $mem\t# ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ $2($src$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem)); +%}')dnl +STOREP_INSN(,str) +STOREP_INSN(Volatile,stlr) +dnl +define(`STOREN_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1StoreN$1(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Volatile,'needs_releasing_store(n)`,'!needs_releasing_store(n)`) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST)); + format %{ "$2 $src, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ $2($src$$Register, $mem$$Register); + if ((barrier_data() & G1C2BarrierPost) != 0) { + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ decode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + } + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem)); +%}')dnl +STOREN_INSN(,strw) +STOREN_INSN(Volatile,stlrw) +dnl +define(`ENCODESTOREN_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1EncodePAndStoreN$1(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Volatile,'needs_releasing_store(n)`,'!needs_releasing_store(n)`) && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(ifelse($1,Volatile,VOLATILE_REF_COST,INSN_COST)); + format %{ "encode_heap_oop $tmp1, $src\n\t" + "$2 $tmp1, $mem\t# compressed ptr" %} + ins_encode %{ + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ encode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + __ $2($tmp1$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(ifelse($1,Volatile,pipe_class_memory,istore_reg_mem)); +%}')dnl +ENCODESTOREN_INSN(,strw) +ENCODESTOREN_INSN(Volatile,stlrw) +dnl +define(`CAEP_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndExchangeP$1(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST)); + format %{ "cmpxchg$2 $res = $mem, $oldval, $newval\t# ptr" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP and its Acq variant. + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + $3 /* acquire */, true /* release */, false /* weak */, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%}')dnl +CAEP_INSN(,,false) +CAEP_INSN(Acq,_acq,true) +dnl +define(`CAEN_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndExchangeN$1(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST)); + format %{ "cmpxchg$2 $res = $mem, $oldval, $newval\t# narrow oop" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word, + $3 /* acquire */, true /* release */, false /* weak */, $res$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%}')dnl +CAEN_INSN(,,false) +CAEN_INSN(Acq,_acq,true) +dnl +define(`CASP_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndSwapP$1(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST)); + format %{ "cmpxchg$2 $mem, $oldval, $newval\t# (ptr)\n\t" + "cset $res, EQ" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::xword, + $3 /* acquire */, true /* release */, false /* weak */, noreg); + __ cset($res$$Register, Assembler::EQ); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%}')dnl +CASP_INSN(,,false) +CASP_INSN(Acq,_acq,true) +dnl +define(`CASN_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1CompareAndSwapN$1(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST)); + format %{ "cmpxchg$2 $mem, $oldval, $newval\t# (narrow oop)\n\t" + "cset $res, EQ" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::word, + $3 /* acquire */, true /* release */, false /* weak */, noreg); + __ cset($res$$Register, Assembler::EQ); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%}')dnl +CASN_INSN(,,false) +CASN_INSN(Acq,_acq,true) +dnl +define(`XCHGP_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1GetAndSetP$1(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST)); + format %{ "atomic_xchg$2 $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ $3($preval$$Register, $newval$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%}')dnl +XCHGP_INSN(,,atomic_xchg) +XCHGP_INSN(Acq,_acq,atomic_xchgal) +dnl +define(`XCHGN_INSN', +` +// This pattern is generated automatically from g1_aarch64.m4. 
+// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1GetAndSetN$1(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval, rFlagsReg cr) +%{ + predicate(UseG1GC && ifelse($1,Acq,'needs_acquiring_load_exclusive(n)`,'!needs_acquiring_load_exclusive(n)`) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetN mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(ifelse($1,Acq,VOLATILE_REF_COST,2 * VOLATILE_REF_COST)); + format %{ "$2 $preval, $newval, [$mem]" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ $3($preval$$Register, $newval$$Register, $mem$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%}')dnl +XCHGN_INSN(,atomic_xchgw,atomic_xchgw) +XCHGN_INSN(Acq,atomic_xchgw_acq,atomic_xchgalw) + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, rFlagsReg cr) +%{ + // This instruction does not need an acquiring counterpart because it is only + // used for reference loading (Reference::get()). The same holds for g1LoadN. + predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadP mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(4 * INSN_COST); + format %{ "ldr $dst, $mem\t# ptr" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(iload_reg_mem); +%} + +// This pattern is generated automatically from g1_aarch64.m4. +// DO NOT EDIT ANYTHING IN THIS SECTION OF THE FILE +instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadN mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(4 * INSN_COST); + format %{ "ldrw $dst, $mem\t# compressed ptr" %} + ins_encode %{ + __ ldrw($dst$$Register, $mem$$Register); + if ((barrier_data() & G1C2BarrierPre) != 0) { + __ decode_heap_oop($tmp1$$Register, $dst$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + } + %} + ins_pipe(iload_reg_mem); +%} + +// END This section of the file is automatically generated. 
Do not edit -------------- diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index 2c7de0a58a2..716f6d87230 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -3890,6 +3890,7 @@ instruct loadRange(iRegI dst, memoryI mem) %{ instruct loadP(iRegP dst, memoryP mem) %{ + predicate(!(UseG1GC && n->as_Load()->barrier_data() != 0)); match(Set dst (LoadP mem)); ins_cost(MEMORY_REF_COST); size(4); @@ -4356,6 +4357,7 @@ instruct movSP(store_ptr_RegP dst, SPRegP src) %{ instruct storeP(memoryP mem, store_ptr_RegP src) %{ + predicate(!(UseG1GC && n->as_Store()->barrier_data() != 0)); match(Set mem (StoreP mem src)); ins_cost(MEMORY_REF_COST); size(4); @@ -5390,6 +5392,7 @@ instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI re %} instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr ) %{ + predicate(!(UseG1GC && n->as_LoadStore()->barrier_data() != 0)); match(Set res (CompareAndSwapP mem (Binary oldval newval))); effect( KILL ccr, TEMP tmp); size(28); @@ -5659,6 +5662,7 @@ instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr) %} instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{ + predicate(!(UseG1GC && n->as_LoadStore()->barrier_data() != 0)); match(Set res (GetAndSetP mem newval)); effect(KILL ccr, TEMP tmp, TEMP res); size(16); diff --git a/src/hotspot/cpu/arm/assembler_arm_32.hpp b/src/hotspot/cpu/arm/assembler_arm_32.hpp index dd04ad1ab3a..e53eefac097 100644 --- a/src/hotspot/cpu/arm/assembler_arm_32.hpp +++ b/src/hotspot/cpu/arm/assembler_arm_32.hpp @@ -119,8 +119,9 @@ class RegisterSet { } friend RegisterSet operator | (const RegisterSet set1, const RegisterSet set2) { - assert((set1._encoding & set2._encoding) == 0, - "encoding constraint"); +// why so strong constraint? +// assert((set1._encoding & set2._encoding) == 0, +// "encoding constraint"); return RegisterSet(set1._encoding | set2._encoding); } @@ -142,6 +143,11 @@ class RegisterSet { } return count; } + + static RegisterSet from(RegSet set) { + assert(set.size(), "RegSet must not be empty"); + return RegisterSet(set.bits()); + } }; #if R9_IS_SCRATCHED @@ -157,6 +163,10 @@ class FloatRegisterSet { public: + FloatRegisterSet() { + _encoding = 0; + } + FloatRegisterSet(FloatRegister reg) { if (reg->hi_bit() == 0) { _encoding = reg->hi_bits() << 12 | reg->lo_bit() << 22 | 1; @@ -185,6 +195,15 @@ class FloatRegisterSet { return (_encoding & 0xFFFFFF00) | ((_encoding & 0xFF) << 1); } + static FloatRegisterSet from(FloatRegSet set) { + assert(set.size(), "FloatRegSet must not be empty"); + // the vector load/store instructions operate on a set of consecutive registers. 
+ // for the sake of simplicity, write all registers between the first and last in the set + size_t range = (*set.rbegin())->encoding() - (*set.begin())->encoding() + 1; + // push_float stores float registers by pairs + return FloatRegisterSet(*set.begin(), (range+1)/2); + } + }; diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp index 3c5e29aa871..56ae7707fbf 100644 --- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp @@ -39,8 +39,10 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/g1/c1/g1BarrierSetC1.hpp" -#endif - +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> #ifdef PRODUCT @@ -106,70 +108,87 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas #endif // !R9_IS_SCRATCHED } +static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, + const Register thread, const Register value, const Register temp1, const Register temp2) { + assert_different_registers(value, temp1, temp2); + // Can we store original value in the thread's buffer? + // (The index field is typed as size_t.) + __ ldr(temp1, Address(thread, in_bytes(index_offset))); // temp1 := *(index address) + __ cbz(temp1, runtime); // jump to runtime if index == 0 (full buffer) + // The buffer is not full, store value into it. + __ sub(temp1, temp1, wordSize); // temp1 := next index + __ str(temp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index + __ ldr(temp2, Address(thread, in_bytes(buffer_offset))); // temp2 := buffer address + // Record the previous value + __ str(value, Address(temp2, temp1)); // *(buffer address + next index) := value + } + +static void generate_pre_barrier_fast_path(MacroAssembler* masm, + const Register thread, + const Register tmp1) { + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code"); + __ ldrb(tmp1, in_progress); +} + +static void generate_pre_barrier_slow_path(MacroAssembler* masm, + const Register obj, + const Register pre_val, + const Register thread, + const Register tmp1, + const Register tmp2, + Label& done, + Label& runtime) { + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0)); + } + + // Is the previous value null? + __ cbz(pre_val, done); + + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, + thread, pre_val, tmp1, tmp2); + __ b(done); +} + // G1 pre-barrier. -// Blows all volatile registers R0-R3, Rtemp, LR). -// If store_addr != noreg, then previous value is loaded from [store_addr]; -// in such case store_addr and new_val registers are preserved; +// Blows all volatile registers R0-R3, LR). +// If obj != noreg, then previous value is loaded from [obj]; +// in such case obj and pre_val registers are preserved; +// otherwise pre_val register is preserved.
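For orientation, the following is a minimal, self-contained C++ model of the logic the helpers above emit; it is a sketch, not HotSpot code, and names such as SatbQueueModel, pre_barrier_model, and runtime_enqueue are illustrative stand-ins for the per-thread SATB queue fields and G1BarrierSetRuntime::write_ref_field_pre_entry.

    #include <cstddef>

    // Stand-in for the per-thread SATB mark queue fields addressed off Rthread
    // (satb_mark_queue_active/index/buffer offsets).
    struct SatbQueueModel {
      bool   active;   // "is marking active?" flag tested by the fast path
      size_t index;    // bytes remaining in the buffer; 0 means the buffer is full
      void** buffer;   // thread-local buffer of recorded previous values
    };

    // Models the fast path + slow path split: test the active flag, load and
    // null-check the previous value, try the thread-local buffer, else call the runtime.
    inline void pre_barrier_model(SatbQueueModel& q, void** obj, void* pre_val,
                                  void (*runtime_enqueue)(void*)) {
      if (!q.active) {               // generate_pre_barrier_fast_path
        return;
      }
      if (obj != nullptr) {          // generate_pre_barrier_slow_path:
        pre_val = *obj;              // load the previous value if not preloaded
      }
      if (pre_val == nullptr) {
        return;                      // null previous values are not recorded
      }
      if (q.index != 0) {            // generate_queue_test_and_insertion
        q.index -= sizeof(void*);
        q.buffer[q.index / sizeof(void*)] = pre_val;
      } else {
        runtime_enqueue(pre_val);    // runtime fallback (write_ref_field_pre_entry)
      }
    }
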
void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, - Register store_addr, - Register new_val, + Register obj, Register pre_val, Register tmp1, Register tmp2) { Label done; Label runtime; - if (store_addr != noreg) { - assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg); - } else { - assert (new_val == noreg, "should be"); - assert_different_registers(pre_val, tmp1, tmp2, noreg); - } + assert_different_registers(obj, pre_val, tmp1, tmp2, noreg); - Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); - Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); - Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code"); - __ ldrb(tmp1, in_progress); + generate_pre_barrier_fast_path(masm, Rthread, tmp1); + // If marking is not active (*(mark queue active address) == 0), jump to done __ cbz(tmp1, done); - // Do we need to load the previous value? - if (store_addr != noreg) { - __ load_heap_oop(pre_val, Address(store_addr, 0)); - } - - // Is the previous value null? - __ cbz(pre_val, done); - - // Can we store original value in the thread's buffer? - // Is index == 0? - // (The index field is typed as size_t.) - - __ ldr(tmp1, index); // tmp1 := *index_adr - __ ldr(tmp2, buffer); - - __ subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize - __ b(runtime, lt); // If negative, goto runtime - - __ str(tmp1, index); // *index_adr := tmp1 - - // Record the previous value - __ str(pre_val, Address(tmp2, tmp1)); - __ b(done); + generate_pre_barrier_slow_path(masm, obj, pre_val, Rthread, tmp1, tmp2, done, runtime); __ bind(runtime); // save the live input values - if (store_addr != noreg) { - // avoid raw_push to support any ordering of store_addr and new_val - __ push(RegisterSet(store_addr) | RegisterSet(new_val)); - } else { - __ push(pre_val); + RegisterSet set = RegisterSet(pre_val) | RegisterSet(R0, R3) | RegisterSet(R12); + // save the live input values + if (obj != noreg) { + // avoid raw_push to support any ordering of store_addr and pre_val + set = set | RegisterSet(obj); } + __ push(set); + if (pre_val != R0) { __ mov(R0, pre_val); } @@ -177,33 +196,17 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), R0, R1); - if (store_addr != noreg) { - __ pop(RegisterSet(store_addr) | RegisterSet(new_val)); - } else { - __ pop(pre_val); - } - + __ pop(set); __ bind(done); } -// G1 post-barrier. -// Blows all volatile registers R0-R3, Rtemp, LR). -void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, - Register store_addr, - Register new_val, - Register tmp1, - Register tmp2, - Register tmp3) { - - Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); - Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); - - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); - Label done; - Label runtime; - +static void generate_post_barrier_fast_path(MacroAssembler* masm, + const Register store_addr, + const Register new_val, + const Register tmp1, + const Register tmp2, + Label& done, + bool new_val_may_be_null) { // Does store cross heap regions? 
__ eor(tmp1, store_addr, new_val); @@ -211,22 +214,31 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, __ b(done, eq); // crosses regions, storing null? - - __ cbz(new_val, done); - + if (new_val_may_be_null) { + __ cbz(new_val, done); + } // storing region crossing non-null, is card already dirty? const Register card_addr = tmp1; - __ mov_address(tmp2, (address)ct->byte_map_base()); + CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); + __ mov_address(tmp2, (address)ct->card_table()->byte_map_base()); __ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift())); __ ldrb(tmp2, Address(card_addr)); __ cmp(tmp2, (int)G1CardTable::g1_young_card_val()); - __ b(done, eq); +} +static void generate_post_barrier_slow_path(MacroAssembler* masm, + const Register thread, + const Register tmp1, + const Register tmp2, + const Register tmp3, + Label& done, + Label& runtime) { __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2); - assert(CardTable::dirty_card_val() == 0, "adjust this code"); + // card_addr is loaded by generate_post_barrier_fast_path + const Register card_addr = tmp1; __ ldrb(tmp2, Address(card_addr)); __ cbz(tmp2, done); @@ -234,29 +246,139 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, // dirty card and log. __ strb(__ zero_register(tmp2), Address(card_addr)); - - __ ldr(tmp2, queue_index); - __ ldr(tmp3, buffer); - - __ subs(tmp2, tmp2, wordSize); - __ b(runtime, lt); // go to runtime if now negative - - __ str(tmp2, queue_index); - - __ str(card_addr, Address(tmp3, tmp2)); + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, + thread, card_addr, tmp2, tmp3); __ b(done); +} + + +// G1 post-barrier. +// Blows all volatile registers R0-R3, LR). 
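As with the pre-barrier, a minimal, self-contained C++ model of the filtering done by generate_post_barrier_fast_path and generate_post_barrier_slow_path is sketched below; it is not HotSpot code, and CardQueueModel, post_barrier_model, and runtime_enqueue are illustrative stand-ins for the dirty card queue fields and G1BarrierSetRuntime::write_ref_field_post_entry.

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    // Stand-in for the per-thread dirty card queue fields (index/buffer offsets off Rthread).
    struct CardQueueModel {
      size_t    index;    // bytes remaining in the buffer; 0 means the buffer is full
      uint8_t** buffer;   // thread-local buffer of card addresses
    };

    // Models the fast-path filters (same region, null value, young card) followed by the
    // slow path (StoreLoad fence, re-check for dirty, dirty the card, enqueue it).
    inline void post_barrier_model(uintptr_t store_addr, uintptr_t new_val,
                                   uint8_t* byte_map_base, unsigned card_shift,
                                   unsigned region_shift, uint8_t young_card_val,
                                   uint8_t dirty_card_val, CardQueueModel& q,
                                   void (*runtime_enqueue)(uint8_t*)) {
      if (((store_addr ^ new_val) >> region_shift) == 0) return;  // same region: filtered
      if (new_val == 0) return;                                   // storing null: filtered
      uint8_t* card = byte_map_base + (store_addr >> card_shift);
      if (*card == young_card_val) return;                        // young card: filtered
      std::atomic_thread_fence(std::memory_order_seq_cst);        // StoreLoad membar
      if (*card == dirty_card_val) return;                        // already dirty: filtered
      *card = dirty_card_val;                                     // dirty the card
      if (q.index != 0) {                                         // room in the buffer?
        q.index -= sizeof(void*);
        q.buffer[q.index / sizeof(void*)] = card;
      } else {
        runtime_enqueue(card);                                    // write_ref_field_post_entry
      }
    }
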
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2, + Register tmp3) { + Label done; + Label runtime; + + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */); + // If card is young, jump to done + // card_addr and card are loaded by generate_post_barrier_fast_path + const Register card = tmp2; + const Register card_addr = tmp1; + __ b(done, eq); + generate_post_barrier_slow_path(masm, Rthread, card_addr, tmp2, tmp3, done, runtime); __ bind(runtime); + RegisterSet set = RegisterSet(store_addr) | RegisterSet(R0, R3) | RegisterSet(R12); + __ push(set); + if (card_addr != R0) { __ mov(R0, card_addr); } __ mov(R1, Rthread); __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), R0, R1); + __ pop(set); + __ bind(done); } +#if defined(COMPILER2) + +static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path, Register tmp1) { + SaveLiveRegisters save_registers(masm, stub); + if (c_rarg0 != arg) { + __ mov(c_rarg0, arg); + } + __ mov(c_rarg1, Rthread); + __ call_VM_leaf(runtime_path, R0, R1); +} + +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* stub) { + assert(thread == Rthread, "must be"); + assert_different_registers(obj, pre_val, tmp1, tmp2); + assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); + + stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2); + + generate_pre_barrier_fast_path(masm, thread, tmp1); + // If marking is active (*(mark queue active address) != 0), jump to stub (slow path) + __ cbnz(tmp1, *stub->entry()); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); + Register tmp2 = stub->tmp2(); + + __ bind(*stub->entry()); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), tmp1); + __ b(*stub->continuation()); +} + +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + Register tmp3, + G1PostBarrierStubC2* stub) { + assert(thread == Rthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, noreg); + + stub->initialize_registers(thread, tmp1, tmp2, tmp3); + + bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0; + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null); + // If card is not young, jump to stub (slow path) + __ b(*stub->entry(), ne); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + 
Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); // tmp1 holds the card address. + Register tmp2 = stub->tmp2(); + Register tmp3 = stub->tmp3(); + + __ bind(*stub->entry()); + generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, tmp3, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp2); + __ b(*stub->continuation()); +} + +#endif // COMPILER2 + void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) { bool on_oop = type == T_OBJECT || type == T_ARRAY; @@ -268,7 +390,7 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator if (on_oop && on_reference) { // Generate the G1 pre-barrier code to log the value of // the referent field in an SATB buffer. - g1_write_barrier_pre(masm, noreg, noreg, dst, tmp1, tmp2); + g1_write_barrier_pre(masm, noreg, dst, tmp1, tmp2); } } @@ -295,7 +417,7 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco } if (needs_pre_barrier) { - g1_write_barrier_pre(masm, store_addr, new_val, tmp1, tmp2, tmp3); + g1_write_barrier_pre(masm, store_addr, tmp3 /*pre_val*/, tmp1, tmp2); } if (is_null) { diff --git a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp index 52932faa3e4..aefde19142e 100644 --- a/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp +++ b/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp @@ -33,6 +33,8 @@ class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -43,7 +45,6 @@ protected: void g1_write_barrier_pre(MacroAssembler* masm, Register store_addr, - Register new_val, Register pre_val, Register tmp1, Register tmp2); @@ -70,6 +71,29 @@ public: void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); #endif + +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + Register tmp3, + G1PostBarrierStubC2* c2_stub); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif + }; #endif // CPU_ARM_GC_G1_G1BARRIERSETASSEMBLER_ARM_HPP diff --git a/src/hotspot/cpu/arm/gc/g1/g1_arm.ad b/src/hotspot/cpu/arm/gc/g1/g1_arm.ad new file mode 100644 index 00000000000..8a0a9e1aa53 --- /dev/null +++ b/src/hotspot/cpu/arm/gc/g1/g1_arm.ad @@ -0,0 +1,201 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. 
+// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +source_hpp %{ + +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#include "gc/shared/gc_globals.hpp" + +%} + +source %{ + +#include "gc/g1/g1BarrierSetAssembler_arm.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" + +static void write_barrier_pre(MacroAssembler* masm, + const MachNode* node, + Register obj, + Register pre_val, + Register tmp1, + Register tmp2, + RegSet preserve = RegSet(), + RegSet no_preserve = RegSet()) { + if (!G1PreBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node); + for (RegSetIterator reg = preserve.begin(); *reg != noreg; ++reg) { + stub->preserve(*reg); + } + for (RegSetIterator reg = no_preserve.begin(); *reg != noreg; ++reg) { + stub->dont_preserve(*reg); + } + g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, Rthread, tmp1, tmp2, stub); +} + +static void write_barrier_post(MacroAssembler* masm, + const MachNode* node, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2, + Register tmp3) { + if (!G1PostBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node); + g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Rthread, tmp1, tmp2, tmp3, stub); +} + +%} + +instruct g1StoreP(indirect mem, iRegP src, iRegP tmp1, iRegP tmp2, iRegP tmp3, flagsReg icc) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreP mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL icc); + ins_cost(2 * (MEMORY_REF_COST + BRANCH_COST)); + format %{ "sd $src, $mem\t# ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ str($src$$Register, Address($mem$$Register)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + $tmp3$$Register /* tmp3 */); + %} + ins_pipe(istore_mem_reg); +%} + +instruct g1CompareAndSwapP(iRegI res, indirect mem, iRegP newval, iRegP tmp1, iRegP tmp2, iRegP tmp3, iRegP oldval, flagsReg ccr ) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect(KILL ccr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3); + 
ins_cost(4 * (MEMORY_REF_COST + BRANCH_COST)); + format %{ "loop: \n\t" + "LDREX $tmp1, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP $tmp1, $oldval\n\t" + "STREX.eq $tmp1, $newval, $mem\n\t" + "MOV.ne $tmp1, 0 \n\t" + "EORS.eq $tmp1,$tmp1, 1 \n\t" + "B.eq loop \n\t" + "MOV $res, $tmp1" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + Label loop; + __ bind(loop); + __ ldrex($tmp1$$Register,$mem$$Address); + __ cmp($tmp1$$Register, $oldval$$Register); + __ strex($tmp1$$Register, $newval$$Register, $mem$$Address, eq); + __ mov($tmp1$$Register, 0, ne); + __ eors($tmp1$$Register, $tmp1$$Register, 1, eq); + __ b(loop, eq); + __ mov($res$$Register, $tmp1$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + $tmp3$$Register /* tmp3 */); + %} + ins_pipe(long_memory_op); +%} + + +instruct g1GetAndSetP(indirect mem, iRegP newval, iRegP tmp1, iRegP tmp2, iRegP tmp3, iRegP preval, flagsReg ccr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(KILL ccr, TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(4 * (MEMORY_REF_COST + BRANCH_COST)); + format %{ "loop: \n\t" + "LDREX $preval, $mem\n\t" + "STREX $tmp1, $newval, $mem\n\t" + "CMP $tmp1, 0 \n\t" + "B.ne loop \n\t" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + Label loop; + __ bind(loop); + __ ldrex($preval$$Register,$mem$$Address); + __ strex($tmp1$$Register, $newval$$Register, $mem$$Address); + __ cmp($tmp1$$Register, 0); + __ b(loop, ne); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + $tmp3$$Register /* tmp3 */); + %} + ins_pipe(long_memory_op); +%} + +instruct g1LoadP(iRegP dst, indirect mem, iRegP tmp1, iRegP tmp2, flagsReg icc) +%{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadP mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL icc); + ins_cost(MEMORY_REF_COST + BRANCH_COST); + format %{ "ld $dst, $mem\t# ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + __ ldr($dst$$Register, Address($mem$$Register)); + write_barrier_pre(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(iload_mem); +%} diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp index ea19730673c..c13a259a1b9 100644 --- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp 
@@ -31,6 +31,10 @@ #include "runtime/javaThread.hpp" #include "runtime/stubRoutines.hpp" +#ifdef COMPILER2 +#include "gc/shared/c2/barrierSetC2.hpp" +#endif // COMPILER2 + #define __ masm-> void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, @@ -206,7 +210,57 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { #ifdef COMPILER2 OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { - Unimplemented(); // This must be implemented to support late barrier expansion. + if (!OptoReg::is_reg(opto_reg)) { + return OptoReg::Bad; + } + + const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if (!vm_reg->is_valid()){ + // skip APSR and FPSCR + return OptoReg::Bad; + } + + return opto_reg; } +void SaveLiveRegisters::initialize(BarrierStubC2* stub) { + // Record registers that needs to be saved/restored + RegMaskIterator rmi(stub->preserve_set()); + while (rmi.has_next()) { + const OptoReg::Name opto_reg = rmi.next(); + if (OptoReg::is_reg(opto_reg)) { + const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if (vm_reg->is_Register()) { + gp_regs += RegSet::of(vm_reg->as_Register()); + } else if (vm_reg->is_FloatRegister()) { + fp_regs += FloatRegSet::of(vm_reg->as_FloatRegister()); + } else { + fatal("Unknown register type"); + } + } + } + // Remove C-ABI SOE registers that will be updated + gp_regs -= RegSet::range(R4, R11) + RegSet::of(R13, R15); + + // Remove C-ABI SOE fp registers + fp_regs -= FloatRegSet::range(S16, S31); +} + +SaveLiveRegisters::SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub) + : masm(masm), + gp_regs(), + fp_regs() { + // Figure out what registers to save/restore + initialize(stub); + + // Save registers + if (gp_regs.size() > 0) __ push(RegisterSet::from(gp_regs)); + if (fp_regs.size() > 0) __ fpush(FloatRegisterSet::from(fp_regs)); +} + +SaveLiveRegisters::~SaveLiveRegisters() { + // Restore registers + if (fp_regs.size() > 0) __ fpop(FloatRegisterSet::from(fp_regs)); + if (gp_regs.size() > 0) __ pop(RegisterSet::from(gp_regs)); +} #endif // COMPILER2 diff --git a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp index 60021390ea2..054d172f463 100644 --- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp +++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp @@ -31,7 +31,9 @@ #ifdef COMPILER2 #include "code/vmreg.hpp" #include "opto/optoreg.hpp" +#include "opto/regmask.hpp" +class BarrierStubC2; class Node; #endif // COMPILER2 @@ -69,4 +71,26 @@ public: #endif // COMPILER2 }; +#ifdef COMPILER2 +// This class saves and restores the registers that need to be preserved across +// the runtime call represented by a given C2 barrier stub. Use as follows: +// { +// SaveLiveRegisters save(masm, stub); +// .. +// __ bl(...); +// .. 
+// } +class SaveLiveRegisters { +private: + MacroAssembler* const masm; + RegSet gp_regs; + FloatRegSet fp_regs; + +public: + void initialize(BarrierStubC2* stub); + SaveLiveRegisters(MacroAssembler* masm, BarrierStubC2* stub); + ~SaveLiveRegisters(); +}; + +#endif // COMPILER2 #endif // CPU_ARM_GC_SHARED_BARRIERSETASSEMBLER_ARM_HPP diff --git a/src/hotspot/cpu/arm/register_arm.hpp b/src/hotspot/cpu/arm/register_arm.hpp index 9f486d2a625..d8961fd2935 100644 --- a/src/hotspot/cpu/arm/register_arm.hpp +++ b/src/hotspot/cpu/arm/register_arm.hpp @@ -303,6 +303,31 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { static const int max_fpr; }; +typedef AbstractRegSet RegSet; +typedef AbstractRegSet FloatRegSet; + +template <> +inline Register AbstractRegSet::first() { + if (_bitset == 0) { return noreg; } + return as_Register(count_trailing_zeros(_bitset)); +} + + +template <> +inline FloatRegister AbstractRegSet::first() { + uint32_t first = _bitset & -_bitset; + return first ? as_FloatRegister(exact_log2(first)) : fnoreg; +} + +template <> +inline FloatRegister AbstractRegSet::last() { + if (_bitset == 0) { return fnoreg; } + int last = max_size() - 1 - count_leading_zeros(_bitset); + return as_FloatRegister(last); +} + + + class VFPSystemRegisterImpl; typedef VFPSystemRegisterImpl* VFPSystemRegister; class VFPSystemRegisterImpl : public AbstractRegisterImpl { diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp index 7d230d301c2..39693bdf925 100644 --- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.cpp @@ -41,10 +41,20 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/g1/c1/g1BarrierSetC1.hpp" -#endif +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> +static void generate_marking_inactive_test(MacroAssembler* masm) { + int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbz(R0, active_offset, R16_thread); // tmp1 := *(mark queue active address) + __ cmpwi(CCR0, R0, 0); +} + void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register from, Register to, Register count, Register preserve1, Register preserve2) { @@ -58,13 +68,7 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm Label filtered; // Is marking active? - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ lwz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread); - } else { - guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ lbz(R0, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread); - } - __ cmpdi(CCR0, R0, 0); + generate_marking_inactive_test(masm); __ beq(CCR0, filtered); __ save_LR(R0); @@ -109,35 +113,48 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas __ restore_LR(R0); } +static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, + const Register value, const Register temp) { + assert_different_registers(value, temp); + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) 
+ __ ld(temp, in_bytes(index_offset), R16_thread); // temp := *(index address) + __ cmpdi(CCR0, temp, 0); // jump to runtime if index == 0 (full buffer) + __ beq(CCR0, runtime); + // The buffer is not full, store value into it. + __ ld(R0, in_bytes(buffer_offset), R16_thread); // R0 := buffer address + __ addi(temp, temp, -wordSize); // temp := next index + __ std(temp, in_bytes(index_offset), R16_thread); // *(index address) := next index + __ stdx(value, temp, R0); // *(buffer address + next index) := value +} + void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, DecoratorSet decorators, Register obj, RegisterOrConstant ind_or_offs, Register pre_val, Register tmp1, Register tmp2, MacroAssembler::PreservationLevel preservation_level) { + assert_different_registers(pre_val, tmp1, tmp2); + bool not_null = (decorators & IS_NOT_NULL) != 0, preloaded = obj == noreg; Register nv_save = noreg; - if (preloaded) { + // Determine necessary runtime invocation preservation measures + const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR; + const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS; + const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS; + int nbytes_save = 0; + + if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) { // We are not loading the previous value so make // sure that we don't trash the value in pre_val // with the code below. - assert_different_registers(pre_val, tmp1, tmp2); - if (pre_val->is_volatile()) { - nv_save = !tmp1->is_volatile() ? tmp1 : tmp2; - assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register"); - } + nv_save = !tmp1->is_volatile() ? tmp1 : tmp2; + assert(!nv_save->is_volatile(), "need one nv temp register if pre_val lives in volatile register"); } Label runtime, filtered; - // Is marking active? - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ lwz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread); - } else { - guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ lbz(tmp1, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()), R16_thread); - } - __ cmpdi(CCR0, tmp1, 0); + generate_marking_inactive_test(masm); __ beq(CCR0, filtered); // Do we need to load the previous value? @@ -175,28 +192,12 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator // Can we store original value in the thread's buffer? // Is index == 0? // (The index field is typed as size_t.) - const Register Rbuffer = tmp1, Rindex = tmp2; - - __ ld(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread); - __ cmpdi(CCR0, Rindex, 0); - __ beq(CCR0, runtime); // If index == 0, goto runtime. - __ ld(Rbuffer, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()), R16_thread); - - __ addi(Rindex, Rindex, -wordSize); // Decrement index. - __ std(Rindex, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()), R16_thread); - - // Record the previous value. 
- __ stdx(pre_val, Rbuffer, Rindex); + generate_queue_insertion(masm, G1ThreadLocalData::satb_mark_queue_index_offset(), G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, pre_val, tmp1); __ b(filtered); __ bind(runtime); - // Determine necessary runtime invocation preservation measures - const bool needs_frame = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR; - const bool preserve_gp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS; - const bool preserve_fp_registers = preservation_level >= MacroAssembler::PRESERVATION_FRAME_LR_GP_FP_REGS; - int nbytes_save = 0; - // May need to preserve LR. Also needed if current frame is not compatible with C calling convention. if (needs_frame) { if (preserve_gp_registers) { @@ -210,11 +211,11 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator __ push_frame_reg_args(nbytes_save, tmp2); } - if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) { + if (nv_save != noreg) { __ mr(nv_save, pre_val); // Save pre_val across C call if it was preloaded. } __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, R16_thread); - if (pre_val->is_volatile() && preloaded && !preserve_gp_registers) { + if (nv_save != noreg) { __ mr(pre_val, nv_save); // restore } @@ -230,6 +231,26 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator __ bind(filtered); } +static void generate_region_crossing_test(MacroAssembler* masm, const Register store_addr, const Register new_val) { + __ xorr(R0, store_addr, new_val); // tmp1 := store address ^ new value + __ srdi_(R0, R0, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes) +} + +static Address generate_card_young_test(MacroAssembler* masm, const Register store_addr, const Register tmp1, const Register tmp2) { + CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); + __ load_const_optimized(tmp1, (address)(ct->card_table()->byte_map_base()), tmp2); + __ srdi(tmp2, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base + __ lbzx(R0, tmp1, tmp2); // tmp1 := card address + __ cmpwi(CCR0, R0, (int)G1CardTable::g1_young_card_val()); + return Address(tmp1, tmp2); // return card address +} + +static void generate_card_dirty_test(MacroAssembler* masm, Address card_addr) { + __ membar(Assembler::StoreLoad); // Must reload after StoreLoad membar due to concurrent refinement + __ lbzx(R0, card_addr.base(), card_addr.index()); // tmp2 := card + __ cmpwi(CCR0, R0, (int)G1CardTable::dirty_card_val()); // tmp2 := card == dirty_card_val? +} + void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, DecoratorSet decorators, Register store_addr, Register new_val, Register tmp1, Register tmp2, Register tmp3, @@ -241,9 +262,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); - // Does store cross heap regions? - __ xorr(tmp1, store_addr, new_val); - __ srdi_(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); + generate_region_crossing_test(masm, store_addr, new_val); __ beq(CCR0, filtered); // Crosses regions, storing null? @@ -257,43 +276,22 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato __ beq(CCR0, filtered); } - // Storing region crossing non-null, is card already dirty? 
- const Register Rcard_addr = tmp1; - Register Rbase = tmp2; - __ load_const_optimized(Rbase, (address)(ct->card_table()->byte_map_base()), /*temp*/ tmp3); - - __ srdi(Rcard_addr, store_addr, CardTable::card_shift()); - - // Get the address of the card. - __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr); - __ cmpwi(CCR0, tmp3, (int)G1CardTable::g1_young_card_val()); + Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2); __ beq(CCR0, filtered); - __ membar(Assembler::StoreLoad); - __ lbzx(/*card value*/ tmp3, Rbase, Rcard_addr); // Reload after membar. - __ cmpwi(CCR0, tmp3 /* card value */, (int)G1CardTable::dirty_card_val()); + generate_card_dirty_test(masm, card_addr); __ beq(CCR0, filtered); - // Storing a region crossing, non-null oop, card is clean. - // Dirty card and log. - __ li(tmp3, (int)G1CardTable::dirty_card_val()); - //release(); // G1: oops are allowed to get visible after dirty marking. - __ stbx(tmp3, Rbase, Rcard_addr); + __ li(R0, (int)G1CardTable::dirty_card_val()); + __ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val - __ add(Rcard_addr, Rbase, Rcard_addr); // This is the address which needs to get enqueued. - Rbase = noreg; // end of lifetime + Register Rcard_addr = tmp3; + __ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued. - const Register Rqueue_index = tmp2, - Rqueue_buf = tmp3; - __ ld(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread); - __ cmpdi(CCR0, Rqueue_index, 0); - __ beq(CCR0, runtime); // index == 0 then jump to runtime - __ ld(Rqueue_buf, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()), R16_thread); - - __ addi(Rqueue_index, Rqueue_index, -wordSize); // decrement index - __ std(Rqueue_index, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()), R16_thread); - - __ stdx(Rcard_addr, Rqueue_buf, Rqueue_index); // store card + generate_queue_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, Rcard_addr, tmp1); __ b(filtered); __ bind(runtime); @@ -392,6 +390,142 @@ void G1BarrierSetAssembler::resolve_jobject(MacroAssembler* masm, Register value __ bind(done); } +#ifdef COMPILER2 + +static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) { + SaveLiveRegisters save_registers(masm, stub); + __ call_VM_leaf(runtime_path, arg, R16_thread); +} + +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* stub) { + assert_different_registers(obj, tmp1, tmp2, R0); + assert_different_registers(pre_val, tmp1, R0); + assert(!UseCompressedOops || tmp2 != noreg, "tmp2 needed with CompressedOops"); + + stub->initialize_registers(obj, pre_val, R16_thread, tmp1, tmp2); + + generate_marking_inactive_test(masm); + __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *stub->entry()); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register tmp1 = stub->tmp1(); + + __ bind(*stub->entry()); + + if (obj != noreg) { + // Note: C2 currently doesn't use 
implicit null checks with barriers. + // Otherwise, obj could be null and the following instruction would raise a SIGSEGV. + if (UseCompressedOops) { + __ lwz(pre_val, 0, obj); + } else { + __ ld(pre_val, 0, obj); + } + } + __ cmpdi(CCR0, pre_val, 0); + __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation()); + + Register pre_val_decoded = pre_val; + if (UseCompressedOops) { + pre_val_decoded = __ decode_heap_oop_not_null(stub->tmp2(), pre_val); + } + + generate_queue_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, pre_val_decoded, tmp1); + __ b(*stub->continuation()); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, pre_val_decoded, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); + __ b(*stub->continuation()); +} + +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* stub, + bool decode_new_val) { + assert_different_registers(store_addr, new_val, tmp1, R0); + assert_different_registers(store_addr, tmp1, tmp2, R0); + + stub->initialize_registers(R16_thread, tmp1, tmp2); + + bool null_check_required = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0; + Register new_val_decoded = new_val; + + if (decode_new_val) { + assert(UseCompressedOops, "or should not be here"); + if (null_check_required && CompressedOops::base() != nullptr) { + // We prefer doing the null check after the region crossing check. + // Only compressed oop modes with base != null require a null check here. + __ cmpwi(CCR0, new_val, 0); + __ beq(CCR0, *stub->continuation()); + null_check_required = false; + } + new_val_decoded = __ decode_heap_oop_not_null(tmp2, new_val); + } + + generate_region_crossing_test(masm, store_addr, new_val_decoded); + __ beq(CCR0, *stub->continuation()); + + // crosses regions, storing null? + if (null_check_required) { + __ cmpdi(CCR0, new_val_decoded, 0); + __ beq(CCR0, *stub->continuation()); + } + + Address card_addr = generate_card_young_test(masm, store_addr, tmp1, tmp2); + assert(card_addr.base() == tmp1 && card_addr.index() == tmp2, "needed by post barrier stub"); + __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *stub->entry()); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Address card_addr(stub->tmp1(), stub->tmp2()); // See above. + + __ bind(*stub->entry()); + + generate_card_dirty_test(masm, card_addr); + __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), *stub->continuation()); + + __ li(R0, (int)G1CardTable::dirty_card_val()); + __ stbx(R0, card_addr.base(), card_addr.index()); // *(card address) := dirty_card_val + + Register Rcard_addr = stub->tmp1(); + __ add(Rcard_addr, card_addr.base(), card_addr.index()); // This is the address which needs to get enqueued. 
+ + generate_queue_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, Rcard_addr, stub->tmp2()); + __ b(*stub->continuation()); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, Rcard_addr, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)); + __ b(*stub->continuation()); +} + +#endif // COMPILER2 + #ifdef COMPILER1 #undef __ @@ -470,13 +604,7 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* __ std(tmp2, -24, R1_SP); // Is marking still active? - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ lwz(tmp, satb_q_active_byte_offset, R16_thread); - } else { - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ lbz(tmp, satb_q_active_byte_offset, R16_thread); - } - __ cmpdi(CCR0, tmp, 0); + generate_marking_inactive_test(sasm); __ beq(CCR0, marking_not_active); __ bind(restart); diff --git a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp index d9a252ff6ea..1c9fe8a5d10 100644 --- a/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/gc/g1/g1BarrierSetAssembler_ppc.hpp @@ -30,10 +30,16 @@ #include "gc/shared/modRefBarrierSetAssembler.hpp" #include "utilities/macros.hpp" +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif + class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -59,6 +65,25 @@ protected: MacroAssembler::PreservationLevel preservation_level); public: +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* c2_stub, + bool decode_new_val); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif #ifdef COMPILER1 void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); diff --git a/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad b/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad new file mode 100644 index 00000000000..f4163242cad --- /dev/null +++ b/src/hotspot/cpu/ppc/gc/g1/g1_ppc.ad @@ -0,0 +1,684 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2024 SAP SE. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). 
+// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +source_hpp %{ + +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#include "gc/shared/gc_globals.hpp" + +%} + +source %{ + +#include "gc/g1/g1BarrierSetAssembler_ppc.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" + +static void pre_write_barrier(MacroAssembler* masm, + const MachNode* node, + Register obj, + Register pre_val, + Register tmp1, + Register tmp2 = noreg, // only needed with CompressedOops when pre_val needs to be preserved + RegSet preserve = RegSet(), + RegSet no_preserve = RegSet()) { + if (!G1PreBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node); + for (RegSetIterator reg = preserve.begin(); *reg != noreg; ++reg) { + stub->preserve(*reg); + } + for (RegSetIterator reg = no_preserve.begin(); *reg != noreg; ++reg) { + stub->dont_preserve(*reg); + } + g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, tmp1, (tmp2 != noreg) ? tmp2 : pre_val, stub); +} + +static void post_write_barrier(MacroAssembler* masm, + const MachNode* node, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2, + bool decode_new_val = false) { + if (!G1PostBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node); + g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, tmp1, tmp2, stub, decode_new_val); +} + +%} + +instruct g1StoreP(indirect mem, iRegPsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreP mem src)); + effect(TEMP tmp1, TEMP tmp2, KILL cr0); + ins_cost(2 * MEMORY_REF_COST); + format %{ "std $mem, $src\t# ptr" %} + ins_encode %{ + pre_write_barrier(masm, this, + $mem$$Register, + $tmp1$$Register, + $tmp2$$Register, + noreg, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ std($src$$Register, 0, $mem$$Register); + post_write_barrier(masm, this, + $mem$$Register, + $src$$Register /* new_val */, + $tmp1$$Register, + $tmp2$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1StoreN(indirect mem, iRegNsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, KILL cr0); + ins_cost(2 * MEMORY_REF_COST); + format %{ "stw $mem, $src\t# ptr" %} + ins_encode %{ + pre_write_barrier(masm, this, + $mem$$Register, + $tmp1$$Register, + $tmp2$$Register, + noreg, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ stw($src$$Register, 0, $mem$$Register); + post_write_barrier(masm, this, + $mem$$Register, + $src$$Register /* new_val */, + $tmp1$$Register, + $tmp2$$Register, + true /* decode_new_val */); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1EncodePAndStoreN(indirect 
mem, iRegPsrc src, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, KILL cr0); + ins_cost(2 * MEMORY_REF_COST); + format %{ "encode_heap_oop $src\n\t" + "stw $mem, $src\t# ptr" %} + ins_encode %{ + pre_write_barrier(masm, this, + $mem$$Register, + $tmp1$$Register, + $tmp2$$Register, + noreg, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + Register encoded_oop = noreg; + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + encoded_oop = __ encode_heap_oop($tmp2$$Register, $src$$Register); + } else { + encoded_oop = __ encode_heap_oop_not_null($tmp2$$Register, $src$$Register); + } + __ stw(encoded_oop, 0, $mem$$Register); + post_write_barrier(masm, this, + $mem$$Register, + $src$$Register /* new_val */, + $tmp1$$Register, + $tmp2$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndExchangeP(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndExchangeNode*)n)->order() != MemNode::acquire && ((CompareAndExchangeNode*)n)->order() != MemNode::seqcst)); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "cmpxchgd $newval, $mem" %} + ins_encode %{ + Label no_update; + __ cmpxchgd(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndExchangeP_acq(iRegPdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndExchangeNode*)n)->order() == MemNode::acquire || ((CompareAndExchangeNode*)n)->order() == MemNode::seqcst)); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "cmpxchgd acq $newval, $mem" %} + ins_encode %{ + Label no_update; + __ cmpxchgd(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register); + __ bind(no_update); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndExchangeN(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndExchangeNode*)n)->order() != MemNode::acquire && ((CompareAndExchangeNode*)n)->order() != MemNode::seqcst)); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "cmpxchgw $newval, $mem" %} + ins_encode %{ + Label no_update; + __ cmpxchgw(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register, + true /* decode_new_val */); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndExchangeN_acq(iRegNdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndExchangeNode*)n)->order() == MemNode::acquire || ((CompareAndExchangeNode*)n)->order() == MemNode::seqcst)); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "cmpxchgw acq $newval, $mem" %} + ins_encode %{ + Label no_update; + __ cmpxchgw(CCR0, $res$$Register, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register, + true /* decode_new_val */); + __ bind(no_update); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndSwapP(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "CMPXCHGD $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */); + __ li($res$$Register, 1); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndSwapP_acq(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */); + __ li($res$$Register, 1); + __ bind(no_update); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndSwapN(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "CMPXCHGW $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */, + true /* decode_new_val */); + __ li($res$$Register, 1); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1CompareAndSwapN_acq(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "CMPXCHGW acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */, + true /* decode_new_val */); + __ li($res$$Register, 1); + __ bind(no_update); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct weakG1CompareAndSwapP(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "weak CMPXCHGD $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */); + __ li($res$$Register, 1); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakG1CompareAndSwapP_acq(iRegIdst res, indirect mem, iRegPsrc oldval, iRegPsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "weak CMPXCHGD acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgd(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */); + __ li($res$$Register, 1); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + __ bind(no_update); // weak version requires no memory barrier on failure + %} + ins_pipe(pipe_class_default); +%} + +instruct weakG1CompareAndSwapN(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst)); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "weak CMPXCHGW $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */, + true /* decode_new_val */); + __ li($res$$Register, 1); + __ bind(no_update); + %} + ins_pipe(pipe_class_default); +%} + +instruct weakG1CompareAndSwapN_acq(iRegIdst res, indirect mem, iRegNsrc oldval, iRegNsrc newval, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0 && + (((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst)); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP_DEF res, TEMP tmp, KILL cr0); + format %{ "weak CMPXCHGW acq $res, $mem, $oldval, $newval; as bool; ptr" %} + ins_encode %{ + Label no_update; + __ li($res$$Register, 0); + __ cmpxchgw(CCR0, R0, $oldval$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), + noreg, &no_update, true, true); + // Pass oldval to SATB which is the only value which can get overwritten. + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg, + $oldval$$Register /* pre_val */, + $tmp$$Register, + $res$$Register /* temp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp$$Register, + $res$$Register /* temp */, + true /* decode_new_val */); + __ li($res$$Register, 1); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + // isync would be sufficient in case of CompareAndExchangeAcquire, but we currently don't optimize for that. 
+ __ sync(); + } + __ bind(no_update); // weak version requires no memory barrier on failure + %} + ins_pipe(pipe_class_default); +%} + +instruct g1GetAndSetP(iRegPdst res, indirect mem, iRegPsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (GetAndSetP mem newval)); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "GetAndSetP $newval, $mem" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + __ getandsetd($res$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::cmpxchgx_hint_atomic_update()); + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg /* obj */, + $res$$Register /* res */, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct g1GetAndSetN(iRegNdst res, indirect mem, iRegNsrc newval, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (GetAndSetN mem newval)); + effect(TEMP_DEF res, TEMP tmp1, TEMP tmp2, KILL cr0); + format %{ "GetAndSetN $newval, $mem" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + __ getandsetw($res$$Register, $newval$$Register, $mem$$Register, + MacroAssembler::cmpxchgx_hint_atomic_update()); + // Can be done after cmpxchg because there's no safepoint here. + pre_write_barrier(masm, this, + noreg /* obj */, + $res$$Register /* res */, + $tmp1$$Register, + $tmp2$$Register, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + post_write_barrier(masm, this, + $mem$$Register, + $newval$$Register, + $tmp1$$Register, + $tmp2$$Register, + true /* decode_new_val */); + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + __ isync(); + } else { + __ sync(); + } + %} + ins_pipe(pipe_class_default); +%} + +instruct g1LoadP(iRegPdst dst, memoryAlg4 mem, iRegPdst tmp, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_Load()->is_unordered() && n->as_Load()->barrier_data() != 0); + // This instruction does not need an acquiring counterpart because it is only + // used for reference loading (Reference::get()). + match(Set dst (LoadP mem)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr0); + ins_cost(2 * MEMORY_REF_COST); + format %{ "ld $dst, $mem\t# ptr" %} + ins_encode %{ + __ ld($dst$$Register, $mem$$disp, $mem$$base$$Register); + pre_write_barrier(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct g1LoadN(iRegNdst dst, memoryAlg4 mem, iRegPdst tmp1, iRegPdst tmp2, flagsRegCR0 cr0) +%{ + predicate(UseG1GC && n->as_Load()->is_unordered() && n->as_Load()->barrier_data() != 0); + // This instruction does not need an acquiring counterpart because it is only + // used for reference loading (Reference::get()). 
+ match(Set dst (LoadN mem)); + effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, KILL cr0); + ins_cost(2 * MEMORY_REF_COST); + format %{ "lwz $dst, $mem\t# ptr" %} + ins_encode %{ + __ lwz($dst$$Register, $mem$$disp, $mem$$base$$Register); + pre_write_barrier(masm, this, + noreg /* obj */, + $dst$$Register, + $tmp1$$Register, + $tmp2$$Register); + %} + ins_pipe(pipe_class_default); +%} diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index ca9abfa3719..d15f9929671 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -1000,6 +1000,10 @@ int MachNode::compute_padding(int current_offset) const { // Should the matcher clone input 'm' of node 'n'? bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { + if (is_encode_and_store_pattern(n, m)) { + mstack.push(m, Visit); + return true; + } return false; } @@ -5407,7 +5411,7 @@ instruct loadRange(iRegIdst dst, memory mem) %{ // Load Compressed Pointer instruct loadN(iRegNdst dst, memory mem) %{ match(Set dst (LoadN mem)); - predicate(n->as_Load()->is_unordered() || followed_by_acquire(n)); + predicate((n->as_Load()->is_unordered() || followed_by_acquire(n)) && n->as_Load()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); format %{ "LWZ $dst, $mem \t// load compressed ptr" %} @@ -5419,6 +5423,7 @@ instruct loadN(iRegNdst dst, memory mem) %{ // Load Compressed Pointer acquire. instruct loadN_ac(iRegNdst dst, memory mem) %{ match(Set dst (LoadN mem)); + predicate(n->as_Load()->barrier_data() == 0); ins_cost(3*MEMORY_REF_COST); format %{ "LWZ $dst, $mem \t// load acquire compressed ptr\n\t" @@ -5432,7 +5437,7 @@ instruct loadN_ac(iRegNdst dst, memory mem) %{ // Load Compressed Pointer and decode it if narrow_oop_shift == 0. instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{ match(Set dst (DecodeN (LoadN mem))); - predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0); + predicate(_kids[0]->_leaf->as_Load()->is_unordered() && CompressedOops::shift() == 0 && _kids[0]->_leaf->as_Load()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); format %{ "LWZ $dst, $mem \t// DecodeN (unscaled)" %} @@ -6423,6 +6428,7 @@ instruct reinterpretX(vecX dst) %{ // Store Compressed Oop instruct storeN(memory dst, iRegN_P2N src) %{ match(Set dst (StoreN dst src)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); format %{ "STW $src, $dst \t// compressed oop" %} @@ -7477,6 +7483,7 @@ instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2))); + predicate(n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ @@ -7676,7 +7683,7 @@ instruct weakCompareAndSwapI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{ match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump 
format %{ "weak CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ @@ -7690,7 +7697,7 @@ instruct weakCompareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iReg instruct weakCompareAndSwapN_acq_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{ match(Set res (WeakCompareAndSwapN mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); // TEMP_DEF to avoid jump format %{ "weak CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as bool" %} ins_encode %{ @@ -7939,7 +7946,7 @@ instruct compareAndExchangeI_acq_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst); + predicate(((CompareAndSwapNode*)n)->order() != MemNode::acquire && ((CompareAndSwapNode*)n)->order() != MemNode::seqcst && n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as narrow oop" %} ins_encode %{ @@ -7953,7 +7960,7 @@ instruct compareAndExchangeN_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iReg instruct compareAndExchangeN_acq_regP_regN_regN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndExchangeN mem_ptr (Binary src1 src2))); - predicate(((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst); + predicate((((CompareAndSwapNode*)n)->order() == MemNode::acquire || ((CompareAndSwapNode*)n)->order() == MemNode::seqcst) && n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); format %{ "CMPXCHGW acq $res, $mem_ptr, $src1, $src2; as narrow oop" %} ins_encode %{ @@ -8262,6 +8269,7 @@ instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetN mem_ptr src)); + predicate(n->as_LoadStore()->barrier_data() == 0); effect(TEMP_DEF res, TEMP cr0); format %{ "GetAndSetN $res, $mem_ptr, $src" %} ins_encode %{ diff --git a/src/hotspot/cpu/ppc/register_ppc.hpp b/src/hotspot/cpu/ppc/register_ppc.hpp index 302d49884fa..b7ba4f053b5 100644 --- a/src/hotspot/cpu/ppc/register_ppc.hpp +++ b/src/hotspot/cpu/ppc/register_ppc.hpp @@ -27,6 +27,7 @@ #define CPU_PPC_REGISTER_PPC_HPP #include "asm/register.hpp" +#include "utilities/count_trailing_zeros.hpp" // forward declaration class VMRegImpl; @@ -555,4 +556,12 @@ constexpr Register R29_TOC = R29; constexpr Register R11_scratch1 = R11; constexpr Register R12_scratch2 = R12; +template <> +inline Register AbstractRegSet::first() { + if (_bitset == 0) { return noreg; } + return as_Register(count_trailing_zeros(_bitset)); +} + +typedef AbstractRegSet RegSet; + #endif // CPU_PPC_REGISTER_PPC_HPP diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp index 062f8029062..7036c44d99d 100644 --- 
a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,7 +39,10 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/g1/c1/g1BarrierSetC1.hpp" -#endif +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> @@ -96,6 +99,55 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas __ pop_reg(saved_regs, sp); } +static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, + const Register thread, const Register value, const Register tmp1, const Register tmp2) { + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) + __ ld(tmp1, Address(thread, in_bytes(index_offset))); // tmp1 := *(index address) + __ beqz(tmp1, runtime); // jump to runtime if index == 0 (full buffer) + // The buffer is not full, store value into it. + __ sub(tmp1, tmp1, wordSize); // tmp1 := next index + __ sd(tmp1, Address(thread, in_bytes(index_offset))); // *(index address) := next index + __ ld(tmp2, Address(thread, in_bytes(buffer_offset))); // tmp2 := buffer address + __ add(tmp2, tmp2, tmp1); + __ sd(value, Address(tmp2)); // *(buffer address + next index) := value +} + +static void generate_pre_barrier_fast_path(MacroAssembler* masm, + const Register thread, + const Register tmp1) { + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ lwu(tmp1, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ lbu(tmp1, in_progress); + } +} + +static void generate_pre_barrier_slow_path(MacroAssembler* masm, + const Register obj, + const Register pre_val, + const Register thread, + const Register tmp1, + const Register tmp2, + Label& done, + Label& runtime) { + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + // Is the previous value null? + __ beqz(pre_val, done, true); + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, + thread, pre_val, tmp1, tmp2); + __ j(done); +} + void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Register obj, Register pre_val, @@ -116,43 +168,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, assert_different_registers(obj, pre_val, tmp1, tmp2); assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); - Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); - Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); - Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? 
- if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { // 4-byte width - __ lwu(tmp1, in_progress); - } else { - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ lbu(tmp1, in_progress); - } + generate_pre_barrier_fast_path(masm, thread, tmp1); + // If marking is not active (*(mark queue active address) == 0), jump to done __ beqz(tmp1, done); - - // Do we need to load the previous value? - if (obj != noreg) { - __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); - } - - // Is the previous value null? - __ beqz(pre_val, done); - - // Can we store original value in the thread's buffer? - // Is index == 0? - // (The index field is typed as size_t.) - - __ ld(tmp1, index); // tmp := *index_adr - __ beqz(tmp1, runtime); // tmp == 0? - // If yes, goto runtime - - __ sub(tmp1, tmp1, wordSize); // tmp := tmp - wordSize - __ sd(tmp1, index); // *index_adr := tmp - __ ld(tmp2, buffer); - __ add(tmp1, tmp1, tmp2); // tmp := tmp + *buffer_adr - - // Record the previous value - __ sd(pre_val, Address(tmp1, 0)); - __ j(done); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, done, runtime); __ bind(runtime); @@ -171,6 +190,49 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, } +static void generate_post_barrier_fast_path(MacroAssembler* masm, + const Register store_addr, + const Register new_val, + const Register tmp1, + const Register tmp2, + Label& done, + bool new_val_may_be_null) { + // Does store cross heap regions? + __ xorr(tmp1, store_addr, new_val); // tmp1 := store address ^ new value + __ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); // tmp1 := ((store address ^ new value) >> LogOfHRGrainBytes) + __ beqz(tmp1, done); + // Crosses regions, storing null? + if (new_val_may_be_null) { + __ beqz(new_val, done); + } + // Storing region crossing non-null, is card young? + __ srli(tmp1, store_addr, CardTable::card_shift()); // tmp1 := card address relative to card table base + __ load_byte_map_base(tmp2); // tmp2 := card table base address + __ add(tmp1, tmp1, tmp2); // tmp1 := card address + __ lbu(tmp2, Address(tmp1)); // tmp2 := card +} + +static void generate_post_barrier_slow_path(MacroAssembler* masm, + const Register thread, + const Register tmp1, + const Register tmp2, + Label& done, + Label& runtime) { + __ membar(MacroAssembler::StoreLoad); // StoreLoad membar + __ lbu(tmp2, Address(tmp1)); // tmp2 := card + __ beqz(tmp2, done, true); + // Storing a region crossing, non-null oop, card is clean. + // Dirty card and log. 
+ STATIC_ASSERT(CardTable::dirty_card_val() == 0); + __ sb(zr, Address(tmp1)); // *(card address) := dirty_card_val + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, + thread, tmp1, tmp2, t0); + __ j(done); +} + void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, @@ -179,73 +241,119 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register tmp2) { assert(thread == xthread, "must be"); assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0); - assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && - tmp2 != noreg, "expecting a register"); - - Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); - Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); - - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); + assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, + "expecting a register"); Label done; Label runtime; - // Does store cross heap regions? - - __ xorr(tmp1, store_addr, new_val); - __ srli(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); - __ beqz(tmp1, done); - - // crosses regions, storing null? - - __ beqz(new_val, done); - - // storing region crossing non-null, is card already dirty? - - const Register card_addr = tmp1; - - __ srli(card_addr, store_addr, CardTable::card_shift()); - - // get the address of the card - __ load_byte_map_base(tmp2); - __ add(card_addr, card_addr, tmp2); - __ lbu(tmp2, Address(card_addr)); + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, done, true /* new_val_may_be_null */); + // If card is young, jump to done (tmp2 holds the card value) __ mv(t0, (int)G1CardTable::g1_young_card_val()); - __ beq(tmp2, t0, done); - - assert((int)CardTable::dirty_card_val() == 0, "must be 0"); - - __ membar(MacroAssembler::StoreLoad); - - __ lbu(tmp2, Address(card_addr)); - __ beqz(tmp2, done); - - // storing a region crossing, non-null oop, card is clean. - // dirty card and log. - - __ sb(zr, Address(card_addr)); - - __ ld(t0, queue_index); - __ beqz(t0, runtime); - __ sub(t0, t0, wordSize); - __ sd(t0, queue_index); - - __ ld(tmp2, buffer); - __ add(t0, tmp2, t0); - __ sd(card_addr, Address(t0, 0)); - __ j(done); + __ beq(tmp2, t0, done); // card == young_card_val? 
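+  // Card is not young: take the slow path, which re-reads the card after a
+  // StoreLoad membar and, unless it is already dirty, dirties it and enqueues
+  // the card address (jumping to runtime when the queue buffer is full).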
+ generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, done, runtime); __ bind(runtime); // save the live input values RegSet saved = RegSet::of(store_addr); __ push_reg(saved, sp); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp1, thread); __ pop_reg(saved, sp); __ bind(done); } +#if defined(COMPILER2) + +static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) { + SaveLiveRegisters save_registers(masm, stub); + if (c_rarg0 != arg) { + __ mv(c_rarg0, arg); + } + __ mv(c_rarg1, xthread); + __ mv(t0, runtime_path); + __ jalr(t0); +} + +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* stub) { + assert(thread == xthread, "must be"); + assert_different_registers(obj, pre_val, tmp1, tmp2); + assert(pre_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); + + stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2); + + generate_pre_barrier_fast_path(masm, thread, tmp1); + // If marking is active (*(mark queue active address) != 0), jump to stub (slow path) + __ bnez(tmp1, *stub->entry(), true); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); + Register tmp2 = stub->tmp2(); + + __ bind(*stub->entry()); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp1, tmp2, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); + __ j(*stub->continuation()); +} + +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* stub) { + assert(thread == xthread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, t0); + assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, + "expecting a register"); + + stub->initialize_registers(thread, tmp1, tmp2); + + bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0; + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp1, tmp2, *stub->continuation(), new_val_may_be_null); + // If card is not young, jump to stub (slow path) (tmp2 holds the card value) + __ mv(t0, (int)G1CardTable::g1_young_card_val()); + __ bne(tmp2, t0, *stub->entry(), true); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); // tmp1 holds the card address. 
+ Register tmp2 = stub->tmp2(); + + __ bind(*stub->entry()); + generate_post_barrier_slow_path(masm, thread, tmp1, tmp2, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)); + __ j(*stub->continuation()); +} + +#endif // COMPILER2 + void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp2) { bool on_oop = is_reference_type(type); diff --git a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp index 96568994079..c7bee2ef6f3 100644 --- a/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/gc/g1/g1BarrierSetAssembler_riscv.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved. + * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,6 +36,8 @@ class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -72,6 +74,27 @@ public: void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); #endif +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + Register tmp2, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* c2_stub); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif + void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp2); }; diff --git a/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad new file mode 100644 index 00000000000..1dc5834dbdc --- /dev/null +++ b/src/hotspot/cpu/riscv/gc/g1/g1_riscv.ad @@ -0,0 +1,564 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2024, Huawei Technologies Co., Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +source_hpp %{ + +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#include "gc/shared/gc_globals.hpp" + +%} + +source %{ + +#include "gc/g1/g1BarrierSetAssembler_riscv.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" + +static void write_barrier_pre(MacroAssembler* masm, + const MachNode* node, + Register obj, + Register pre_val, + Register tmp1, + Register tmp2, + RegSet preserve = RegSet(), + RegSet no_preserve = RegSet()) { + if (!G1PreBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node); + for (RegSetIterator reg = preserve.begin(); *reg != noreg; ++reg) { + stub->preserve(*reg); + } + for (RegSetIterator reg = no_preserve.begin(); *reg != noreg; ++reg) { + stub->dont_preserve(*reg); + } + g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, xthread, tmp1, tmp2, stub); +} + +static void write_barrier_post(MacroAssembler* masm, + const MachNode* node, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2) { + if (!G1PostBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node); + g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, xthread, tmp1, tmp2, stub); +} + +%} + +instruct g1StoreP(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreP mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(STORE_COST); + format %{ "sd $src, $mem\t# ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ sd($src$$Register, Address($mem$$Register)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +instruct g1StoreN(indirect mem, iRegN src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(STORE_COST); + format %{ "sw $src, $mem\t# compressed ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + __ sw($src$$Register, Address($mem$$Register)); + if ((barrier_data() & G1C2BarrierPost) != 0) { + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ decode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ decode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + } + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* 
new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(STORE_COST); + format %{ "encode_heap_oop $tmp1, $src\n\t" + "sw $tmp1, $mem\t# compressed ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ encode_heap_oop($tmp1$$Register, $src$$Register); + } else { + __ encode_heap_oop_not_null($tmp1$$Register, $src$$Register); + } + __ sw($tmp1$$Register, Address($mem$$Register)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(istore_reg_mem); +%} + +instruct g1CompareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $res = $mem, $oldval, $newval\t# ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP and its Acq variant. + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP and its Acq variant. 
+ write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $res = $mem, $oldval, $newval\t# narrow oop" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $res = $mem, $oldval, $newval\t# narrow oop" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndSwapP(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem 
(Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $mem, $oldval, $newval\t# (ptr)\n\t" + "mv $res, $res == $oldval" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegP oldval) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (ptr)\n\t" + "mv $res, $res == $oldval" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::int64, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndSwapN(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "cmpxchg $mem, $oldval, $newval\t# (narrow oop)\n\t" + "mv $res, $res == $oldval" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, 
$newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1CompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegN oldval) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(VOLATILE_REF_COST); + format %{ "cmpxchg_acq $mem, $oldval, $newval\t# (narrow oop)\n\t" + "mv $res, $res == $oldval" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($oldval$$Register, $mem$$Register); + assert_different_registers($newval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, Assembler::uint32, + /*acquire*/ Assembler::aq, /*release*/ Assembler::rl, $res$$Register, + /*result as bool*/ true); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_slow); +%} + +instruct g1GetAndSetP(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchg $preval, $newval, [$mem]" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchg($preval$$Register, $newval$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +instruct g1GetAndSetPAcq(indirect mem, iRegP newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp preval) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetP mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchg_acq $preval, $newval, [$mem]" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $preval$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register 
/* tmp1 */, + $tmp2$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgal($preval$$Register, $newval$$Register, $mem$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +instruct g1GetAndSetN(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetN mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(2 * VOLATILE_REF_COST); + format %{ "atomic_xchgwu $preval, $newval, [$mem]" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgwu($preval$$Register, $newval$$Register, $mem$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +instruct g1GetAndSetNAcq(indirect mem, iRegN newval, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3, iRegNNoSp preval) +%{ + predicate(UseG1GC && needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() != 0); + match(Set preval (GetAndSetN mem newval)); + effect(TEMP preval, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(VOLATILE_REF_COST); + format %{ "atomic_xchgwu_acq $preval, $newval, [$mem]" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */, + RegSet::of($mem$$Register, $preval$$Register, $newval$$Register) /* preserve */); + __ atomic_xchgalwu($preval$$Register, $newval$$Register, $mem$$Register); + __ decode_heap_oop($tmp1$$Register, $newval$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_serial); +%} + +instruct g1LoadP(iRegPNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2) +%{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadP mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2); + ins_cost(LOAD_COST + BRANCH_COST); + format %{ "ld $dst, $mem\t# ptr" %} + ins_encode %{ + guarantee($mem$$disp == 0, "impossible encoding"); + __ ld($dst$$Register, Address($mem$$Register)); + write_barrier_pre(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(iload_reg_mem); +%} + +instruct g1LoadN(iRegNNoSp dst, indirect mem, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3) +%{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadN mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3); + ins_cost(LOAD_COST + BRANCH_COST); + format %{ "lwu $dst, $mem\t# compressed ptr" %} + ins_encode %{ + guarantee($mem$$disp 
== 0, "impossible encoding"); + __ lwu($dst$$Register, Address($mem$$Register)); + if ((barrier_data() & G1C2BarrierPre) != 0) { + __ decode_heap_oop($tmp1$$Register, $dst$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + } + %} + ins_pipe(iload_reg_mem); +%} diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 05f55fd0da7..563dfd4cde9 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -2224,7 +2224,8 @@ bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { assert_cond(m != nullptr); if (is_vshift_con_pattern(n, m) || // ShiftV src (ShiftCntV con) is_vector_bitwise_not_pattern(n, m) || - is_vector_scalar_bitwise_pattern(n, m)) { + is_vector_scalar_bitwise_pattern(n, m) || + is_encode_and_store_pattern(n, m)) { mstack.push(m, Visit); return true; } @@ -4785,6 +4786,7 @@ instruct loadP(iRegPNoSp dst, memory mem) // Load Compressed Pointer instruct loadN(iRegNNoSp dst, memory mem) %{ + predicate(n->as_Load()->barrier_data() == 0); match(Set dst (LoadN mem)); ins_cost(LOAD_COST); @@ -5220,6 +5222,7 @@ instruct storeimmP0(immP0 zero, memory mem) // Store Compressed Pointer instruct storeN(iRegN src, memory mem) %{ + predicate(n->as_Store()->barrier_data() == 0); match(Set mem (StoreN mem src)); ins_cost(STORE_COST); @@ -5234,6 +5237,7 @@ instruct storeN(iRegN src, memory mem) instruct storeImmN0(immN0 zero, memory mem) %{ + predicate(n->as_Store()->barrier_data() == 0); match(Set mem (StoreN mem zero)); ins_cost(STORE_COST); @@ -5424,6 +5428,7 @@ instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndSwapN mem (Binary oldval newval))); ins_cost(LOAD_COST + STORE_COST + ALU_COST * 8 + BRANCH_COST * 4); @@ -5545,7 +5550,7 @@ instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP new instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndSwapN mem (Binary oldval newval))); @@ -5653,6 +5658,7 @@ instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL ne instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndExchangeN mem (Binary oldval newval))); ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 3 + ALU_COST * 3); @@ -5786,7 +5792,7 @@ instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndExchangeN mem (Binary oldval newval))); @@ -5914,6 +5920,7 @@ instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL ne instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); ins_cost(LOAD_COST + STORE_COST + BRANCH_COST * 2 + ALU_COST * 4); @@ -6045,7 
+6052,7 @@ instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0); match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); @@ -6117,6 +6124,8 @@ instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); + match(Set prev (GetAndSetN mem newv)); ins_cost(ALU_COST); @@ -6182,7 +6191,7 @@ instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{ - predicate(needs_acquiring_load_reserved(n)); + predicate(needs_acquiring_load_reserved(n) && n->as_LoadStore()->barrier_data() == 0); match(Set prev (GetAndSetN mem newv)); diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp index 37631298920..544c82d34a7 100644 --- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, 2023 SAP SE. All rights reserved. + * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -42,11 +42,47 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/g1/c1/g1BarrierSetC1.hpp" -#endif +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> -#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str) +#define BLOCK_COMMENT(str) __ block_comment(str) + +static void generate_pre_barrier_fast_path(MacroAssembler* masm, + const Register thread, + const Register tmp1) { + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ load_and_test_int(tmp1, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ load_and_test_byte(tmp1, in_progress); + } +} + +static void generate_queue_test_and_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, + const Register Z_thread, const Register value, const Register temp) { + BLOCK_COMMENT("generate_queue_test_and_insertion {"); + + assert_different_registers(temp, value); + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) + + __ load_and_test_long(temp, Address(Z_thread, in_bytes(index_offset))); // temp := *(index address) + __ branch_optimized(Assembler::bcondEqual, runtime); // jump to runtime if index == 0 (full buffer) + + // The buffer is not full, store value into it. 
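+  // Worked example (illustrative only): with wordSize == 8 and a current
+  // index of 24, the index is lowered to 16 and the value is stored at
+  // buffer_address + 16; a zero index above means the buffer is full and
+  // the runtime path is taken instead.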
+ __ add2reg(temp, -wordSize); // temp := next index + __ z_stg(temp, in_bytes(index_offset), Z_thread); // *(index address) := next index + + __ z_ag(temp, Address(Z_thread, in_bytes(buffer_offset))); // temp := buffer address + next index + __ z_stg(value, 0, temp); // *(buffer address + next index) := value + BLOCK_COMMENT("} generate_queue_test_and_insertion"); +} void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count) { @@ -59,13 +95,8 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm assert_different_registers(addr, Z_R0_scratch); // would be destroyed by push_frame() assert_different_registers(count, Z_R0_scratch); // would be destroyed by push_frame() Register Rtmp1 = Z_R0_scratch; - const int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset)); - } else { - guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset)); - } + + generate_pre_barrier_fast_path(masm, Z_thread, Rtmp1); __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently. RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers); // Creates frame. @@ -100,6 +131,181 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas } } +#if defined(COMPILER2) + +#undef __ +#define __ masm-> + +static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register pre_val, const address runtime_path) { + BLOCK_COMMENT("generate_c2_barrier_runtime_call {"); + SaveLiveRegisters save_registers(masm, stub); + __ call_VM_leaf(runtime_path, pre_val, Z_thread); + BLOCK_COMMENT("} generate_c2_barrier_runtime_call"); +} + +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + G1PreBarrierStubC2* stub) { + + BLOCK_COMMENT("g1_write_barrier_pre_c2 {"); + + assert(thread == Z_thread, "must be"); + assert_different_registers(obj, pre_val, tmp1); + assert(pre_val != noreg && tmp1 != noreg, "expecting a register"); + + stub->initialize_registers(obj, pre_val, thread, tmp1, noreg); + + generate_pre_barrier_fast_path(masm, thread, tmp1); + __ branch_optimized(Assembler::bcondNotEqual, *stub->entry()); // Activity indicator is zero, so there is no marking going on currently. 
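+  // The stub entry above is taken when the SATB activity indicator is
+  // non-zero, i.e. while concurrent marking is active; otherwise execution
+  // falls through to the continuation bound below.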
+ + __ bind(*stub->continuation()); + + BLOCK_COMMENT("} g1_write_barrier_pre_c2"); +} + +void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + + BLOCK_COMMENT("generate_c2_pre_barrier_stub {"); + + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); + + __ bind(*stub->entry()); + + BLOCK_COMMENT("generate_pre_val_not_null_test {"); + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj), noreg, noreg, AS_RAW); + } + __ z_ltgr(pre_val, pre_val); + __ branch_optimized(Assembler::bcondEqual, *stub->continuation()); + BLOCK_COMMENT("} generate_pre_val_not_null_test"); + + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, + Z_thread, pre_val, tmp1); + + __ branch_optimized(Assembler::bcondAlways, *stub->continuation()); + + __ bind(runtime); + + generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); + + __ branch_optimized(Assembler::bcondAlways, *stub->continuation()); + + BLOCK_COMMENT("} generate_c2_pre_barrier_stub"); +} + +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* stub) { + BLOCK_COMMENT("g1_write_barrier_post_c2 {"); + + assert(thread == Z_thread, "must be"); + assert_different_registers(store_addr, new_val, thread, tmp1, tmp2, Z_R1_scratch); + + assert(store_addr != noreg && new_val != noreg && tmp1 != noreg && tmp2 != noreg, "expecting a register"); + + stub->initialize_registers(thread, tmp1, tmp2); + + BLOCK_COMMENT("generate_region_crossing_test {"); + if (VM_Version::has_DistinctOpnds()) { + __ z_xgrk(tmp1, store_addr, new_val); + } else { + __ z_lgr(tmp1, store_addr); + __ z_xgr(tmp1, new_val); + } + __ z_srag(tmp1, tmp1, G1HeapRegion::LogOfHRGrainBytes); + __ branch_optimized(Assembler::bcondEqual, *stub->continuation()); + BLOCK_COMMENT("} generate_region_crossing_test"); + + // crosses regions, storing null? + if ((stub->barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ z_ltgr(new_val, new_val); + __ branch_optimized(Assembler::bcondEqual, *stub->continuation()); + } + + BLOCK_COMMENT("generate_card_young_test {"); + CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set()); + // calculate address of card + __ load_const_optimized(tmp2, (address)ct->card_table()->byte_map_base()); // Card table base. + __ z_srlg(tmp1, store_addr, CardTable::card_shift()); // Index into card table. + __ z_algr(tmp1, tmp2); // Explicit calculation needed for cli. + + // Filter young. + __ z_cli(0, tmp1, G1CardTable::g1_young_card_val()); + + BLOCK_COMMENT("} generate_card_young_test"); + + // From here on, tmp1 holds the card address. 
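+  // Inline filtering so far: same-region stores, null stores (unless
+  // G1C2BarrierPostNotNull is set), and stores hitting a young card fall
+  // through to the continuation; only the remaining stores branch to the
+  // out-of-line stub, which re-checks the card and dirties/enqueues it.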
+ __ branch_optimized(Assembler::bcondNotEqual, *stub->entry()); + + __ bind(*stub->continuation()); + + BLOCK_COMMENT("} g1_write_barrier_post_c2"); +} + +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + + BLOCK_COMMENT("generate_c2_post_barrier_stub {"); + + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + + Register thread = stub->thread(); + Register tmp1 = stub->tmp1(); // tmp1 holds the card address. + Register tmp2 = stub->tmp2(); + Register Rcard_addr = tmp1; + + __ bind(*stub->entry()); + + BLOCK_COMMENT("generate_card_clean_test {"); + __ z_sync(); // Required to support concurrent cleaning. + __ z_cli(0, Rcard_addr, 0); // Reload after membar. + __ branch_optimized(Assembler::bcondEqual, *stub->continuation()); + BLOCK_COMMENT("} generate_card_clean_test"); + + BLOCK_COMMENT("generate_dirty_card {"); + // Storing a region crossing, non-null oop, card is clean. + // Dirty card and log. + STATIC_ASSERT(CardTable::dirty_card_val() == 0); + __ z_mvi(0, Rcard_addr, CardTable::dirty_card_val()); + BLOCK_COMMENT("} generate_dirty_card"); + + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, + Z_thread, tmp1, tmp2); + + __ branch_optimized(Assembler::bcondAlways, *stub->continuation()); + + __ bind(runtime); + + generate_c2_barrier_runtime_call(masm, stub, tmp1, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)); + + __ branch_optimized(Assembler::bcondAlways, *stub->continuation()); + + BLOCK_COMMENT("} generate_c2_post_barrier_stub"); +} + +#endif //COMPILER2 + void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, const Address& src, Register dst, Register tmp1, Register tmp2, Label *L_handle_null) { bool on_oop = is_reference_type(type); @@ -136,9 +342,6 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator const Register Robj = obj ? obj->base() : noreg, Roff = obj ? obj->index() : noreg; - const int active_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()); - const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()); assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp must be Z_R0!! assert_different_registers(Robj, Z_R0_scratch); // Used for addressing. Furthermore, push_frame destroys Z_R0!! assert_different_registers(Rval, Z_R0_scratch); // push_frame destroys Z_R0!! @@ -147,14 +350,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator BLOCK_COMMENT("g1_write_barrier_pre {"); - // Is marking active? - // Note: value is loaded for test purposes only. No further use here. - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset)); - } else { - guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset)); - } + generate_pre_barrier_fast_path(masm, Z_thread, Rtmp1); __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently. 
assert(Rpre_val != noreg, "must have a real register"); @@ -194,24 +390,14 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Decorator // We can store the original value in the thread's buffer // only if index > 0. Otherwise, we need runtime to handle. // (The index field is typed as size_t.) - Register Rbuffer = Rtmp1, Rindex = Rtmp2; - assert_different_registers(Rbuffer, Rindex, Rpre_val); - __ z_lg(Rbuffer, buffer_offset, Z_thread); - - __ load_and_test_long(Rindex, Address(Z_thread, index_offset)); - __ z_bre(callRuntime); // If index == 0, goto runtime. - - __ add2reg(Rindex, -wordSize); // Decrement index. - __ z_stg(Rindex, index_offset, Z_thread); - - // Record the previous value. - __ z_stg(Rpre_val, 0, Rbuffer, Rindex); + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + callRuntime, + Z_thread, Rpre_val, Rtmp2); __ z_bru(filtered); // We are done. - Rbuffer = noreg; // end of life - Rindex = noreg; // end of life - __ bind(callRuntime); // Save some registers (inputs and result) over runtime call @@ -326,23 +512,16 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Decorato Register Rcard_addr_x = Rcard_addr; Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1; - Register Rqueue_buf = (Rtmp3 != Z_R0_scratch) ? Rtmp3 : Rtmp1; - const int qidx_off = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()); - const int qbuf_off = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()); - if ((Rcard_addr == Rqueue_buf) || (Rcard_addr == Rqueue_index)) { + if (Rcard_addr == Rqueue_index) { Rcard_addr_x = Z_R0_scratch; // Register shortage. We have to use Z_R0. } __ lgr_if_needed(Rcard_addr_x, Rcard_addr); - __ load_and_test_long(Rqueue_index, Address(Z_thread, qidx_off)); - __ z_bre(callRuntime); // Index == 0 then jump to runtime. - - __ z_lg(Rqueue_buf, qbuf_off, Z_thread); - - __ add2reg(Rqueue_index, -wordSize); // Decrement index. - __ z_stg(Rqueue_index, qidx_off, Z_thread); - - __ z_stg(Rcard_addr_x, 0, Rqueue_index, Rqueue_buf); // Store card. + generate_queue_test_and_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + callRuntime, + Z_thread, Rcard_addr_x, Rqueue_index); __ z_bru(filtered); __ bind(callRuntime); diff --git a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp index cc1d51d2fa1..0f0bdd8b83c 100644 --- a/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/gc/g1/g1BarrierSetAssembler_s390.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018 SAP SE. All rights reserved. + * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2024 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -34,6 +34,8 @@ class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -62,7 +64,27 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); -#endif +#endif // COMPILER1 + +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp1, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp1, + Register tmp2, + G1PostBarrierStubC2* c2_stub); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif // COMPILER2 virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, const Address& src, Register dst, Register tmp1, Register tmp2, Label *L_handle_null = nullptr); diff --git a/src/hotspot/cpu/s390/gc/g1/g1_s390.ad b/src/hotspot/cpu/s390/gc/g1/g1_s390.ad new file mode 100644 index 00000000000..31f60c4aeff --- /dev/null +++ b/src/hotspot/cpu/s390/gc/g1/g1_s390.ad @@ -0,0 +1,457 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// Copyright 2024 IBM Corporation. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. 
+// + +source_hpp %{ + +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#include "gc/shared/gc_globals.hpp" + +%} + +source %{ + +#include "gc/g1/g1BarrierSetAssembler_s390.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" + +static void write_barrier_pre(MacroAssembler* masm, + const MachNode* node, + Register obj, + Register pre_val, + Register tmp1, + RegSet preserve = RegSet(), + RegSet no_preserve = RegSet()) { + if (!G1PreBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node); + for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) { + stub->preserve(*reg); + } + for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) { + stub->dont_preserve(*reg); + } + g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, Z_thread, tmp1, stub); +} + +static void write_barrier_post(MacroAssembler* masm, + const MachNode* node, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2) { + if (!G1PostBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node); + g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, Z_thread, tmp1, tmp2, stub); +} + +%} // source + +// store pointer +instruct g1StoreP(indirect dst, memoryRegP src, iRegL tmp1, iRegL tmp2, flagsReg cr) %{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set dst (StoreP dst src)); + effect(TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(MEMORY_REF_COST); + format %{ "STG $src,$dst\t # ptr" %} + ins_encode %{ + __ block_comment("g1StoreP {"); + write_barrier_pre(masm, this, + $dst$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($dst$$Register, $src$$Register) /* preserve */); + + __ z_stg($src$$Register, Address($dst$$Register)); + + write_barrier_post(masm, this, + $dst$$Register, /* store_addr */ + $src$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + __ block_comment("} g1StoreP"); + %} + ins_pipe(pipe_class_dummy); +%} + +// Store Compressed Pointer +instruct g1StoreN(indirect mem, iRegN_P2N src, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(MEMORY_REF_COST); + format %{ "STY $src,$mem\t # (cOop)" %} + ins_encode %{ + __ block_comment("g1StoreN {"); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + + __ z_sty($src$$Register, Address($mem$$Register)); + + if ((barrier_data() & G1C2BarrierPost) != 0) { + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ oop_decoder($tmp1$$Register, $src$$Register, true /* maybe_null */); + } else { + __ oop_decoder($tmp1$$Register, $src$$Register, false /* maybe_null */); + } + } + + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + __ block_comment("} g1StoreN"); + %} + + ins_pipe(pipe_class_dummy); +%} + 
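+// Rough shape of the code generated for the G1 store patterns above
+// (illustrative pseudo-code only, not emitted verbatim):
+//
+//   if (SATB marking active) {            // inline pre-barrier test
+//     pre_val = *mem;                     // out-of-line stub
+//     if (pre_val != null) enqueue(pre_val);
+//   }
+//   *mem = new_val;                       // the store itself
+//   if (region(mem) != region(new_val)    // inline post-barrier tests
+//       && new_val != null && card(mem) != young) {
+//     dirty card and enqueue it;          // out-of-line stub
+//   }
+//
+// Both stubs fall back to G1BarrierSetRuntime calls when the corresponding
+// per-thread queue is full.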
+instruct g1CompareAndSwapN(indirect mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval))); + effect(USE mem_ptr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL oldval, KILL cr); + format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem_ptr$$Register); + assert_different_registers($newval$$Register, $mem_ptr$$Register); + __ block_comment("g1compareAndSwapN {"); + + Register Rcomp = reg_to_register_object($oldval$$reg); + Register Rnew = reg_to_register_object($newval$$reg); + Register Raddr = reg_to_register_object($mem_ptr$$reg); + Register Rres = reg_to_register_object($res$$reg); + + write_barrier_pre(masm, this, + Raddr /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of(Raddr, Rcomp, Rnew) /* preserve */, + RegSet::of(Rres) /* no_preserve */); + + __ z_cs(Rcomp, Rnew, 0, Raddr); + + assert_different_registers(Rres, Raddr); + if (VM_Version::has_LoadStoreConditional()) { + __ load_const_optimized(Z_R0_scratch, 0L); // false (failed) + __ load_const_optimized(Rres, 1L); // true (succeed) + __ z_locgr(Rres, Z_R0_scratch, Assembler::bcondNotEqual); + } else { + Label done; + __ load_const_optimized(Rres, 0L); // false (failed) + __ z_brne(done); // Assume true to be the common case. + __ load_const_optimized(Rres, 1L); // true (succeed) + __ bind(done); + } + + __ oop_decoder($tmp3$$Register, Rnew, true /* maybe_null */); + + write_barrier_post(masm, this, + Raddr /* store_addr */, + $tmp3$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + __ block_comment("} g1compareAndSwapN"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1CompareAndExchangeN(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegN res, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeN mem_ptr (Binary oldval newval))); + effect(USE mem_ptr, TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL oldval, KILL cr); + format %{ "$res = CompareAndExchangeN $oldval,$newval,$mem_ptr" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem_ptr$$Register); + assert_different_registers($newval$$Register, $mem_ptr$$Register); + __ block_comment("g1CompareAndExchangeN {"); + write_barrier_pre(masm, this, + $mem_ptr$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($mem_ptr$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + + Register Rcomp = reg_to_register_object($oldval$$reg); + Register Rnew = reg_to_register_object($newval$$reg); + Register Raddr = reg_to_register_object($mem_ptr$$reg); + + Register Rres = reg_to_register_object($res$$reg); + assert_different_registers(Rres, Raddr); + + __ z_lgr(Rres, Rcomp); // previous contents + __ z_csy(Rres, Rnew, 0, Raddr); // Try to store new value. 
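+    // Whether or not the CS succeeds, Rres now holds the previous memory
+    // contents, which is the value CompareAndExchangeN must return; the post
+    // barrier below is applied unconditionally, which is harmless (at worst a
+    // card is dirtied for a store that did not happen).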
+ + __ oop_decoder($tmp1$$Register, Rnew, true /* maybe_null */); + + write_barrier_post(masm, this, + Raddr /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + __ block_comment("} g1CompareAndExchangeN"); + %} + ins_pipe(pipe_class_dummy); +%} + +// Load narrow oop +instruct g1LoadN(iRegN dst, indirect mem, iRegP tmp1, iRegP tmp2, flagsReg cr) %{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadN mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(MEMORY_REF_COST); + format %{ "LoadN $dst,$mem\t # (cOop)" %} + ins_encode %{ + __ block_comment("g1LoadN {"); + __ z_llgf($dst$$Register, Address($mem$$Register)); + if ((barrier_data() & G1C2BarrierPre) != 0) { + __ oop_decoder($tmp1$$Register, $dst$$Register, true); + write_barrier_pre(masm, this, + noreg /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register ); + } + __ block_comment("} g1LoadN"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1GetAndSetN(indirect mem, iRegN dst, iRegI tmp, iRegL tmp1, iRegL tmp2, iRegL tmp3, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set dst (GetAndSetN mem dst)); + effect(KILL cr, TEMP tmp, TEMP tmp1, TEMP tmp2, TEMP tmp3); // USE_DEF dst by match rule. + format %{ "XCHGN $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %} + ins_encode %{ + __ block_comment("g1GetAndSetN {"); + assert_different_registers($mem$$Register, $dst$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($mem$$Register, $dst$$Register) /* preserve */); + + Register Rdst = reg_to_register_object($dst$$reg); + Register Rtmp = reg_to_register_object($tmp$$reg); + guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF"); + Label retry; + + // Iterate until swap succeeds. + __ z_llgf(Rtmp, Address($mem$$Register)); // current contents + __ bind(retry); + // Calculate incremented value. + __ z_csy(Rtmp, Rdst, Address($mem$$Register)); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry. + + __ oop_decoder($tmp1$$Register, $dst$$Register, true /* maybe_null */); + + __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value. 
+ + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + + __ block_comment("} g1GetAndSetN"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1CompareAndSwapP(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, iRegL tmp1, iRegL tmp2, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, USE mem_ptr, USE_KILL oldval, KILL cr); + format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %} + ins_encode %{ + __ block_comment("g1CompareAndSwapP {"); + assert_different_registers($oldval$$Register, $mem_ptr$$Register); + assert_different_registers($newval$$Register, $mem_ptr$$Register); + + Register Rcomp = reg_to_register_object($oldval$$reg); + Register Rnew = reg_to_register_object($newval$$reg); + Register Raddr = reg_to_register_object($mem_ptr$$reg); + Register Rres = reg_to_register_object($res$$reg); + + write_barrier_pre(masm, this, + noreg /* obj */, + Rcomp /* pre_val */, + $tmp1$$Register /* tmp1 */, + RegSet::of(Raddr, Rcomp, Rnew) /* preserve */, + RegSet::of(Rres) /* no_preserve */); + + __ z_csg(Rcomp, Rnew, 0, Raddr); + + if (VM_Version::has_LoadStoreConditional()) { + __ load_const_optimized(Z_R0_scratch, 0L); // false (failed) + __ load_const_optimized(Rres, 1L); // true (succeed) + __ z_locgr(Rres, Z_R0_scratch, Assembler::bcondNotEqual); + } else { + Label done; + __ load_const_optimized(Rres, 0L); // false (failed) + __ z_brne(done); // Assume true to be the common case. + __ load_const_optimized(Rres, 1L); // true (succeed) + __ bind(done); + } + + write_barrier_post(masm, this, + Raddr /* store_addr */, + Rnew /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + __ block_comment("} g1CompareAndSwapP"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1CompareAndExchangeP(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegP res, iRegL tmp1, iRegL tmp2, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndExchangeP mem_ptr (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, USE mem_ptr, USE_KILL oldval, KILL cr); + format %{ "$res = CompareAndExchangeP $oldval,$newval,$mem_ptr" %} + ins_encode %{ + __ block_comment("g1CompareAndExchangeP {"); + assert_different_registers($oldval$$Register, $mem_ptr$$Register); + assert_different_registers($newval$$Register, $mem_ptr$$Register); + + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP. 
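+    // Unlike the narrow-oop variants above, the full-pointer variants can pass
+    // $newval to the post barrier directly; no decode into a temporary is
+    // needed before the region and card checks.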
+ write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($mem_ptr$$Register, $oldval$$Register, $newval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + + __ z_lgr($res$$Register, $oldval$$Register); // previous content + + __ z_csg($oldval$$Register, $newval$$Register, 0, $mem_ptr$$reg); + + write_barrier_post(masm, this, + $mem_ptr$$Register /* store_addr */, + $newval$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + __ block_comment("} g1CompareAndExchangeP"); + %} + ins_pipe(pipe_class_dummy); +%} + +// Load Pointer +instruct g1LoadP(iRegP dst, memory mem, iRegL tmp1, flagsReg cr) %{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadP mem)); + effect(TEMP dst, TEMP tmp1, KILL cr); + ins_cost(MEMORY_REF_COST); + format %{ "LG $dst,$mem\t # ptr" %} + ins_encode %{ + __ block_comment("g1LoadP {"); + __ z_lg($dst$$Register, $mem$$Address); + write_barrier_pre(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp1$$Register ); + __ block_comment("} g1LoadP"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1GetAndSetP(indirect mem, iRegP dst, iRegL tmp, iRegL tmp1, iRegL tmp2, flagsReg cr) %{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set dst (GetAndSetP mem dst)); + effect(KILL cr, TEMP tmp, TEMP tmp1, TEMP tmp2); // USE_DEF dst by match rule. + format %{ "XCHGP $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %} + ins_encode %{ + __ block_comment("g1GetAndSetP {"); + + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp$$Register /* pre_val (as a temporary register) */, + $tmp1$$Register /* tmp1 */, + RegSet::of($mem$$Register, $dst$$Register) /* preserve */); + + __ z_lgr($tmp1$$Register, $dst$$Register); + Register Rdst = reg_to_register_object($dst$$reg); + Register Rtmp = reg_to_register_object($tmp$$reg); + guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF"); + Label retry; + + // Iterate until swap succeeds. + __ z_lg(Rtmp, Address($mem$$Register)); // current contents + __ bind(retry); + // Calculate incremented value. + __ z_csg(Rtmp, Rdst, Address($mem$$Register)); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry. + __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value. 
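+    // $tmp1 still holds the new value (copied from $dst before the swap loop
+    // repurposed it), so it is what the post barrier sees as new_val.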
+ + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp$$Register /* tmp2 */); + __ block_comment("} g1GetAndSetP"); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct g1EncodePAndStoreN(indirect mem, iRegP src, iRegL tmp1, iRegL tmp2, flagsReg cr) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, KILL cr); + // ins_cost(INSN_COST); + format %{ "encode_heap_oop $tmp1, $src\n\t" + "st $tmp1, $mem\t# compressed ptr" %} + ins_encode %{ + __ block_comment("g1EncodePAndStoreN {"); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp1 */, + RegSet::of($mem$$Register, $src$$Register) /* preserve */); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ oop_encoder($tmp1$$Register, $src$$Register, true /* maybe_null */); + } else { + __ oop_encoder($tmp1$$Register, $src$$Register, false /* maybe_null */); + } + __ z_st($tmp1$$Register, Address($mem$$Register)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp1$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + __ block_comment("} g1EncodePAndStoreN"); + %} + ins_pipe(pipe_class_dummy); +%} diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp index 28892da6ca4..d826b4a06f3 100644 --- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.cpp @@ -33,6 +33,9 @@ #include "runtime/jniHandles.hpp" #include "runtime/stubRoutines.hpp" #include "utilities/macros.hpp" +#ifdef COMPILER2 +#include "gc/shared/c2/barrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> @@ -194,8 +197,93 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) { #ifdef COMPILER2 -OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { - Unimplemented(); // This must be implemented to support late barrier expansion. 
+OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) const { + if (!OptoReg::is_reg(opto_reg)) { + return OptoReg::Bad; + } + + VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if ((vm_reg->is_Register() || vm_reg ->is_FloatRegister()) && (opto_reg & 1) != 0) { + return OptoReg::Bad; + } + + return opto_reg; +} + +#undef __ +#define __ _masm-> + +SaveLiveRegisters::SaveLiveRegisters(MacroAssembler *masm, BarrierStubC2 *stub) + : _masm(masm), _reg_mask(stub->preserve_set()) { + + const int register_save_size = iterate_over_register_mask(ACTION_COUNT_ONLY) * BytesPerWord; + + _frame_size = align_up(register_save_size, frame::alignment_in_bytes) + frame::z_abi_160_size; // FIXME: this could be restricted to argument only + + __ save_return_pc(); + __ push_frame(_frame_size, Z_R14); // FIXME: check if Z_R1_scaratch can do a job here; + + __ z_lg(Z_R14, _z_common_abi(return_pc) + _frame_size, Z_SP); + + iterate_over_register_mask(ACTION_SAVE, _frame_size); +} + +SaveLiveRegisters::~SaveLiveRegisters() { + iterate_over_register_mask(ACTION_RESTORE, _frame_size); + + __ pop_frame(); + + __ restore_return_pc(); +} + +int SaveLiveRegisters::iterate_over_register_mask(IterationAction action, int offset) { + int reg_save_index = 0; + RegMaskIterator live_regs_iterator(_reg_mask); + + while(live_regs_iterator.has_next()) { + const OptoReg::Name opto_reg = live_regs_iterator.next(); + + // Filter out stack slots (spilled registers, i.e., stack-allocated registers). + if (!OptoReg::is_reg(opto_reg)) { + continue; + } + + const VMReg vm_reg = OptoReg::as_VMReg(opto_reg); + if (vm_reg->is_Register()) { + Register std_reg = vm_reg->as_Register(); + + if (std_reg->encoding() >= Z_R2->encoding() && std_reg->encoding() <= Z_R15->encoding()) { + reg_save_index++; + + if (action == ACTION_SAVE) { + __ z_stg(std_reg, offset - reg_save_index * BytesPerWord, Z_SP); + } else if (action == ACTION_RESTORE) { + __ z_lg(std_reg, offset - reg_save_index * BytesPerWord, Z_SP); + } else { + assert(action == ACTION_COUNT_ONLY, "Sanity"); + } + } + } else if (vm_reg->is_FloatRegister()) { + FloatRegister fp_reg = vm_reg->as_FloatRegister(); + if (fp_reg->encoding() >= Z_F0->encoding() && fp_reg->encoding() <= Z_F15->encoding() + && fp_reg->encoding() != Z_F1->encoding()) { + reg_save_index++; + + if (action == ACTION_SAVE) { + __ z_std(fp_reg, offset - reg_save_index * BytesPerWord, Z_SP); + } else if (action == ACTION_RESTORE) { + __ z_ld(fp_reg, offset - reg_save_index * BytesPerWord, Z_SP); + } else { + assert(action == ACTION_COUNT_ONLY, "Sanity"); + } + } + } else if (false /* vm_reg->is_VectorRegister() */){ + fatal("Vector register support is not there yet!"); + } else { + fatal("Register type is not known"); + } + } + return reg_save_index; } #endif // COMPILER2 diff --git a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp index de1de8a51a7..fb61adc55b5 100644 --- a/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/gc/shared/barrierSetAssembler_s390.hpp @@ -32,7 +32,9 @@ #ifdef COMPILER2 #include "code/vmreg.hpp" #include "opto/optoreg.hpp" +#include "opto/regmask.hpp" +class BarrierStubC2; class Node; #endif // COMPILER2 @@ -62,8 +64,42 @@ public: #ifdef COMPILER2 OptoReg::Name refine_register(const Node* node, - OptoReg::Name opto_reg); + OptoReg::Name opto_reg) const; #endif // COMPILER2 }; +#ifdef COMPILER2 + +// This class saves and restores the registers that need 
to be preserved across +// the runtime call represented by a given C2 barrier stub. Use as follows: +// { +// SaveLiveRegisters save(masm, stub); +// .. +// __ call_VM_leaf(...); +// .. +// } + +class SaveLiveRegisters { + MacroAssembler* _masm; + RegMask _reg_mask; + Register _result_reg; + int _frame_size; + + public: + SaveLiveRegisters(MacroAssembler *masm, BarrierStubC2 *stub); + + ~SaveLiveRegisters(); + + private: + enum IterationAction : int { + ACTION_SAVE, + ACTION_RESTORE, + ACTION_COUNT_ONLY + }; + + int iterate_over_register_mask(IterationAction action, int offset = 0); +}; + +#endif // COMPILER2 + #endif // CPU_S390_GC_SHARED_BARRIERSETASSEMBLER_S390_HPP diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index af281345b14..e192bbab0de 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -2127,8 +2127,9 @@ unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) { // Pop current C frame. void MacroAssembler::pop_frame() { - BLOCK_COMMENT("pop_frame:"); + BLOCK_COMMENT("pop_frame {"); Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP); + BLOCK_COMMENT("} pop_frame"); } // Pop current C frame and restore return PC register (Z_R14). diff --git a/src/hotspot/cpu/s390/register_s390.hpp b/src/hotspot/cpu/s390/register_s390.hpp index 931e899257e..18af232e569 100644 --- a/src/hotspot/cpu/s390/register_s390.hpp +++ b/src/hotspot/cpu/s390/register_s390.hpp @@ -448,4 +448,12 @@ constexpr Register Z_R0_scratch = Z_R0; constexpr Register Z_R1_scratch = Z_R1; constexpr FloatRegister Z_fscratch_1 = Z_F1; +typedef AbstractRegSet<Register> RegSet; + +template <> +inline Register AbstractRegSet<Register>::first() { + if (_bitset == 0) { return noreg; } + return as_Register(count_trailing_zeros(_bitset)); +} + #endif // CPU_S390_REGISTER_S390_HPP diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index 1bc94842150..8181e96ecfc 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1644,6 +1644,10 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() { // Should the matcher clone input 'm' of node 'n'? bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { + if (is_encode_and_store_pattern(n, m)) { + mstack.push(m, Visit); + return true; + } return false; } @@ -3913,6 +3917,7 @@ instruct loadL_unaligned(iRegL dst, memory mem) %{ // Load Pointer instruct loadP(iRegP dst, memory mem) %{ match(Set dst (LoadP mem)); + predicate(n->as_Load()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(Z_DISP3_SIZE); format %{ "LG $dst,$mem\t # ptr" %} @@ -3924,6 +3929,7 @@ instruct loadP(iRegP dst, memory mem) %{ // LoadP + CastP2L instruct castP2X_loadP(iRegL dst, memory mem) %{ match(Set dst (CastP2X (LoadP mem))); + predicate(n->as_Load()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(Z_DISP3_SIZE); format %{ "LG $dst,$mem\t # ptr + p2x" %} @@ -4286,6 +4292,7 @@ instruct storeL(memory mem, iRegL src) %{ // Store Pointer instruct storeP(memory dst, memoryRegP src) %{ match(Set dst (StoreP dst src)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(Z_DISP3_SIZE); format %{ "STG $src,$dst\t # ptr" %} @@ -4388,6 +4395,7 @@ instruct memInitL(memoryRS mem, immL16 src) %{ // Move Immediate to 8-byte memory. 
instruct memInitP(memoryRS mem, immP16 src) %{ match(Set mem (StoreP mem src)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(6); format %{ "MVGHI $mem,$src\t # direct mem init 8" %} @@ -4417,6 +4425,7 @@ instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{ // Load narrow oop instruct loadN(iRegN dst, memory mem) %{ match(Set dst (LoadN mem)); + predicate(n->as_Load()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(Z_DISP3_SIZE); format %{ "LoadN $dst,$mem\t # (cOop)" %} @@ -4480,7 +4489,7 @@ instruct loadConNKlass(iRegN dst, immNKlass src) %{ instruct decodeLoadN(iRegP dst, memory mem) %{ match(Set dst (DecodeN (LoadN mem))); - predicate(false && (CompressedOops::base()==nullptr)&&(CompressedOops::shift()==0)); + predicate(false && (CompressedOops::base()==nullptr) && (CompressedOops::shift()==0)); ins_cost(MEMORY_REF_COST); size(Z_DISP3_SIZE); format %{ "DecodeLoadN $dst,$mem\t # (cOop Load+Decode)" %} @@ -4735,6 +4744,7 @@ instruct encodeP_NN_Ex(iRegN dst, iRegP src, flagsReg cr) %{ // Store Compressed Pointer instruct storeN(memory mem, iRegN_P2N src) %{ match(Set mem (StoreN mem src)); + predicate(n->as_Store()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(Z_DISP_SIZE); format %{ "ST $src,$mem\t # (cOop)" %} @@ -5146,6 +5156,7 @@ instruct compareAndSwapL_bool(iRegP mem_ptr, rarg5RegL oldval, iRegL newval, iRe instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, flagsReg cr) %{ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + predicate(n->as_LoadStore()->barrier_data() == 0); effect(USE mem_ptr, USE_KILL oldval, KILL cr); size(18); format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %} @@ -5156,6 +5167,7 @@ instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, instruct compareAndSwapN_bool(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, flagsReg cr) %{ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + predicate(n->as_LoadStore()->barrier_data() == 0); effect(USE mem_ptr, USE_KILL oldval, KILL cr); size(16); format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %} @@ -5443,6 +5455,7 @@ instruct xchgL_reg_mem(memoryRSY mem, iRegL dst, iRegL tmp, flagsReg cr) %{ %} instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set dst (GetAndSetN mem dst)); effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule. format %{ "XCHGN $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %} @@ -5452,6 +5465,7 @@ instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{ instruct xchgP_reg_mem(memoryRSY mem, iRegP dst, iRegL tmp, flagsReg cr) %{ match(Set dst (GetAndSetP mem dst)); + predicate(n->as_LoadStore()->barrier_data() == 0); effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule. 
format %{ "XCHGP $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %} ins_encode(z_enc_SwapL(mem, dst, tmp)); @@ -5926,7 +5940,7 @@ instruct addP_regN_reg_imm20(iRegP dst, iRegP_N2P src1, iRegL src2, immL20 con) instruct addP_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{ match(Set mem (StoreP mem (AddP (LoadP mem) src))); effect(KILL cr); - predicate(VM_Version::has_MemWithImmALUOps()); + predicate(VM_Version::has_MemWithImmALUOps() && n->as_LoadStore()->barrier_data() == 0); ins_cost(MEMORY_REF_COST); size(6); format %{ "AGSI $mem,$src\t # direct mem add 8 (ptr)" %} diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index b52be627776..b6be4012519 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -38,7 +38,10 @@ #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" #include "gc/g1/c1/g1BarrierSetC1.hpp" -#endif +#endif // COMPILER1 +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif // COMPILER2 #define __ masm-> @@ -160,6 +163,56 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator } } +static void generate_queue_insertion(MacroAssembler* masm, ByteSize index_offset, ByteSize buffer_offset, Label& runtime, + const Register thread, const Register value, const Register temp) { + // This code assumes that buffer index is pointer sized. + STATIC_ASSERT(in_bytes(SATBMarkQueue::byte_width_of_index()) == sizeof(intptr_t)); + // Can we store a value in the given thread's buffer? + // (The index field is typed as size_t.) + __ movptr(temp, Address(thread, in_bytes(index_offset))); // temp := *(index address) + __ testptr(temp, temp); // index == 0? + __ jcc(Assembler::zero, runtime); // jump to runtime if index == 0 (full buffer) + // The buffer is not full, store value into it. + __ subptr(temp, wordSize); // temp := next index + __ movptr(Address(thread, in_bytes(index_offset)), temp); // *(index address) := next index + __ addptr(temp, Address(thread, in_bytes(buffer_offset))); // temp := buffer address + next index + __ movptr(Address(temp, 0), value); // *(buffer address + next index) := value +} + +static void generate_pre_barrier_fast_path(MacroAssembler* masm, + const Register thread) { + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ cmpl(in_progress, 0); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ cmpb(in_progress, 0); + } +} + +static void generate_pre_barrier_slow_path(MacroAssembler* masm, + const Register obj, + const Register pre_val, + const Register thread, + const Register tmp, + Label& done, + Label& runtime) { + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); + } + // Is the previous value null? 
+ __ cmpptr(pre_val, NULL_WORD); + __ jcc(Assembler::equal, done); + generate_queue_insertion(masm, + G1ThreadLocalData::satb_mark_queue_index_offset(), + G1ThreadLocalData::satb_mark_queue_buffer_offset(), + runtime, + thread, pre_val, tmp); + __ jmp(done); +} + void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, Register obj, Register pre_val, @@ -185,43 +238,10 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, assert(pre_val != rax, "check this code"); } - Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); - Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); - Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? - if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { - __ cmpl(in_progress, 0); - } else { - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); - __ cmpb(in_progress, 0); - } + generate_pre_barrier_fast_path(masm, thread); + // If marking is not active (*(mark queue active address) == 0), jump to done __ jcc(Assembler::equal, done); - - // Do we need to load the previous value? - if (obj != noreg) { - __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); - } - - // Is the previous value null? - __ cmpptr(pre_val, NULL_WORD); - __ jcc(Assembler::equal, done); - - // Can we store original value in the thread's buffer? - // Is index == 0? - // (The index field is typed as size_t.) - - __ movptr(tmp, index); // tmp := *index_adr - __ cmpptr(tmp, 0); // tmp == 0? - __ jcc(Assembler::equal, runtime); // If yes, goto runtime - - __ subptr(tmp, wordSize); // tmp := tmp - wordSize - __ movptr(index, tmp); // *index_adr := tmp - __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr - - // Record the previous value - __ movptr(Address(tmp, 0), pre_val); - __ jmp(done); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, done, runtime); __ bind(runtime); @@ -263,6 +283,54 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, __ bind(done); } +static void generate_post_barrier_fast_path(MacroAssembler* masm, + const Register store_addr, + const Register new_val, + const Register tmp, + const Register tmp2, + Label& done, + bool new_val_may_be_null) { + CardTableBarrierSet* ct = barrier_set_cast<CardTableBarrierSet*>(BarrierSet::barrier_set()); + // Does store cross heap regions? + __ movptr(tmp, store_addr); // tmp := store address + __ xorptr(tmp, new_val); // tmp := store address ^ new value + __ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes); // ((store address ^ new value) >> LogOfHRGrainBytes) == 0? + __ jcc(Assembler::equal, done); + // Crosses regions, storing null? + if (new_val_may_be_null) { + __ cmpptr(new_val, NULL_WORD); // new value == null? + __ jcc(Assembler::equal, done); + } + // Storing region crossing non-null, is card young? + __ movptr(tmp, store_addr); // tmp := store address + __ shrptr(tmp, CardTable::card_shift()); // tmp := card address relative to card table base + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. + __ movptr(tmp2, (intptr_t)ct->card_table()->byte_map_base()); // tmp2 := card table base address + __ addptr(tmp, tmp2); // tmp := card address + __ cmpb(Address(tmp, 0), G1CardTable::g1_young_card_val()); // *(card address) == young_card_val?
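+ // Note: this helper ends by setting condition flags rather than branching; callers branch on the cmpb above (equal means the card is already young and the rest of the post-barrier can be skipped).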
+} + +static void generate_post_barrier_slow_path(MacroAssembler* masm, + const Register thread, + const Register tmp, + const Register tmp2, + Label& done, + Label& runtime) { + __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); // StoreLoad membar + __ cmpb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) == dirty_card_val? + __ jcc(Assembler::equal, done); + // Storing a region crossing, non-null oop, card is clean. + // Dirty card and log. + __ movb(Address(tmp, 0), G1CardTable::dirty_card_val()); // *(card address) := dirty_card_val + generate_queue_insertion(masm, + G1ThreadLocalData::dirty_card_queue_index_offset(), + G1ThreadLocalData::dirty_card_queue_buffer_offset(), + runtime, + thread, tmp, tmp2); + __ jmp(done); +} + void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register store_addr, Register new_val, @@ -273,74 +341,125 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, assert(thread == r15_thread, "must be"); #endif // _LP64 - Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); - Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); - - CardTableBarrierSet* ct = - barrier_set_cast(BarrierSet::barrier_set()); - Label done; Label runtime; - // Does store cross heap regions? - - __ movptr(tmp, store_addr); - __ xorptr(tmp, new_val); - __ shrptr(tmp, G1HeapRegion::LogOfHRGrainBytes); + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, done, true /* new_val_may_be_null */); + // If card is young, jump to done __ jcc(Assembler::equal, done); - - // crosses regions, storing null? - - __ cmpptr(new_val, NULL_WORD); - __ jcc(Assembler::equal, done); - - // storing region crossing non-null, is card already dirty? - - const Register card_addr = tmp; - const Register cardtable = tmp2; - - __ movptr(card_addr, store_addr); - __ shrptr(card_addr, CardTable::card_shift()); - // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT - // a valid address and therefore is not properly handled by the relocation code. - __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base()); - __ addptr(card_addr, cardtable); - - __ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val()); - __ jcc(Assembler::equal, done); - - __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); - __ cmpb(Address(card_addr, 0), G1CardTable::dirty_card_val()); - __ jcc(Assembler::equal, done); - - - // storing a region crossing, non-null oop, card is clean. - // dirty card and log. - - __ movb(Address(card_addr, 0), G1CardTable::dirty_card_val()); - - // The code below assumes that buffer index is pointer sized. 
- STATIC_ASSERT(in_bytes(G1DirtyCardQueue::byte_width_of_index()) == sizeof(intptr_t)); - - __ movptr(tmp2, queue_index); - __ testptr(tmp2, tmp2); - __ jcc(Assembler::zero, runtime); - __ subptr(tmp2, wordSize); - __ movptr(queue_index, tmp2); - __ addptr(tmp2, buffer); - __ movptr(Address(tmp2, 0), card_addr); - __ jmp(done); + generate_post_barrier_slow_path(masm, thread, tmp, tmp2, done, runtime); __ bind(runtime); // save the live input values RegSet saved = RegSet::of(store_addr NOT_LP64(COMMA thread)); __ push_set(saved); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp, thread); __ pop_set(saved); __ bind(done); } +#if defined(COMPILER2) + +static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) { +#ifdef _LP64 + SaveLiveRegisters save_registers(masm, stub); + if (c_rarg0 != arg) { + __ mov(c_rarg0, arg); + } + __ mov(c_rarg1, r15_thread); + // rax is a caller-saved, non-argument-passing register, so it does not + // interfere with c_rarg0 or c_rarg1. If it contained any live value before + // entering this stub, it is saved at this point, and restored after the + // call. If it did not contain any live value, it is free to be used. In + // either case, it is safe to use it here as a call scratch register. + __ call(RuntimeAddress(runtime_path), rax); +#else + Unimplemented(); +#endif // _LP64 +} + +void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + G1PreBarrierStubC2* stub) { +#ifdef _LP64 + assert(thread == r15_thread, "must be"); +#endif // _LP64 + assert(pre_val != noreg, "check this code"); + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + } + + stub->initialize_registers(obj, pre_val, thread, tmp); + + generate_pre_barrier_fast_path(masm, thread); + // If marking is active (*(mark queue active address) != 0), jump to stub (slow path) + __ jcc(Assembler::notEqual, *stub->entry()); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register obj = stub->obj(); + Register pre_val = stub->pre_val(); + Register thread = stub->thread(); + Register tmp = stub->tmp1(); + assert(stub->tmp2() == noreg, "not needed in this platform"); + + __ bind(*stub->entry()); + generate_pre_barrier_slow_path(masm, obj, pre_val, thread, tmp, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, pre_val, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry)); + __ jmp(*stub->continuation()); +} + +void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2, + G1PostBarrierStubC2* stub) { +#ifdef _LP64 + assert(thread == r15_thread, "must be"); +#endif // _LP64 + + stub->initialize_registers(thread, tmp, tmp2); + + bool new_val_may_be_null = (stub->barrier_data() & G1C2BarrierPostNotNull) == 0; + generate_post_barrier_fast_path(masm, store_addr, new_val, tmp, tmp2, *stub->continuation(), new_val_may_be_null); + // If card is not young, jump to stub (slow path) + __ 
jcc(Assembler::notEqual, *stub->entry()); + + __ bind(*stub->continuation()); +} + +void G1BarrierSetAssembler::generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const { + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + Label runtime; + Register thread = stub->thread(); + Register tmp = stub->tmp1(); // tmp holds the card address. + Register tmp2 = stub->tmp2(); + assert(stub->tmp3() == noreg, "not needed in this platform"); + + __ bind(*stub->entry()); + generate_post_barrier_slow_path(masm, thread, tmp, tmp2, *stub->continuation(), runtime); + + __ bind(runtime); + generate_c2_barrier_runtime_call(masm, stub, tmp, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)); + __ jmp(*stub->continuation()); +} + +#endif // COMPILER2 + void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) { bool in_heap = (decorators & IN_HEAP) != 0; diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp index a5695f5657a..4dbb1efd885 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.hpp @@ -32,6 +32,9 @@ class LIR_Assembler; class StubAssembler; class G1PreBarrierStub; class G1PostBarrierStub; +class G1BarrierStubC2; +class G1PreBarrierStubC2; +class G1PostBarrierStubC2; class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { protected: @@ -65,6 +68,26 @@ class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register dst, Address src, Register tmp1, Register tmp_thread); + +#ifdef COMPILER2 + void g1_write_barrier_pre_c2(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + G1PreBarrierStubC2* c2_stub); + void generate_c2_pre_barrier_stub(MacroAssembler* masm, + G1PreBarrierStubC2* stub) const; + void g1_write_barrier_post_c2(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2, + G1PostBarrierStubC2* c2_stub); + void generate_c2_post_barrier_stub(MacroAssembler* masm, + G1PostBarrierStubC2* stub) const; +#endif // COMPILER2 }; #endif // CPU_X86_GC_G1_G1BARRIERSETASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad new file mode 100644 index 00000000000..8c1559f90f4 --- /dev/null +++ b/src/hotspot/cpu/x86/gc/g1/g1_x86_64.ad @@ -0,0 +1,371 @@ +// +// Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). 
+// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +source_hpp %{ + +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#include "gc/shared/gc_globals.hpp" + +%} + +source %{ + +#include "gc/g1/g1BarrierSetAssembler_x86.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" + +static void write_barrier_pre(MacroAssembler* masm, + const MachNode* node, + Register obj, + Register pre_val, + Register tmp, + RegSet preserve = RegSet(), + RegSet no_preserve = RegSet()) { + if (!G1PreBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node); + for (RegSetIterator<Register> reg = preserve.begin(); *reg != noreg; ++reg) { + stub->preserve(*reg); + } + for (RegSetIterator<Register> reg = no_preserve.begin(); *reg != noreg; ++reg) { + stub->dont_preserve(*reg); + } + g1_asm->g1_write_barrier_pre_c2(masm, obj, pre_val, r15_thread, tmp, stub); +} + +static void write_barrier_post(MacroAssembler* masm, + const MachNode* node, + Register store_addr, + Register new_val, + Register tmp1, + Register tmp2) { + if (!G1PostBarrierStubC2::needs_barrier(node)) { + return; + } + Assembler::InlineSkippedInstructionsCounter skip_counter(masm); + G1BarrierSetAssembler* g1_asm = static_cast<G1BarrierSetAssembler*>(BarrierSet::barrier_set()->barrier_set_assembler()); + G1PostBarrierStubC2* const stub = G1PostBarrierStubC2::create(node); + g1_asm->g1_write_barrier_post_c2(masm, store_addr, new_val, r15_thread, tmp1, tmp2, stub); +} + +%} + +instruct g1StoreP(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreP mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(125); // XXX + format %{ "movq $mem, $src\t# ptr" %} + ins_encode %{ + // Materialize the store address internally (as opposed to defining 'mem' as + // an indirect memory operand) to reduce the overhead of LCM when processing + // large basic blocks with many stores. Such basic blocks arise, for + // instance, from static initializations of large String arrays. + // The same holds for g1StoreN and g1EncodePAndStoreN.
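+ // The pre- and post-barrier fast paths emitted below are conditional on the + // node's barrier data; each one branches to an out-of-line G1 barrier stub + // that performs the slow-path work and preserves the registers requested here.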
+ __ lea($tmp1$$Register, $mem$$Address); + write_barrier_pre(masm, this, + $tmp1$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($tmp1$$Register, $src$$Register) /* preserve */); + __ movq(Address($tmp1$$Register, 0), $src$$Register); + write_barrier_post(masm, this, + $tmp1$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp3$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(ialu_mem_reg); +%} + +instruct g1StoreN(memory mem, rRegN src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem src)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(125); // XXX + format %{ "movl $mem, $src\t# ptr" %} + ins_encode %{ + __ lea($tmp1$$Register, $mem$$Address); + write_barrier_pre(masm, this, + $tmp1$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($tmp1$$Register, $src$$Register) /* preserve */); + __ movl(Address($tmp1$$Register, 0), $src$$Register); + if ((barrier_data() & G1C2BarrierPost) != 0) { + __ movl($tmp2$$Register, $src$$Register); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ decode_heap_oop($tmp2$$Register); + } else { + __ decode_heap_oop_not_null($tmp2$$Register); + } + } + write_barrier_post(masm, this, + $tmp1$$Register /* store_addr */, + $tmp2$$Register /* new_val */, + $tmp3$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(ialu_mem_reg); +%} + +instruct g1EncodePAndStoreN(memory mem, any_RegP src, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_Store()->barrier_data() != 0); + match(Set mem (StoreN mem (EncodeP src))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + ins_cost(125); // XXX + format %{ "encode_heap_oop $src\n\t" + "movl $mem, $src\t# ptr" %} + ins_encode %{ + __ lea($tmp1$$Register, $mem$$Address); + write_barrier_pre(masm, this, + $tmp1$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($tmp1$$Register, $src$$Register) /* preserve */); + __ movq($tmp2$$Register, $src$$Register); + if ((barrier_data() & G1C2BarrierPostNotNull) == 0) { + __ encode_heap_oop($tmp2$$Register); + } else { + __ encode_heap_oop_not_null($tmp2$$Register); + } + __ movl(Address($tmp1$$Register, 0), $tmp2$$Register); + write_barrier_post(masm, this, + $tmp1$$Register /* store_addr */, + $src$$Register /* new_val */, + $tmp3$$Register /* tmp1 */, + $tmp2$$Register /* tmp2 */); + %} + ins_pipe(ialu_mem_reg); +%} + +instruct g1CompareAndExchangeP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegP oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set oldval (CompareAndExchangeP mem (Binary oldval newval))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "lock\n\t" + "cmpxchgq $newval, $mem" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + // Pass $oldval to the pre-barrier (instead of loading from $mem), because + // $oldval is the only value that can be overwritten. + // The same holds for g1CompareAndSwapP. 
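+ // Note: cmpxchgq never writes its register source operand (only memory or rax), so the + // copy of $newval kept in $tmp1 below survives the exchange and is what the post-barrier + // sees as the new value.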
+ write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */); + __ movq($tmp1$$Register, $newval$$Register); + __ lock(); + __ cmpxchgq($tmp1$$Register, Address($mem$$Register, 0)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1CompareAndExchangeN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegN oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set oldval (CompareAndExchangeN mem (Binary oldval newval))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "lock\n\t" + "cmpxchgq $newval, $mem" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */); + __ movl($tmp1$$Register, $newval$$Register); + __ lock(); + __ cmpxchgl($tmp1$$Register, Address($mem$$Register, 0)); + __ decode_heap_oop($tmp1$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1CompareAndSwapP(rRegI res, indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegP oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapP mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL oldval, KILL cr); + format %{ "lock\n\t" + "cmpxchgq $newval, $mem\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $oldval$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ movq($tmp1$$Register, $newval$$Register); + __ lock(); + __ cmpxchgq($tmp1$$Register, Address($mem$$Register, 0)); + __ setb(Assembler::equal, $res$$Register); + __ movzbl($res$$Register, $res$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1CompareAndSwapN(rRegI res, indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegN oldval, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set res (CompareAndSwapN mem (Binary oldval newval))); + match(Set res (WeakCompareAndSwapN mem (Binary oldval newval))); + effect(TEMP res, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL oldval, KILL cr); + format %{ "lock\n\t" + "cmpxchgq $newval, $mem\n\t" + "sete $res\n\t" + "movzbl $res, $res" %} + ins_encode %{ + assert_different_registers($oldval$$Register, $mem$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register, $oldval$$Register) /* 
preserve */, + RegSet::of($res$$Register) /* no_preserve */); + __ movl($tmp1$$Register, $newval$$Register); + __ lock(); + __ cmpxchgl($tmp1$$Register, Address($mem$$Register, 0)); + __ setb(Assembler::equal, $res$$Register); + __ movzbl($res$$Register, $res$$Register); + __ decode_heap_oop($tmp1$$Register); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1GetAndSetP(indirect mem, rRegP newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set newval (GetAndSetP mem newval)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "xchgq $newval, $mem" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + __ movq($tmp1$$Register, $newval$$Register); + __ xchgq($newval$$Register, Address($mem$$Register, 0)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1GetAndSetN(indirect mem, rRegN newval, rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_LoadStore()->barrier_data() != 0); + match(Set newval (GetAndSetN mem newval)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + format %{ "xchgq $newval, $mem" %} + ins_encode %{ + assert_different_registers($mem$$Register, $newval$$Register); + write_barrier_pre(masm, this, + $mem$$Register /* obj */, + $tmp2$$Register /* pre_val */, + $tmp3$$Register /* tmp */, + RegSet::of($mem$$Register, $newval$$Register) /* preserve */); + __ movl($tmp1$$Register, $newval$$Register); + __ decode_heap_oop($tmp1$$Register); + __ xchgl($newval$$Register, Address($mem$$Register, 0)); + write_barrier_post(masm, this, + $mem$$Register /* store_addr */, + $tmp1$$Register /* new_val */, + $tmp2$$Register /* tmp1 */, + $tmp3$$Register /* tmp2 */); + %} + ins_pipe(pipe_cmpxchg); +%} + +instruct g1LoadP(rRegP dst, memory mem, rRegP tmp, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadP mem)); + effect(TEMP dst, TEMP tmp, KILL cr); + ins_cost(125); // XXX + format %{ "movq $dst, $mem\t# ptr" %} + ins_encode %{ + __ movq($dst$$Register, $mem$$Address); + write_barrier_pre(masm, this, + noreg /* obj */, + $dst$$Register /* pre_val */, + $tmp$$Register /* tmp */); + %} + ins_pipe(ialu_reg_mem); // XXX +%} + +instruct g1LoadN(rRegN dst, memory mem, rRegP tmp1, rRegP tmp2, rFlagsReg cr) +%{ + predicate(UseG1GC && n->as_Load()->barrier_data() != 0); + match(Set dst (LoadN mem)); + effect(TEMP dst, TEMP tmp1, TEMP tmp2, KILL cr); + ins_cost(125); // XXX + format %{ "movl $dst, $mem\t# compressed ptr" %} + ins_encode %{ + __ movl($dst$$Register, $mem$$Address); + __ movl($tmp1$$Register, $dst$$Register); + __ decode_heap_oop($tmp1$$Register); + write_barrier_pre(masm, this, + noreg /* obj */, + $tmp1$$Register /* pre_val */, + $tmp2$$Register /* tmp */); + %} + ins_pipe(ialu_reg_mem); // XXX +%} diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 2b29dd14e4b..b55a1208cf2 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ 
-2457,6 +2457,10 @@ bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { mstack.push(m, Visit); // m = ShiftCntV return true; } + if (is_encode_and_store_pattern(n, m)) { + mstack.push(m, Visit); + return true; + } return false; } diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 1b271683bd6..fee265473be 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -4341,6 +4341,7 @@ instruct loadP(rRegP dst, memory mem) // Load Compressed Pointer instruct loadN(rRegN dst, memory mem) %{ + predicate(n->as_Load()->barrier_data() == 0); match(Set dst (LoadN mem)); ins_cost(125); // XXX @@ -5126,6 +5127,7 @@ instruct storeImmP(memory mem, immP31 src) // Store Compressed Pointer instruct storeN(memory mem, rRegN src) %{ + predicate(n->as_Store()->barrier_data() == 0); match(Set mem (StoreN mem src)); ins_cost(125); // XXX @@ -5150,7 +5152,7 @@ instruct storeNKlass(memory mem, rRegN src) instruct storeImmN0(memory mem, immN0 zero) %{ - predicate(CompressedOops::base() == nullptr); + predicate(CompressedOops::base() == nullptr && n->as_Store()->barrier_data() == 0); match(Set mem (StoreN mem zero)); ins_cost(125); // XXX @@ -5163,6 +5165,7 @@ instruct storeImmN0(memory mem, immN0 zero) instruct storeImmN(memory mem, immN src) %{ + predicate(n->as_Store()->barrier_data() == 0); match(Set mem (StoreN mem src)); ins_cost(150); // XXX @@ -7162,6 +7165,7 @@ instruct compareAndSwapN(rRegI res, memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval))); effect(KILL cr, KILL oldval); @@ -7249,6 +7253,7 @@ instruct compareAndExchangeN( memory mem_ptr, rax_RegN oldval, rRegN newval, rFlagsReg cr) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval))); effect(KILL cr); @@ -7470,6 +7475,7 @@ instruct xchgP( memory mem, rRegP newval) %{ %} instruct xchgN( memory mem, rRegN newval) %{ + predicate(n->as_LoadStore()->barrier_data() == 0); match(Set newval (GetAndSetN mem newval)); format %{ "XCHGL $newval,$mem]" %} ins_encode %{ @@ -11659,6 +11665,7 @@ instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) %{ + predicate(n->in(2)->as_Load()->barrier_data() == 0); match(Set cr (CmpN src (LoadN mem))); format %{ "cmpl $src, $mem\t# compressed ptr" %} @@ -11680,6 +11687,7 @@ instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{ instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) %{ + predicate(n->in(2)->as_Load()->barrier_data() == 0); match(Set cr (CmpN src (LoadN mem))); format %{ "cmpl $mem, $src\t# compressed ptr" %} @@ -11720,7 +11728,8 @@ instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{ instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero) %{ - predicate(CompressedOops::base() != nullptr); + predicate(CompressedOops::base() != nullptr && + n->in(1)->as_Load()->barrier_data() == 0); match(Set cr (CmpN (LoadN mem) zero)); ins_cost(500); // XXX @@ -11733,7 +11742,8 @@ instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero) instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) %{ - predicate(CompressedOops::base() == nullptr); + predicate(CompressedOops::base() == nullptr && + n->in(1)->as_Load()->barrier_data() == 0); match(Set cr (CmpN (LoadN mem) zero)); format %{ "cmpl R12, 
$mem\t# compressed ptr (R12_heapbase==0)" %} diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp index 13b993546cd..8e17d1d2a7a 100644 --- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp +++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp @@ -24,49 +24,32 @@ #include "precompiled.hpp" #include "classfile/javaClasses.hpp" +#include "code/vmreg.inline.hpp" #include "gc/g1/c2/g1BarrierSetC2.hpp" #include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BarrierSetAssembler.hpp" #include "gc/g1/g1BarrierSetRuntime.hpp" #include "gc/g1/g1CardTable.hpp" #include "gc/g1/g1ThreadLocalData.hpp" #include "gc/g1/g1HeapRegion.hpp" #include "opto/arraycopynode.hpp" +#include "opto/block.hpp" #include "opto/compile.hpp" #include "opto/escape.hpp" #include "opto/graphKit.hpp" #include "opto/idealKit.hpp" +#include "opto/machnode.hpp" #include "opto/macro.hpp" +#include "opto/memnode.hpp" +#include "opto/node.hpp" +#include "opto/output.hpp" +#include "opto/regalloc.hpp" #include "opto/rootnode.hpp" +#include "opto/runtime.hpp" #include "opto/type.hpp" +#include "utilities/growableArray.hpp" #include "utilities/macros.hpp" -const TypeFunc *G1BarrierSetC2::write_ref_field_pre_entry_Type() { - const Type **fields = TypeTuple::fields(2); - fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value - fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread - const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); - - // create result type (range) - fields = TypeTuple::fields(0); - const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields); - - return TypeFunc::make(domain, range); -} - -const TypeFunc *G1BarrierSetC2::write_ref_field_post_entry_Type() { - const Type **fields = TypeTuple::fields(2); - fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr - fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread - const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); - - // create result type (range) - fields = TypeTuple::fields(0); - const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields); - - return TypeFunc::make(domain, range); -} - -#define __ ideal. /* * Determine if the G1 pre-barrier can be removed. The pre-barrier is * required by SATB to make sure all objects live at the start of the @@ -84,8 +67,6 @@ const TypeFunc *G1BarrierSetC2::write_ref_field_post_entry_Type() { * The compiler needs to determine that the object in which a field is about * to be written is newly allocated, and that no prior store to the same field * has happened since the allocation. - * - * Returns true if the pre-barrier can be removed */ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, PhaseValues* phase, @@ -97,34 +78,28 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, AllocateNode* alloc = AllocateNode::Ideal_allocation(base); if (offset == Type::OffsetBot) { - return false; // cannot unalias unless there are precise offsets + return false; // Cannot unalias unless there are precise offsets. } - if (alloc == nullptr) { - return false; // No allocation found + return false; // No allocation found. } intptr_t size_in_bytes = type2aelembytes(bt); - - Node* mem = kit->memory(adr_idx); // start searching here... + Node* mem = kit->memory(adr_idx); // Start searching here. 
for (int cnt = 0; cnt < 50; cnt++) { - if (mem->is_Store()) { - Node* st_adr = mem->in(MemNode::Address); intptr_t st_offset = 0; Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset); if (st_base == nullptr) { - break; // inscrutable pointer + break; // Inscrutable pointer. } - - // Break we have found a store with same base and offset as ours so break if (st_base == base && st_offset == offset) { + // We have found a store with same base and offset as ours. break; } - if (st_offset != offset && st_offset != Type::OffsetBot) { const int MAX_STORE = BytesPerLong; if (st_offset >= offset + size_in_bytes || @@ -136,20 +111,18 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, // in the same sequence of RawMem effects. We sometimes initialize // a whole 'tile' of array elements with a single jint or jlong.) mem = mem->in(MemNode::Memory); - continue; // advance through independent store memory + continue; // Advance through independent store memory. } } - if (st_base != base && MemNode::detect_ptr_independence(base, alloc, st_base, AllocateNode::Ideal_allocation(st_base), phase)) { - // Success: The bases are provably independent. + // Success: the bases are provably independent. mem = mem->in(MemNode::Memory); - continue; // advance through independent store memory + continue; // Advance through independent store memory. } } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) { - InitializeNode* st_init = mem->in(0)->as_Initialize(); AllocateNode* st_alloc = st_init->allocation(); @@ -157,7 +130,7 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, // The alloc variable is guaranteed to not be null here from earlier check. if (alloc == st_alloc) { // Check that the initialization is storing null so that no previous store - // has been moved up and directly write a reference + // has been moved up and directly write a reference. Node* captured_store = st_init->find_captured_store(offset, type2aelembytes(T_OBJECT), phase); @@ -166,164 +139,55 @@ bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, } } } - // Unless there is an explicit 'continue', we must bail out here, // because 'mem' is an inscrutable memory state (e.g., a call). break; } - return false; } -// G1 pre/post barriers -void G1BarrierSetC2::pre_barrier(GraphKit* kit, - bool do_load, - Node* ctl, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt) const { - // Some sanity checks - // Note: val is unused in this routine. - - if (do_load) { - // We need to generate the load of the previous value - assert(obj != nullptr, "must have a base"); - assert(adr != nullptr, "where are loading from?"); - assert(pre_val == nullptr, "loaded already?"); - assert(val_type != nullptr, "need a type"); - - if (use_ReduceInitialCardMarks() - && g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, bt, alias_idx)) { - return; - } - - } else { - // In this case both val_type and alias_idx are unused. - assert(pre_val != nullptr, "must be loaded already"); - // Nothing to be done if pre_val is null. 
- if (pre_val->bottom_type() == TypePtr::NULL_PTR) return; - assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here"); - } - assert(bt == T_OBJECT, "or we shouldn't be here"); - - IdealKit ideal(kit, true); - - Node* tls = __ thread(); // ThreadLocalStorage - - Node* no_base = __ top(); - Node* zero = __ ConI(0); - Node* zeroX = __ ConX(0); - - float likely = PROB_LIKELY(0.999); - float unlikely = PROB_UNLIKELY(0.999); - - BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE; - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width"); - - // Offsets into the thread - const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()); - const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()); - - // Now the actual pointers into the thread - Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset)); - Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); - Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); - - // Now some of the values - Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); - - // if (!marking) - __ if_then(marking, BoolTest::ne, zero, unlikely); { - BasicType index_bt = TypeX_X->basic_type(); - assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size."); - Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw); - - if (do_load) { - // load original value - pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx, false, MemNode::unordered, LoadNode::Pinned); - } - - // if (pre_val != nullptr) - __ if_then(pre_val, BoolTest::ne, kit->null()); { - Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); - - // is the queue for this thread full? - __ if_then(index, BoolTest::ne, zeroX, likely); { - - // decrement the index - Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); - - // Now get the buffer location we will log the previous value into and store it - Node *log_addr = __ AddP(no_base, buffer, next_index); - __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered); - // update the index - __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered); - - } __ else_(); { - - // logging buffer is full, call the runtime - const TypeFunc *tf = write_ref_field_pre_entry_Type(); - __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), "write_ref_field_pre_entry", pre_val, tls); - } __ end_if(); // (!index) - } __ end_if(); // (pre_val != nullptr) - } __ end_if(); // (!marking) - - // Final sync IdealKit and GraphKit. - kit->final_sync(ideal); -} - /* - * G1 similar to any GC with a Young Generation requires a way to keep track of - * references from Old Generation to Young Generation to make sure all live + * G1, similar to any GC with a Young Generation, requires a way to keep track + * of references from Old Generation to Young Generation to make sure all live * objects are found. G1 also requires to keep track of object references * between different regions to enable evacuation of old regions, which is done - * as part of mixed collections. 
References are tracked in remembered sets and - * is continuously updated as reference are written to with the help of the - * post-barrier. + * as part of mixed collections. References are tracked in remembered sets, + * which are continuously updated as references are written to with the help of + * the post-barrier. * - * To reduce the number of updates to the remembered set the post-barrier - * filters updates to fields in objects located in the Young Generation, - * the same region as the reference, when the null is being written or - * if the card is already marked as dirty by an earlier write. + * To reduce the number of updates to the remembered set, the post-barrier + * filters out updates to fields in objects located in the Young Generation, the + * same region as the reference, when null is being written, or if the card is + * already marked as dirty by an earlier write. * * Under certain circumstances it is possible to avoid generating the - * post-barrier completely if it is possible during compile time to prove - * the object is newly allocated and that no safepoint exists between the - * allocation and the store. + * post-barrier completely, if it is possible during compile time to prove the + * object is newly allocated and that no safepoint exists between the allocation + * and the store. This can be seen as a compile-time version of the + * above-mentioned Young Generation filter. * - * In the case of slow allocation the allocation code must handle the barrier - * as part of the allocation in the case the allocated object is not located - * in the nursery; this would happen for humongous objects. - * - * Returns true if the post barrier can be removed + * In the case of a slow allocation, the allocation code must handle the barrier + * as part of the allocation if the allocated object is not located in the + * nursery; this would happen for humongous objects. */ bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit, - PhaseValues* phase, Node* store, + PhaseValues* phase, Node* store_ctrl, Node* adr) const { intptr_t offset = 0; Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); AllocateNode* alloc = AllocateNode::Ideal_allocation(base); if (offset == Type::OffsetBot) { - return false; // cannot unalias unless there are precise offsets + return false; // Cannot unalias unless there are precise offsets. } - if (alloc == nullptr) { - return false; // No allocation found + return false; // No allocation found. } - // Start search from Store node - Node* mem = store->in(MemNode::Control); + Node* mem = store_ctrl; // Start search from Store node. if (mem->is_Proj() && mem->in(0)->is_Initialize()) { - InitializeNode* st_init = mem->in(0)->as_Initialize(); AllocateNode* st_alloc = st_init->allocation(); - // Make sure we are looking at the same allocation if (alloc == st_alloc) { return true; @@ -333,725 +197,361 @@ bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit, return false; } -// -// Update the card table and add card address to the queue -// -void G1BarrierSetC2::g1_mark_card(GraphKit* kit, - IdealKit& ideal, - Node* card_adr, - Node* oop_store, - uint oop_alias_idx, - Node* index, - Node* index_adr, - Node* buffer, - const TypeFunc* tf) const { - Node* zero = __ ConI(0); - Node* zeroX = __ ConX(0); - Node* no_base = __ top(); - BasicType card_bt = T_BYTE; - // Smash zero into card. 
MUST BE ORDERED WRT TO STORE - __ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw); - - // Now do the queue work - __ if_then(index, BoolTest::ne, zeroX); { - - Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); - Node* log_addr = __ AddP(no_base, buffer, next_index); - - // Order, see storeCM. - __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered); - __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered); - - } __ else_(); { - __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), "write_ref_field_post_entry", card_adr, __ thread()); - } __ end_if(); - -} - -void G1BarrierSetC2::post_barrier(GraphKit* kit, - Node* ctl, - Node* oop_store, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - BasicType bt, - bool use_precise) const { - // If we are writing a null then we need no post barrier - - if (val != nullptr && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) { - // Must be null - const Type* t = val->bottom_type(); - assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be null"); - // No post barrier if writing null - return; - } - - if (use_ReduceInitialCardMarks() && obj == kit->just_allocated_object(kit->control())) { - // We can skip marks on a freshly-allocated object in Eden. - // Keep this code in sync with CardTableBarrierSet::on_slowpath_allocation_exit. - // That routine informs GC to take appropriate compensating steps, - // upon a slow-path allocation, so as to make this card-mark - // elision safe. - return; - } - - if (use_ReduceInitialCardMarks() - && g1_can_remove_post_barrier(kit, &kit->gvn(), oop_store, adr)) { - return; - } - - if (!use_precise) { - // All card marks for a (non-array) instance are in one place: - adr = obj; - } - // (Else it's an array (or unknown), and we want more precise card marks.) - assert(adr != nullptr, ""); - - IdealKit ideal(kit, true); - - Node* tls = __ thread(); // ThreadLocalStorage - - Node* no_base = __ top(); - float likely = PROB_LIKELY_MAG(3); - float unlikely = PROB_UNLIKELY_MAG(3); - Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val()); - Node* dirty_card = __ ConI((jint)G1CardTable::dirty_card_val()); - Node* zeroX = __ ConX(0); - - const TypeFunc *tf = write_ref_field_post_entry_Type(); - - // Offsets into the thread - const int index_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()); - const int buffer_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()); - - // Pointers into the thread - - Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); - Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); - - // Now some values - // Use ctrl to avoid hoisting these values past a safepoint, which could - // potentially reset these fields in the JavaThread. 
- Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw); - Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); - - // Convert the store obj pointer to an int prior to doing math on it - // Must use ctrl to prevent "integerized oop" existing across safepoint - Node* cast = __ CastPX(__ ctrl(), adr); - - // Divide pointer by card size - Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift()) ); - - // Combine card table base and card offset - Node* card_adr = __ AddP(no_base, byte_map_base_node(kit), card_offset ); - - // If we know the value being stored does it cross regions? - - if (val != nullptr) { - // Does the store cause us to cross regions? - - // Should be able to do an unsigned compare of region_size instead of - // and extra shift. Do we have an unsigned compare?? - // Node* region_size = __ ConI(1 << G1HeapRegion::LogOfHRGrainBytes); - Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(checked_cast(G1HeapRegion::LogOfHRGrainBytes))); - - // if (xor_res == 0) same region so skip - __ if_then(xor_res, BoolTest::ne, zeroX, likely); { - - // No barrier if we are storing a null. - __ if_then(val, BoolTest::ne, kit->null(), likely); { - - // Ok must mark the card if not already dirty - - // load the original value of the card - Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - - __ if_then(card_val, BoolTest::ne, young_card, unlikely); { - kit->sync_kit(ideal); - kit->insert_mem_bar(Op_MemBarVolatile, oop_store); - __ sync_kit(kit); - - Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - __ if_then(card_val_reload, BoolTest::ne, dirty_card); { - g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); - } __ end_if(); - } __ end_if(); - } __ end_if(); - } __ end_if(); - } else { - // The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks. - // We don't need a barrier here if the destination is a newly allocated object - // in Eden. Otherwise, GC verification breaks because we assume that cards in Eden - // are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()). - assert(!use_ReduceInitialCardMarks(), "can only happen with card marking"); - Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - __ if_then(card_val, BoolTest::ne, young_card); { - g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); - } __ end_if(); - } - - // Final sync IdealKit and GraphKit. - kit->final_sync(ideal); -} - -// Helper that guards and inserts a pre-barrier. -void G1BarrierSetC2::insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, - Node* pre_val, bool need_mem_bar) const { - // We could be accessing the referent field of a reference object. If so, when G1 - // is enabled, we need to log the value in the referent field in an SATB buffer. - // This routine performs some compile time filters and generates suitable - // runtime filters that guard the pre-barrier code. - // Also add memory barrier for non volatile load from the referent field - // to prevent commoning of loads across safepoint. - - // Some compile time checks. - - // If offset is a constant, is it java_lang_ref_Reference::_reference_offset? 
- const TypeX* otype = offset->find_intptr_t_type(); - if (otype != nullptr && otype->is_con() && - otype->get_con() != java_lang_ref_Reference::referent_offset()) { - // Constant offset but not the reference_offset so just return - return; - } - - // We only need to generate the runtime guards for instances. - const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr(); - if (btype != nullptr) { - if (btype->isa_aryptr()) { - // Array type so nothing to do - return; - } - - const TypeInstPtr* itype = btype->isa_instptr(); - if (itype != nullptr) { - // Can the klass of base_oop be statically determined to be - // _not_ a sub-class of Reference and _not_ Object? - ciKlass* klass = itype->instance_klass(); - if (klass->is_loaded() && - !klass->is_subtype_of(kit->env()->Reference_klass()) && - !kit->env()->Object_klass()->is_subtype_of(klass)) { - return; - } - } - } - - // The compile time filters did not reject base_oop/offset so - // we need to generate the following runtime filters - // - // if (offset == java_lang_ref_Reference::_reference_offset) { - // if (instance_of(base, java.lang.ref.Reference)) { - // pre_barrier(_, pre_val, ...); - // } - // } - - float likely = PROB_LIKELY( 0.999); - float unlikely = PROB_UNLIKELY(0.999); - - IdealKit ideal(kit); - - Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset()); - - __ if_then(offset, BoolTest::eq, referent_off, unlikely); { - // Update graphKit memory and control from IdealKit. - kit->sync_kit(ideal); - - Node* ref_klass_con = kit->makecon(TypeKlassPtr::make(kit->env()->Reference_klass())); - Node* is_instof = kit->gen_instanceof(base_oop, ref_klass_con); - - // Update IdealKit memory and control from graphKit. - __ sync_kit(kit); - - Node* one = __ ConI(1); - // is_instof == 0 if base_oop == nullptr - __ if_then(is_instof, BoolTest::eq, one, unlikely); { - - // Update graphKit from IdeakKit. - kit->sync_kit(ideal); - - // Use the pre-barrier to record the value in the referent field - pre_barrier(kit, false /* do_load */, - __ ctrl(), - nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */, - pre_val /* pre_val */, - T_OBJECT); - if (need_mem_bar) { - // Add memory barrier to prevent commoning reads from this field - // across safepoint since GC can change its value. - kit->insert_mem_bar(Op_MemBarCPUOrder); - } - // Update IdealKit from graphKit. - __ sync_kit(kit); - - } __ end_if(); // _ref_type != ref_none - } __ end_if(); // offset == referent_offset - - // Final sync IdealKit and GraphKit. - kit->final_sync(ideal); -} - -#undef __ - Node* G1BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { DecoratorSet decorators = access.decorators(); - Node* adr = access.addr().node(); - Node* obj = access.base(); - - bool anonymous = (decorators & C2_UNSAFE_ACCESS) != 0; - bool mismatched = (decorators & C2_MISMATCHED) != 0; - bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0; - bool in_heap = (decorators & IN_HEAP) != 0; - bool in_native = (decorators & IN_NATIVE) != 0; bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; - bool is_unordered = (decorators & MO_UNORDERED) != 0; bool no_keepalive = (decorators & AS_NO_KEEPALIVE) != 0; - bool is_mixed = !in_heap && !in_native; - bool need_cpu_mem_bar = !is_unordered || mismatched || is_mixed; - - Node* top = Compile::current()->top(); - Node* offset = adr->is_AddP() ? 
adr->in(AddPNode::Offset) : top; - - // If we are reading the value of the referent field of a Reference - // object (either by using Unsafe directly or through reflection) - // then, if G1 is enabled, we need to record the referent in an - // SATB log buffer using the pre-barrier mechanism. - // Also we need to add memory barrier to prevent commoning reads - // from this field across safepoint since GC can change its value. - bool need_read_barrier = (((on_weak || on_phantom) && !no_keepalive) || - (in_heap && unknown && offset != top && obj != top)); - - if (!access.is_oop() || !need_read_barrier) { - return CardTableBarrierSetC2::load_at_resolved(access, val_type); + // If we are reading the value of the referent field of a Reference object, we + // need to record the referent in an SATB log buffer using the pre-barrier + // mechanism. Also we need to add a memory barrier to prevent commoning reads + // from this field across safepoints, since GC can change its value. + bool need_read_barrier = ((on_weak || on_phantom) && !no_keepalive); + if (access.is_oop() && need_read_barrier) { + access.set_barrier_data(G1C2BarrierPre); } - - assert(access.is_parse_access(), "entry not supported at optimization time"); - - C2ParseAccess& parse_access = static_cast(access); - GraphKit* kit = parse_access.kit(); - Node* load; - - Node* control = kit->control(); - const TypePtr* adr_type = access.addr().type(); - MemNode::MemOrd mo = access.mem_node_mo(); - bool requires_atomic_access = (decorators & MO_UNORDERED) == 0; - bool unaligned = (decorators & C2_UNALIGNED) != 0; - bool unsafe = (decorators & C2_UNSAFE_ACCESS) != 0; - // Pinned control dependency is the strictest. So it's ok to substitute it for any other. - load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo, - LoadNode::Pinned, requires_atomic_access, unaligned, mismatched, unsafe, - access.barrier_data()); - - - if (on_weak || on_phantom) { - // Use the pre-barrier to record the value in the referent field - pre_barrier(kit, false /* do_load */, - kit->control(), - nullptr /* obj */, nullptr /* adr */, max_juint /* alias_idx */, nullptr /* val */, nullptr /* val_type */, - load /* pre_val */, T_OBJECT); - // Add memory barrier to prevent commoning reads from this field - // across safepoint since GC can change its value. - kit->insert_mem_bar(Op_MemBarCPUOrder); - } else if (unknown) { - // We do not require a mem bar inside pre_barrier if need_mem_bar - // is set: the barriers would be emitted by us. 
- insert_pre_barrier(kit, obj, offset, load, !need_cpu_mem_bar); - } - - return load; -} - -bool G1BarrierSetC2::is_gc_barrier_node(Node* node) const { - if (CardTableBarrierSetC2::is_gc_barrier_node(node)) { - return true; - } - if (node->Opcode() != Op_CallLeaf) { - return false; - } - CallLeafNode *call = node->as_CallLeaf(); - if (call->_name == nullptr) { - return false; - } - - return strcmp(call->_name, "write_ref_field_pre_entry") == 0 || strcmp(call->_name, "write_ref_field_post_entry") == 0; -} - -bool G1BarrierSetC2::is_g1_pre_val_load(Node* n) { - if (n->is_Load() && n->as_Load()->has_pinned_control_dependency()) { - // Make sure the only users of it are: CmpP, StoreP, and a call to write_ref_field_pre_entry - - // Skip possible decode - if (n->outcnt() == 1 && n->unique_out()->is_DecodeN()) { - n = n->unique_out(); - } - - if (n->outcnt() == 3) { - int found = 0; - for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) { - Node* use = iter.get(); - if (use->is_Cmp() || use->is_Store()) { - ++found; - } else if (use->is_CallLeaf()) { - CallLeafNode* call = use->as_CallLeaf(); - if (strcmp(call->_name, "write_ref_field_pre_entry") == 0) { - ++found; - } - } - } - if (found == 3) { - return true; - } - } - } - return false; -} - -bool G1BarrierSetC2::is_gc_pre_barrier_node(Node *node) const { - return is_g1_pre_val_load(node); + return CardTableBarrierSetC2::load_at_resolved(access, val_type); } void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { - if (is_g1_pre_val_load(node)) { - macro->replace_node(node, macro->zerocon(node->as_Load()->bottom_type()->basic_type())); - } else { - assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required"); - assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes"); - // It could be only one user, URShift node, in Object.clone() intrinsic - // but the new allocation is passed to arraycopy stub and it could not - // be scalar replaced. So we don't check the case. + eliminate_gc_barrier_data(node); +} - // An other case of only one user (Xor) is when the value check for null - // in G1 post barrier is folded after CCP so the code which used URShift - // is removed. - - // Take Region node before eliminating post barrier since it also - // eliminates CastP2X node when it has only one user. - Node* this_region = node->in(0); - assert(this_region != nullptr, ""); - - // Remove G1 post barrier. - - // Search for CastP2X->Xor->URShift->Cmp path which - // checks if the store done to a different from the value's region. - // And replace Cmp with #0 (false) to collapse G1 post barrier. - Node* xorx = node->find_out_with(Op_XorX); - if (xorx != nullptr) { - Node* shift = xorx->unique_out(); - Node* cmpx = shift->unique_out(); - assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && - cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne, - "missing region check in G1 post barrier"); - macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ)); - - // Remove G1 pre barrier. - - // Search "if (marking != 0)" check and set it to "false". - // There is no G1 pre barrier if previous stored value is null - // (for example, after initialization). 
- if (this_region->is_Region() && this_region->req() == 3) { - int ind = 1; - if (!this_region->in(ind)->is_IfFalse()) { - ind = 2; - } - if (this_region->in(ind)->is_IfFalse() && - this_region->in(ind)->in(0)->Opcode() == Op_If) { - Node* bol = this_region->in(ind)->in(0)->in(1); - assert(bol->is_Bool(), ""); - cmpx = bol->in(1); - if (bol->as_Bool()->_test._test == BoolTest::ne && - cmpx->is_Cmp() && cmpx->in(2) == macro->intcon(0) && - cmpx->in(1)->is_Load()) { - Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address); - const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - if (adr->is_AddP() && adr->in(AddPNode::Base) == macro->top() && - adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal && - adr->in(AddPNode::Offset) == macro->MakeConX(marking_offset)) { - macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ)); - } - } - } - } - } else { - assert(!use_ReduceInitialCardMarks(), "can only happen with card marking"); - // This is a G1 post barrier emitted by the Object.clone() intrinsic. - // Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card - // is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier. - Node* shift = node->find_out_with(Op_URShiftX); - assert(shift != nullptr, "missing G1 post barrier"); - Node* addp = shift->unique_out(); - Node* load = addp->find_out_with(Op_LoadB); - assert(load != nullptr, "missing G1 post barrier"); - Node* cmpx = load->unique_out(); - assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && - cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne, - "missing card value check in G1 post barrier"); - macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ)); - // There is no G1 pre barrier in this case - } - // Now CastP2X can be removed since it is used only on dead path - // which currently still alive until igvn optimize it. - assert(node->outcnt() == 0 || node->unique_out()->Opcode() == Op_URShiftX, ""); - macro->replace_node(node, macro->top()); +void G1BarrierSetC2::eliminate_gc_barrier_data(Node* node) const { + if (node->is_LoadStore()) { + LoadStoreNode* loadstore = node->as_LoadStore(); + loadstore->set_barrier_data(0); + } else if (node->is_Mem()) { + MemNode* mem = node->as_Mem(); + mem->set_barrier_data(0); } } -Node* G1BarrierSetC2::step_over_gc_barrier(Node* c) const { - if (!use_ReduceInitialCardMarks() && - c != nullptr && c->is_Region() && c->req() == 3) { - for (uint i = 1; i < c->req(); i++) { - if (c->in(i) != nullptr && c->in(i)->is_Region() && - c->in(i)->req() == 3) { - Node* r = c->in(i); - for (uint j = 1; j < r->req(); j++) { - if (r->in(j) != nullptr && r->in(j)->is_Proj() && - r->in(j)->in(0) != nullptr && - r->in(j)->in(0)->Opcode() == Op_CallLeaf && - r->in(j)->in(0)->as_Call()->entry_point() == CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry)) { - Node* call = r->in(j)->in(0); - c = c->in(i == 1 ? 
2 : 1); - if (c != nullptr && c->Opcode() != Op_Parm) { - c = c->in(0); - if (c != nullptr) { - c = c->in(0); - assert(call->in(0) == nullptr || - call->in(0)->in(0) == nullptr || - call->in(0)->in(0)->in(0) == nullptr || - call->in(0)->in(0)->in(0)->in(0) == nullptr || - call->in(0)->in(0)->in(0)->in(0)->in(0) == nullptr || - c == call->in(0)->in(0)->in(0)->in(0)->in(0), "bad barrier shape"); - return c; - } - } - } - } - } - } +static void refine_barrier_by_new_val_type(const Node* n) { + if (n->Opcode() != Op_StoreP && + n->Opcode() != Op_StoreN) { + return; } - return c; + MemNode* store = n->as_Mem(); + const Node* newval = n->in(MemNode::ValueIn); + assert(newval != nullptr, ""); + const Type* newval_bottom = newval->bottom_type(); + TypePtr::PTR newval_type = newval_bottom->make_ptr()->ptr(); + uint8_t barrier_data = store->barrier_data(); + if (!newval_bottom->isa_oopptr() && + !newval_bottom->isa_narrowoop() && + newval_type != TypePtr::Null) { + // newval is neither an OOP nor null, so there is no barrier to refine. + assert(barrier_data == 0, "non-OOP stores should have no barrier data"); + return; + } + if (barrier_data == 0) { + // No barrier to refine. + return; + } + if (newval_type == TypePtr::Null) { + // Simply elide post-barrier if writing null. + barrier_data &= ~G1C2BarrierPost; + barrier_data &= ~G1C2BarrierPostNotNull; + } else if (((barrier_data & G1C2BarrierPost) != 0) && + newval_type == TypePtr::NotNull) { + // If the post-barrier has not been elided yet (e.g. due to newval being + // freshly allocated), mark it as not-null (simplifies barrier tests and + // compressed OOPs logic). + barrier_data |= G1C2BarrierPostNotNull; + } + store->set_barrier_data(barrier_data); + return; } -#ifdef ASSERT -bool G1BarrierSetC2::has_cas_in_use_chain(Node *n) const { - Unique_Node_List visited; +// Refine (not really expand) G1 barriers by looking at the new value type +// (whether it is necessarily null or necessarily non-null). 
+bool G1BarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const { + ResourceMark rm; + VectorSet visited; Node_List worklist; - worklist.push(n); + worklist.push(C->root()); while (worklist.size() > 0) { - Node* x = worklist.pop(); - if (visited.member(x)) { + Node* n = worklist.pop(); + if (visited.test_set(n->_idx)) { continue; - } else { - visited.push(x); } - - if (x->is_LoadStore()) { - int op = x->Opcode(); - if (op == Op_CompareAndExchangeP || op == Op_CompareAndExchangeN || - op == Op_CompareAndSwapP || op == Op_CompareAndSwapN || - op == Op_WeakCompareAndSwapP || op == Op_WeakCompareAndSwapN) { - return true; - } - } - if (!x->is_CFG()) { - for (SimpleDUIterator iter(x); iter.has_next(); iter.next()) { - Node* use = iter.get(); - worklist.push(use); + refine_barrier_by_new_val_type(n); + for (uint j = 0; j < n->req(); j++) { + Node* in = n->in(j); + if (in != nullptr) { + worklist.push(in); } } } return false; } -void G1BarrierSetC2::verify_pre_load(Node* marking_if, Unique_Node_List& loads /*output*/) const { - assert(loads.size() == 0, "Loads list should be empty"); - Node* pre_val_if = marking_if->find_out_with(Op_IfTrue)->find_out_with(Op_If); - if (pre_val_if != nullptr) { - Unique_Node_List visited; - Node_List worklist; - Node* pre_val = pre_val_if->in(1)->in(1)->in(1); - - worklist.push(pre_val); - while (worklist.size() > 0) { - Node* x = worklist.pop(); - if (visited.member(x)) { - continue; - } else { - visited.push(x); - } - - if (has_cas_in_use_chain(x)) { - loads.clear(); - return; - } - - if (x->is_Con()) { - continue; - } - if (x->is_EncodeP() || x->is_DecodeN()) { - worklist.push(x->in(1)); - continue; - } - if (x->is_Load() || x->is_LoadStore()) { - assert(x->in(0) != nullptr, "Pre-val load has to have a control"); - loads.push(x); - continue; - } - if (x->is_Phi()) { - for (uint i = 1; i < x->req(); i++) { - worklist.push(x->in(i)); - } - continue; - } - assert(false, "Pre-val anomaly"); - } +uint G1BarrierSetC2::estimated_barrier_size(const Node* node) const { + // These Ideal node counts are extracted from the pre-matching Ideal graph + // generated when compiling the following method with early barrier expansion: + // static void write(MyObject obj1, Object o) { + // obj1.o1 = o; + // } + uint8_t barrier_data = MemNode::barrier_data(node); + uint nodes = 0; + if ((barrier_data & G1C2BarrierPre) != 0) { + nodes += 50; } + if ((barrier_data & G1C2BarrierPost) != 0) { + nodes += 60; + } + return nodes; } -void G1BarrierSetC2::verify_no_safepoints(Compile* compile, Node* marking_check_if, const Unique_Node_List& loads) const { - if (loads.size() == 0) { +bool G1BarrierSetC2::can_initialize_object(const StoreNode* store) const { + assert(store->Opcode() == Op_StoreP || store->Opcode() == Op_StoreN, "OOP store expected"); + // It is OK to move the store across the object initialization boundary only + // if it does not have any barrier, or if it has barriers that can be safely + // elided (because of the compensation steps taken on the allocation slow path + // when ReduceInitialCardMarks is enabled). 
+ return (MemNode::barrier_data(store) == 0) || use_ReduceInitialCardMarks(); +} + +void G1BarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* ac) const { + if (ac->is_clone_inst() && !use_ReduceInitialCardMarks()) { + clone_in_runtime(phase, ac, G1BarrierSetRuntime::clone_addr(), "G1BarrierSetRuntime::clone"); return; } + BarrierSetC2::clone_at_expansion(phase, ac); +} - if (loads.size() == 1) { // Handle the typical situation when there a single pre-value load - // that is dominated by the marking_check_if, that's true when the - // barrier itself does the pre-val load. - Node *pre_val = loads.at(0); - if (pre_val->in(0)->in(0) == marking_check_if) { // IfTrue->If +Node* G1BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const { + DecoratorSet decorators = access.decorators(); + bool anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool in_heap = (decorators & IN_HEAP) != 0; + bool tightly_coupled_alloc = (decorators & C2_TIGHTLY_COUPLED_ALLOC) != 0; + bool need_store_barrier = !(tightly_coupled_alloc && use_ReduceInitialCardMarks()) && (in_heap || anonymous); + if (access.is_oop() && need_store_barrier) { + access.set_barrier_data(get_store_barrier(access)); + if (tightly_coupled_alloc) { + assert(!use_ReduceInitialCardMarks(), + "post-barriers are only needed for tightly-coupled initialization stores when ReduceInitialCardMarks is disabled"); + access.set_barrier_data(access.barrier_data() ^ G1C2BarrierPre); + } + } + return BarrierSetC2::store_at_resolved(access, val); +} + +Node* G1BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + if (!access.is_oop()) { + return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type); + } + access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type); +} + +Node* G1BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + if (!access.is_oop()) { + return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); + } + access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); +} + +Node* G1BarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + if (!access.is_oop()) { + return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type); + } + access.set_barrier_data(G1C2BarrierPre | G1C2BarrierPost); + return BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type); +} + +class G1BarrierSetC2State : public BarrierSetC2State { +private: + GrowableArray* _stubs; + +public: + G1BarrierSetC2State(Arena* arena) + : BarrierSetC2State(arena), + _stubs(new (arena) GrowableArray(arena, 8, 0, nullptr)) {} + + GrowableArray* stubs() { + return _stubs; + } + + bool needs_liveness_data(const MachNode* mach) const { + return G1PreBarrierStubC2::needs_barrier(mach) || + G1PostBarrierStubC2::needs_barrier(mach); + } + + bool needs_livein_data() const { + return false; + } +}; + +static G1BarrierSetC2State* barrier_set_state() { + return reinterpret_cast(Compile::current()->barrier_set_state()); +} + 
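// Illustrative sketch only (not part of this patch): a platform barrier-emission
// helper typically creates one of these stubs, records the registers it needs,
// emits the inline fast-path check, and branches to the stub for the slow path.
// Register names below are placeholders; the exact checks are platform-specific:
//
//   G1PreBarrierStubC2* const stub = G1PreBarrierStubC2::create(node);
//   stub->initialize_registers(obj, pre_val, thread, tmp1, tmp2);
//   ... test whether SATB marking is active, using
//       G1ThreadLocalData::satb_mark_queue_active_offset() ...
//   ... if active, branch to *stub->entry() ...
//   __ bind(*stub->continuation());
//
// Stubs registered this way are emitted out of line later by
// G1BarrierSetC2::emit_stubs(), which calls each stub's emit_code().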
+G1BarrierStubC2::G1BarrierStubC2(const MachNode* node) : BarrierStubC2(node) {} + +G1PreBarrierStubC2::G1PreBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {} + +bool G1PreBarrierStubC2::needs_barrier(const MachNode* node) { + return (node->barrier_data() & G1C2BarrierPre) != 0; +} + +G1PreBarrierStubC2* G1PreBarrierStubC2::create(const MachNode* node) { + G1PreBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PreBarrierStubC2(node); + if (!Compile::current()->output()->in_scratch_emit_size()) { + barrier_set_state()->stubs()->append(stub); + } + return stub; +} + +void G1PreBarrierStubC2::initialize_registers(Register obj, Register pre_val, Register thread, Register tmp1, Register tmp2) { + _obj = obj; + _pre_val = pre_val; + _thread = thread; + _tmp1 = tmp1; + _tmp2 = tmp2; +} + +Register G1PreBarrierStubC2::obj() const { + return _obj; +} + +Register G1PreBarrierStubC2::pre_val() const { + return _pre_val; +} + +Register G1PreBarrierStubC2::thread() const { + return _thread; +} + +Register G1PreBarrierStubC2::tmp1() const { + return _tmp1; +} + +Register G1PreBarrierStubC2::tmp2() const { + return _tmp2; +} + +void G1PreBarrierStubC2::emit_code(MacroAssembler& masm) { + G1BarrierSetAssembler* bs = static_cast(BarrierSet::barrier_set()->barrier_set_assembler()); + bs->generate_c2_pre_barrier_stub(&masm, this); +} + +G1PostBarrierStubC2::G1PostBarrierStubC2(const MachNode* node) : G1BarrierStubC2(node) {} + +bool G1PostBarrierStubC2::needs_barrier(const MachNode* node) { + return (node->barrier_data() & G1C2BarrierPost) != 0; +} + +G1PostBarrierStubC2* G1PostBarrierStubC2::create(const MachNode* node) { + G1PostBarrierStubC2* const stub = new (Compile::current()->comp_arena()) G1PostBarrierStubC2(node); + if (!Compile::current()->output()->in_scratch_emit_size()) { + barrier_set_state()->stubs()->append(stub); + } + return stub; +} + +void G1PostBarrierStubC2::initialize_registers(Register thread, Register tmp1, Register tmp2, Register tmp3) { + _thread = thread; + _tmp1 = tmp1; + _tmp2 = tmp2; + _tmp3 = tmp3; +} + +Register G1PostBarrierStubC2::thread() const { + return _thread; +} + +Register G1PostBarrierStubC2::tmp1() const { + return _tmp1; +} + +Register G1PostBarrierStubC2::tmp2() const { + return _tmp2; +} + +Register G1PostBarrierStubC2::tmp3() const { + return _tmp3; +} + +void G1PostBarrierStubC2::emit_code(MacroAssembler& masm) { + G1BarrierSetAssembler* bs = static_cast(BarrierSet::barrier_set()->barrier_set_assembler()); + bs->generate_c2_post_barrier_stub(&masm, this); +} + +void* G1BarrierSetC2::create_barrier_state(Arena* comp_arena) const { + return new (comp_arena) G1BarrierSetC2State(comp_arena); +} + +int G1BarrierSetC2::get_store_barrier(C2Access& access) const { + if (!access.is_parse_access()) { + // Only support for eliding barriers at parse time for now. + return G1C2BarrierPre | G1C2BarrierPost; + } + GraphKit* kit = (static_cast(access)).kit(); + Node* ctl = kit->control(); + Node* adr = access.addr().node(); + uint adr_idx = kit->C->get_alias_index(access.addr().type()); + assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory"); + + bool can_remove_pre_barrier = g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, access.type(), adr_idx); + + // We can skip marks on a freshly-allocated object in Eden. Keep this code in + // sync with CardTableBarrierSet::on_slowpath_allocation_exit. 
That routine + // informs GC to take appropriate compensating steps, upon a slow-path + // allocation, so as to make this card-mark elision safe. + // The post-barrier can also be removed if null is written. This case is + // handled by G1BarrierSetC2::expand_barriers, which runs at the end of C2's + // platform-independent optimizations to exploit stronger type information. + bool can_remove_post_barrier = use_ReduceInitialCardMarks() && + ((access.base() == kit->just_allocated_object(ctl)) || + g1_can_remove_post_barrier(kit, &kit->gvn(), ctl, adr)); + + int barriers = 0; + if (!can_remove_pre_barrier) { + barriers |= G1C2BarrierPre; + } + if (!can_remove_post_barrier) { + barriers |= G1C2BarrierPost; + } + + return barriers; +} + +void G1BarrierSetC2::late_barrier_analysis() const { + compute_liveness_at_stubs(); +} + +void G1BarrierSetC2::emit_stubs(CodeBuffer& cb) const { + MacroAssembler masm(&cb); + GrowableArray* const stubs = barrier_set_state()->stubs(); + for (int i = 0; i < stubs->length(); i++) { + // Make sure there is enough space in the code buffer + if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == nullptr) { + ciEnv::current()->record_failure("CodeCache is full"); return; } + stubs->at(i)->emit_code(masm); } - - // All other cases are when pre-value loads dominate the marking check. - Unique_Node_List controls; - for (uint i = 0; i < loads.size(); i++) { - Node *c = loads.at(i)->in(0); - controls.push(c); - } - - Unique_Node_List visited; - Unique_Node_List safepoints; - Node_List worklist; - uint found = 0; - - worklist.push(marking_check_if); - while (worklist.size() > 0 && found < controls.size()) { - Node* x = worklist.pop(); - if (x == nullptr || x == compile->top()) continue; - if (visited.member(x)) { - continue; - } else { - visited.push(x); - } - - if (controls.member(x)) { - found++; - } - if (x->is_Region()) { - for (uint i = 1; i < x->req(); i++) { - worklist.push(x->in(i)); - } - } else { - if (!x->is_SafePoint()) { - worklist.push(x->in(0)); - } else { - safepoints.push(x); - } - } - } - assert(found == controls.size(), "Pre-barrier structure anomaly or possible safepoint"); + masm.flush(); } -void G1BarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const { - if (phase != BarrierSetC2::BeforeCodeGen) { - return; +#ifndef PRODUCT +void G1BarrierSetC2::dump_barrier_data(const MachNode* mach, outputStream* st) const { + if ((mach->barrier_data() & G1C2BarrierPre) != 0) { + st->print("pre "); } - // Verify G1 pre-barriers - const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - - Unique_Node_List visited; - Node_List worklist; - // We're going to walk control flow backwards starting from the Root - worklist.push(compile->root()); - while (worklist.size() > 0) { - Node* x = worklist.pop(); - if (x == nullptr || x == compile->top()) continue; - if (visited.member(x)) { - continue; - } else { - visited.push(x); - } - - if (x->is_Region()) { - for (uint i = 1; i < x->req(); i++) { - worklist.push(x->in(i)); - } - } else { - worklist.push(x->in(0)); - // We are looking for the pattern: - // /->ThreadLocal - // If->Bool->CmpI->LoadB->AddP->ConL(marking_offset) - // \->ConI(0) - // We want to verify that the If and the LoadB have the same control - // See GraphKit::g1_write_barrier_pre() - if (x->is_If()) { - IfNode *iff = x->as_If(); - if (iff->in(1)->is_Bool() && iff->in(1)->in(1)->is_Cmp()) { - CmpNode *cmp = iff->in(1)->in(1)->as_Cmp(); - if (cmp->Opcode() == Op_CmpI 
&& cmp->in(2)->is_Con() && cmp->in(2)->bottom_type()->is_int()->get_con() == 0 - && cmp->in(1)->is_Load()) { - LoadNode* load = cmp->in(1)->as_Load(); - if (load->Opcode() == Op_LoadB && load->in(2)->is_AddP() && load->in(2)->in(2)->Opcode() == Op_ThreadLocal - && load->in(2)->in(3)->is_Con() - && load->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset) { - - Node* if_ctrl = iff->in(0); - Node* load_ctrl = load->in(0); - - if (if_ctrl != load_ctrl) { - // Skip possible CProj->NeverBranch in infinite loops - if ((if_ctrl->is_Proj() && if_ctrl->Opcode() == Op_CProj) - && if_ctrl->in(0)->is_NeverBranch()) { - if_ctrl = if_ctrl->in(0)->in(0); - } - } - assert(load_ctrl != nullptr && if_ctrl == load_ctrl, "controls must match"); - - Unique_Node_List loads; - verify_pre_load(iff, loads); - verify_no_safepoints(compile, iff, loads); - } - } - } - } - } + if ((mach->barrier_data() & G1C2BarrierPost) != 0) { + st->print("post "); + } + if ((mach->barrier_data() & G1C2BarrierPostNotNull) != 0) { + st->print("notnull "); } } -#endif - -bool G1BarrierSetC2::escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const { - if (opcode == Op_StoreP) { - Node* adr = n->in(MemNode::Address); - const Type* adr_type = gvn->type(adr); - // Pointer stores in G1 barriers looks like unsafe access. - // Ignore such stores to be able scalar replace non-escaping - // allocations. - if (adr_type->isa_rawptr() && adr->is_AddP()) { - Node* base = conn_graph->get_addp_base(adr); - if (base->Opcode() == Op_LoadP && - base->in(MemNode::Address)->is_AddP()) { - adr = base->in(MemNode::Address); - Node* tls = conn_graph->get_addp_base(adr); - if (tls->Opcode() == Op_ThreadLocal) { - int offs = (int) gvn->find_intptr_t_con(adr->in(AddPNode::Offset), Type::OffsetBot); - const int buf_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()); - if (offs == buf_offset) { - return true; // G1 pre barrier previous oop value store. - } - if (offs == in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())) { - return true; // G1 post barrier card address store. 
- } - } - } - } - } - return false; -} +#endif // !PRODUCT diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp index c445a87d2e4..dc333d8c331 100644 --- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp +++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp @@ -31,29 +31,62 @@ class PhaseTransform; class Type; class TypeFunc; +const int G1C2BarrierPre = 1; +const int G1C2BarrierPost = 2; +const int G1C2BarrierPostNotNull = 4; + +class G1BarrierStubC2 : public BarrierStubC2 { +public: + G1BarrierStubC2(const MachNode* node); + virtual void emit_code(MacroAssembler& masm) = 0; +}; + +class G1PreBarrierStubC2 : public G1BarrierStubC2 { +private: + Register _obj; + Register _pre_val; + Register _thread; + Register _tmp1; + Register _tmp2; + +protected: + G1PreBarrierStubC2(const MachNode* node); + +public: + static bool needs_barrier(const MachNode* node); + static G1PreBarrierStubC2* create(const MachNode* node); + void initialize_registers(Register obj, Register pre_val, Register thread, Register tmp1 = noreg, Register tmp2 = noreg); + Register obj() const; + Register pre_val() const; + Register thread() const; + Register tmp1() const; + Register tmp2() const; + virtual void emit_code(MacroAssembler& masm); +}; + +class G1PostBarrierStubC2 : public G1BarrierStubC2 { +private: + Register _thread; + Register _tmp1; + Register _tmp2; + Register _tmp3; + +protected: + G1PostBarrierStubC2(const MachNode* node); + +public: + static bool needs_barrier(const MachNode* node); + static G1PostBarrierStubC2* create(const MachNode* node); + void initialize_registers(Register thread, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg); + Register thread() const; + Register tmp1() const; + Register tmp2() const; + Register tmp3() const; + virtual void emit_code(MacroAssembler& masm); +}; + class G1BarrierSetC2: public CardTableBarrierSetC2 { protected: - virtual void pre_barrier(GraphKit* kit, - bool do_load, - Node* ctl, - Node* obj, - Node* adr, - uint adr_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt) const; - - virtual void post_barrier(GraphKit* kit, - Node* ctl, - Node* store, - Node* obj, - Node* adr, - uint adr_idx, - Node* val, - BasicType bt, - bool use_precise) const; - bool g1_can_remove_pre_barrier(GraphKit* kit, PhaseValues* phase, Node* adr, @@ -64,44 +97,31 @@ protected: PhaseValues* phase, Node* store, Node* adr) const; - void g1_mark_card(GraphKit* kit, - IdealKit& ideal, - Node* card_adr, - Node* oop_store, - uint oop_alias_idx, - Node* index, - Node* index_adr, - Node* buffer, - const TypeFunc* tf) const; - - // Helper for unsafe accesses, that may or may not be on the referent field. - // Generates the guards that check whether the result of - // Unsafe.getReference should be recorded in an SATB log buffer. 
- void insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar) const; - - static const TypeFunc* write_ref_field_pre_entry_Type(); - static const TypeFunc* write_ref_field_post_entry_Type(); + int get_store_barrier(C2Access& access) const; virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const; + virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const; + virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const; + virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const; + virtual Node* atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* value_type) const; -#ifdef ASSERT - bool has_cas_in_use_chain(Node* x) const; - void verify_pre_load(Node* marking_check_if, Unique_Node_List& loads /*output*/) const; - void verify_no_safepoints(Compile* compile, Node* marking_load, const Unique_Node_List& loads) const; -#endif - - static bool is_g1_pre_val_load(Node* n); public: - virtual bool is_gc_pre_barrier_node(Node* node) const; - virtual bool is_gc_barrier_node(Node* node) const; virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const; - virtual Node* step_over_gc_barrier(Node* c) const; + virtual void eliminate_gc_barrier_data(Node* node) const; + virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const; + virtual uint estimated_barrier_size(const Node* node) const; + virtual bool can_initialize_object(const StoreNode* store) const; + virtual void clone_at_expansion(PhaseMacroExpand* phase, + ArrayCopyNode* ac) const; + virtual void* create_barrier_state(Arena* comp_arena) const; + virtual void emit_stubs(CodeBuffer& cb) const; + virtual void late_barrier_analysis() const; -#ifdef ASSERT - virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const; +#ifndef PRODUCT + virtual void dump_barrier_data(const MachNode* mach, outputStream* st) const; #endif - - virtual bool escape_add_to_con_graph(ConnectionGraph* conn_graph, PhaseGVN* gvn, Unique_Node_List* delayed_worklist, Node* n, uint opcode) const; }; #endif // SHARE_GC_G1_C2_G1BARRIERSETC2_HPP diff --git a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp index a0fce437807..2e247f46c93 100644 --- a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp +++ b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.cpp @@ -61,3 +61,11 @@ JRT_LEAF(void, G1BarrierSetRuntime::write_ref_field_post_entry(volatile G1CardTa G1DirtyCardQueue& queue = G1ThreadLocalData::dirty_card_queue(thread); G1BarrierSet::dirty_card_queue_set().enqueue(queue, card_addr); JRT_END + +JRT_LEAF(void, G1BarrierSetRuntime::clone(oopDesc* src, oopDesc* dst, size_t size)) + HeapAccess<>::clone(src, dst, size); +JRT_END + +address G1BarrierSetRuntime::clone_addr() { + return reinterpret_cast
(clone); +} diff --git a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp index 366679f032b..f98e94096e7 100644 --- a/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp +++ b/src/hotspot/share/gc/g1/g1BarrierSetRuntime.hpp @@ -35,6 +35,8 @@ class oopDesc; class JavaThread; class G1BarrierSetRuntime: public AllStatic { +private: + static void clone(oopDesc* src, oopDesc* dst, size_t size); public: using CardValue = G1CardTable::CardValue; @@ -46,6 +48,8 @@ public: // C2 slow-path runtime calls. static void write_ref_field_pre_entry(oopDesc* orig, JavaThread *thread); static void write_ref_field_post_entry(volatile CardValue* card_addr, JavaThread* thread); + + static address clone_addr(); }; #endif // SHARE_GC_G1_G1BARRIERSETRUNTIME_HPP diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp index 59e02452044..643a7936b9b 100644 --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp @@ -109,6 +109,10 @@ Label* BarrierStubC2::continuation() { return &_continuation; } +uint8_t BarrierStubC2::barrier_data() const { + return _node->barrier_data(); +} + void BarrierStubC2::preserve(Register r) { const VMReg vm_reg = r->as_VMReg(); assert(vm_reg->is_Register(), "r must be a general-purpose register"); diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp index c1485c069c8..00fbf1f2c9f 100644 --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp @@ -254,6 +254,8 @@ public: Label* entry(); // Return point from the stub (typically end of barrier). Label* continuation(); + // High-level, GC-specific barrier flags. + uint8_t barrier_data() const; // Preserve the value in reg across runtime calls in this barrier. void preserve(Register reg); @@ -340,6 +342,8 @@ public: // Estimated size of the node barrier in number of C2 Ideal nodes. // This is used to guide heuristics in C2, e.g. whether to unroll a loop. virtual uint estimated_barrier_size(const Node* node) const { return 0; } + // Whether the given store can be used to initialize a newly allocated object. + virtual bool can_initialize_object(const StoreNode* store) const { return true; } enum CompilePhase { BeforeOptimize, diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp index 87bb9f3cd51..11b742156a8 100644 --- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp @@ -125,39 +125,10 @@ void CardTableBarrierSetC2::post_barrier(GraphKit* kit, kit->final_sync(ideal); } -void CardTableBarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const { - BarrierSetC2::clone(kit, src, dst, size, is_array); - const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; - - // If necessary, emit some card marks afterwards. (Non-arrays only.) - bool card_mark = !is_array && !use_ReduceInitialCardMarks(); - if (card_mark) { - assert(!is_array, ""); - // Put in store barrier for any and all oops we are sticking - // into this object. (We could avoid this if we could prove - // that the object type contains no oop fields at all.) 
- Node* no_particular_value = nullptr; - Node* no_particular_field = nullptr; - int raw_adr_idx = Compile::AliasIdxRaw; - post_barrier(kit, kit->control(), - kit->memory(raw_adr_type), - dst, - no_particular_field, - raw_adr_idx, - no_particular_value, - T_OBJECT, - false); - } -} - bool CardTableBarrierSetC2::use_ReduceInitialCardMarks() const { return ReduceInitialCardMarks; } -bool CardTableBarrierSetC2::is_gc_barrier_node(Node* node) const { - return ModRefBarrierSetC2::is_gc_barrier_node(node) || node->Opcode() == Op_StoreCM; -} - void CardTableBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required"); Node *shift = node->unique_out(); diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp index 9512f09ff8a..3bbf14892d3 100644 --- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp @@ -42,8 +42,6 @@ protected: Node* byte_map_base_node(GraphKit* kit) const; public: - virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const; - virtual bool is_gc_barrier_node(Node* node) const; virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const; virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, bool is_clone_instance, ArrayCopyPhase phase) const; diff --git a/src/hotspot/share/opto/buildOopMap.cpp b/src/hotspot/share/opto/buildOopMap.cpp index 4591e87da2d..b553cc6ea69 100644 --- a/src/hotspot/share/opto/buildOopMap.cpp +++ b/src/hotspot/share/opto/buildOopMap.cpp @@ -235,6 +235,13 @@ OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, i Node *def = _defs[reg]; // Get reaching def assert( def, "since live better have reaching def" ); + if (def->is_MachTemp()) { + assert(!def->bottom_type()->isa_oop_ptr(), + "ADLC only assigns OOP types to MachTemp defs corresponding to xRegN operands"); + // Exclude MachTemp definitions even if they are typed as oops. + continue; + } + // Classify the reaching def as oop, derived, callee-save, dead, or other const Type *t = def->bottom_type(); if( t->isa_oop_ptr() ) { // Oop or derived? diff --git a/src/hotspot/share/opto/lcm.cpp b/src/hotspot/share/opto/lcm.cpp index 9db94748ca2..3c6de96074a 100644 --- a/src/hotspot/share/opto/lcm.cpp +++ b/src/hotspot/share/opto/lcm.cpp @@ -161,6 +161,14 @@ void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allo Node *m = val->out(i); if( !m->is_Mach() ) continue; MachNode *mach = m->as_Mach(); + if (mach->barrier_data() != 0) { + // Using memory accesses with barriers to perform implicit null checks is + // not supported. These operations might expand into multiple assembly + // instructions during code emission, including new memory accesses (e.g. + // in G1's pre-barrier), which would invalidate the implicit null + // exception table. + continue; + } was_store = false; int iop = mach->ideal_Opcode(); switch( iop ) { diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp index bf773d43d3d..6d96bff1c1c 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -1594,6 +1594,14 @@ static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool s // the same register. See find_shared_node. 
return false; } else { // Not a constant + if (!shared && Matcher::is_encode_and_store_pattern(n, m)) { + // Make it possible to match "encode and store" patterns with non-shared + // encode operations that are pinned to a control node (e.g. by CastPP + // node removal in final graph reshaping). The matched instruction cannot + // float above the encode's control node because it is pinned to the + // store's control node. + return false; + } // Stop recursion if they have different Controls. Node* m_control = m->in(0); // Control of load's memory can post-dominates load's control. @@ -2833,6 +2841,18 @@ bool Matcher::is_non_long_integral_vector(const Node* n) { return is_subword_type(bt) || bt == T_INT; } +bool Matcher::is_encode_and_store_pattern(const Node* n, const Node* m) { + if (n == nullptr || + m == nullptr || + n->Opcode() != Op_StoreN || + !m->is_EncodeP() || + n->as_Store()->barrier_data() == 0) { + return false; + } + assert(m == n->in(MemNode::ValueIn), "m should be input to n"); + return true; +} + #ifdef ASSERT bool Matcher::verify_after_postselect_cleanup() { assert(!C->failing(), "sanity"); diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp index 84e48086f92..25762835088 100644 --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -385,6 +385,8 @@ public: return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare); } + static bool is_encode_and_store_pattern(const Node* n, const Node* m); + // These calls are all generated by the ADLC // Java-Java calling convention diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index eee14e5ba03..66139188260 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -4644,6 +4644,11 @@ intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseGVN* phase, bool Node* mem = st->in(MemNode::Memory); if (!(mem->is_Proj() && mem->in(0) == this)) return FAIL; // must not be preceded by other stores + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + if ((st->Opcode() == Op_StoreP || st->Opcode() == Op_StoreN) && + !bs->can_initialize_object(st)) { + return FAIL; + } Node* adr = st->in(MemNode::Address); intptr_t offset; AllocateNode* alloc = AllocateNode::Ideal_allocation(adr, phase, offset); diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp index b3f251bb361..260f887347f 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -2022,6 +2022,8 @@ void PhaseOutput::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_s // Handle implicit null exception table updates if (n->is_MachNullCheck()) { + assert(n->in(1)->as_Mach()->barrier_data() == 0, + "Implicit null checks on memory accesses with barriers are not yet supported"); uint block_num = block->non_connector_successor(0)->_pre_order; _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos()); continue; diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestVolatiles.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestVolatiles.java index 23b9321fc35..3f82c3e00b3 100644 --- a/test/hotspot/jtreg/compiler/c2/aarch64/TestVolatiles.java +++ b/test/hotspot/jtreg/compiler/c2/aarch64/TestVolatiles.java @@ -261,20 +261,11 @@ public class TestVolatiles { }; break; case "G1": - // a card mark volatile barrier should be generated - // before the card mark strb - // - // following the fix for 8225776 the G1 barrier is now - // scheduled out of line 
after the membar volatile and - // and subsequent return matches = new String[] { "membar_release \\(elided\\)", useCompressedOops ? "stlrw?" : "stlr", "membar_volatile \\(elided\\)", - "ret", - "membar_volatile", - "dmb ish", - "strb" + "ret" }; break; case "Shenandoah": @@ -332,20 +323,11 @@ public class TestVolatiles { }; break; case "G1": - // a card mark volatile barrier should be generated - // before the card mark strb - // - // following the fix for 8225776 the G1 barrier is now - // scheduled out of line after the membar acquire and - // and subsequent return matches = new String[] { "membar_release \\(elided\\)", useCompressedOops ? "cmpxchgw?_acq" : "cmpxchg_acq", "membar_acquire \\(elided\\)", - "ret", - "membar_volatile", - "dmb ish", - "strb" + "ret" }; break; case "Shenandoah": @@ -418,20 +400,11 @@ public class TestVolatiles { return; case "G1": - // a card mark volatile barrier should be generated - // before the card mark strb - // - // following the fix for 8225776 the G1 barrier is now - // scheduled out of line after the membar acquire and - // and subsequent return matches = new String[] { "membar_release \\(elided\\)", useCompressedOops ? "cmpxchgw?_acq" : "cmpxchg_acq", "membar_acquire \\(elided\\)", - "ret", - "membar_volatile", - "dmb ish", - "strb" + "ret" }; break; case "Shenandoah": @@ -484,20 +457,11 @@ public class TestVolatiles { }; break; case "G1": - // a card mark volatile barrier should be generated - // before the card mark strb - // - // following the fix for 8225776 the G1 barrier is now - // scheduled out of line after the membar acquire and - // and subsequent return matches = new String[] { "membar_release \\(elided\\)", useCompressedOops ? "atomic_xchgw?_acq" : "atomic_xchg_acq", "membar_acquire \\(elided\\)", - "ret", - "membar_volatile", - "dmb ish", - "strb" + "ret" }; break; case "Shenandoah": diff --git a/test/hotspot/jtreg/compiler/c2/irTests/scalarReplacement/AllocationMergesTests.java b/test/hotspot/jtreg/compiler/c2/irTests/scalarReplacement/AllocationMergesTests.java index cd3d5329771..69b3cb5274b 100644 --- a/test/hotspot/jtreg/compiler/c2/irTests/scalarReplacement/AllocationMergesTests.java +++ b/test/hotspot/jtreg/compiler/c2/irTests/scalarReplacement/AllocationMergesTests.java @@ -1355,9 +1355,12 @@ public class AllocationMergesTests { } @Test - @IR(counts = { IRNode.ALLOC, "1" }) - // The last allocation won't be reduced because it would cause the creation - // of a nested SafePointScalarMergeNode. + // Using G1, all allocations are reduced. + @IR(applyIf = {"UseG1GC", "true"}, failOn = { IRNode.ALLOC }) + // Otherwise, the last allocation won't be reduced because it would cause + // the creation of a nested SafePointScalarMergeNode. This is caused by the + // store barrier corresponding to 'C.other = B'. + @IR(applyIf = {"UseG1GC", "false"}, counts = { IRNode.ALLOC, "1" }) int testReReduce_C2(boolean cond1, int x, int y) { return testReReduce(cond1, x, y); } @DontCompile diff --git a/test/hotspot/jtreg/compiler/gcbarriers/TestG1BarrierGeneration.java b/test/hotspot/jtreg/compiler/gcbarriers/TestG1BarrierGeneration.java new file mode 100644 index 00000000000..36ad0bf84a4 --- /dev/null +++ b/test/hotspot/jtreg/compiler/gcbarriers/TestG1BarrierGeneration.java @@ -0,0 +1,639 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package compiler.gcbarriers; + +import compiler.lib.ir_framework.*; +import java.lang.invoke.VarHandle; +import java.lang.invoke.MethodHandles; +import java.lang.ref.Reference; +import java.lang.ref.ReferenceQueue; +import java.lang.ref.SoftReference; +import java.lang.ref.WeakReference; +import java.util.concurrent.ThreadLocalRandom; +import jdk.test.lib.Asserts; + +/** + * @test + * @summary Test that G1 barriers are generated and optimized as expected. + * @library /test/lib / + * @requires vm.gc.G1 + * @run driver compiler.gcbarriers.TestG1BarrierGeneration + */ + +public class TestG1BarrierGeneration { + static final String PRE_ONLY = "pre"; + static final String POST_ONLY = "post"; + static final String POST_ONLY_NOT_NULL = "post notnull"; + static final String PRE_AND_POST = "pre post"; + static final String PRE_AND_POST_NOT_NULL = "pre post notnull"; + + static class Outer { + Object f; + } + + static class OuterWithVolatileField { + volatile Object f; + } + + static class OuterWithFewFields implements Cloneable { + Object f1; + Object f2; + public Object clone() throws CloneNotSupportedException { + return super.clone(); + } + } + + static class OuterWithManyFields implements Cloneable { + Object f1; + Object f2; + Object f3; + Object f4; + Object f5; + Object f6; + Object f7; + Object f8; + Object f9; + Object f10; + public Object clone() throws CloneNotSupportedException { + return super.clone(); + } + } + + static final VarHandle fVarHandle; + static { + MethodHandles.Lookup l = MethodHandles.lookup(); + try { + fVarHandle = l.findVarHandle(Outer.class, "f", Object.class); + } catch (Exception e) { + throw new Error(e); + } + } + + public static void main(String[] args) { + TestFramework framework = new TestFramework(); + Scenario[] scenarios = new Scenario[2*2]; + int scenarioIndex = 0; + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + scenarios[scenarioIndex] = + new Scenario(scenarioIndex, + "-XX:CompileCommand=inline,java.lang.ref.*::*", + "-XX:" + (i == 0 ? "-" : "+") + "UseCompressedOops", + "-XX:" + (j == 0 ? 
"-" : "+") + "ReduceInitialCardMarks"); + scenarioIndex++; + } + } + framework.addScenarios(scenarios); + framework.start(); + } + + @Test + @IR(applyIf = {"UseCompressedOops", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_AND_POST, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIf = {"UseCompressedOops", "true"}, + counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, PRE_AND_POST, "1"}, + phase = CompilePhase.FINAL_CODE) + public static void testStore(Outer o, Object o1) { + o.f = o1; + } + + @Test + @IR(applyIf = {"UseCompressedOops", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_ONLY, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIf = {"UseCompressedOops", "true"}, + counts = {IRNode.G1_STORE_N_WITH_BARRIER_FLAG, PRE_ONLY, "1"}, + phase = CompilePhase.FINAL_CODE) + public static void testStoreNull(Outer o) { + o.f = null; + } + + @Test + @IR(applyIf = {"UseCompressedOops", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_ONLY, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIf = {"UseCompressedOops", "true"}, + counts = {IRNode.G1_STORE_N_WITH_BARRIER_FLAG, PRE_ONLY, "1"}, + phase = CompilePhase.FINAL_CODE) + public static void testStoreObfuscatedNull(Outer o, Object o1) { + Object o2 = o1; + for (int i = 0; i < 4; i++) { + if ((i % 2) == 0) { + o2 = null; + } + } + // o2 is null here, but this is only known to C2 after applying some + // optimizations (loop unrolling, IGVN). + o.f = o2; + } + + @Test + @IR(applyIf = {"UseCompressedOops", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_AND_POST_NOT_NULL, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIf = {"UseCompressedOops", "true"}, + counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, PRE_AND_POST_NOT_NULL, "1"}, + phase = CompilePhase.FINAL_CODE) + public static void testStoreNotNull(Outer o, Object o1) { + if (o1.hashCode() == 42) { + return; + } + o.f = o1; + } + + @Test + @IR(applyIf = {"UseCompressedOops", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_AND_POST, "2"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIf = {"UseCompressedOops", "true"}, + counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, PRE_AND_POST, "2"}, + phase = CompilePhase.FINAL_CODE) + public static void testStoreTwice(Outer o, Outer p, Object o1) { + o.f = o1; + p.f = o1; + } + + @Test + @IR(applyIf = {"UseCompressedOops", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_AND_POST, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIf = {"UseCompressedOops", "true"}, + counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, PRE_AND_POST, "1"}, + phase = CompilePhase.FINAL_CODE) + public static void testStoreVolatile(OuterWithVolatileField o, Object o1) { + o.f = o1; + } + + @Test + @IR(applyIfAnd = {"UseCompressedOops", "false", "ReduceInitialCardMarks", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, POST_ONLY, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIfAnd = {"UseCompressedOops", "true", "ReduceInitialCardMarks", "false"}, + counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, POST_ONLY, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIfAnd = {"UseCompressedOops", "false", "ReduceInitialCardMarks", "true"}, + failOn = {IRNode.G1_STORE_P}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIfAnd = {"UseCompressedOops", "true", "ReduceInitialCardMarks", "true"}, + failOn = {IRNode.G1_STORE_N, IRNode.G1_ENCODE_P_AND_STORE_N}, + phase = CompilePhase.FINAL_CODE) + public static Outer 
testStoreOnNewObject(Object o1) { + Outer o = new Outer(); + o.f = o1; + return o; + } + + @Test + @IR(failOn = {IRNode.STORE_P, IRNode.STORE_N}, + phase = CompilePhase.BEFORE_MACRO_EXPANSION) + public static Outer testStoreNullOnNewObject() { + Outer o = new Outer(); + o.f = null; + return o; + } + + @Test + @IR(applyIfAnd = {"UseCompressedOops", "false", "ReduceInitialCardMarks", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, POST_ONLY_NOT_NULL, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIfAnd = {"UseCompressedOops", "true", "ReduceInitialCardMarks", "false"}, + counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, POST_ONLY_NOT_NULL, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIfAnd = {"UseCompressedOops", "false", "ReduceInitialCardMarks", "true"}, + failOn = {IRNode.G1_STORE_P}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIfAnd = {"UseCompressedOops", "true", "ReduceInitialCardMarks", "true"}, + failOn = {IRNode.G1_STORE_N, IRNode.G1_ENCODE_P_AND_STORE_N}, + phase = CompilePhase.FINAL_CODE) + public static Outer testStoreNotNullOnNewObject(Object o1) { + if (o1.hashCode() == 42) { + return null; + } + Outer o = new Outer(); + o.f = o1; + return o; + } + + @Test + @IR(applyIfAnd = {"UseCompressedOops", "false", "ReduceInitialCardMarks", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, POST_ONLY, "2"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIfAnd = {"UseCompressedOops", "true", "ReduceInitialCardMarks", "false"}, + counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, POST_ONLY, "2"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIfAnd = {"UseCompressedOops", "false", "ReduceInitialCardMarks", "true"}, + failOn = {IRNode.G1_STORE_P}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIfAnd = {"UseCompressedOops", "true", "ReduceInitialCardMarks", "true"}, + failOn = {IRNode.G1_STORE_N, IRNode.G1_ENCODE_P_AND_STORE_N}, + phase = CompilePhase.FINAL_CODE) + public static Outer testStoreOnNewObjectInTwoPaths(Object o1, boolean c) { + Outer o; + if (c) { + o = new Outer(); + o.f = o1; + } else { + o = new Outer(); + o.f = o1; + } + return o; + } + + @Run(test = {"testStore", + "testStoreNull", + "testStoreObfuscatedNull", + "testStoreNotNull", + "testStoreTwice", + "testStoreVolatile", + "testStoreOnNewObject", + "testStoreNullOnNewObject", + "testStoreNotNullOnNewObject", + "testStoreOnNewObjectInTwoPaths"}) + public void runStoreTests() { + { + Outer o = new Outer(); + Object o1 = new Object(); + testStore(o, o1); + Asserts.assertEquals(o1, o.f); + } + { + Outer o = new Outer(); + testStoreNull(o); + Asserts.assertNull(o.f); + } + { + Outer o = new Outer(); + Object o1 = new Object(); + testStoreObfuscatedNull(o, o1); + Asserts.assertNull(o.f); + } + { + Outer o = new Outer(); + Object o1 = new Object(); + testStoreNotNull(o, o1); + Asserts.assertEquals(o1, o.f); + } + { + Outer o = new Outer(); + Outer p = new Outer(); + Object o1 = new Object(); + testStoreTwice(o, p, o1); + Asserts.assertEquals(o1, o.f); + Asserts.assertEquals(o1, p.f); + } + { + OuterWithVolatileField o = new OuterWithVolatileField(); + Object o1 = new Object(); + testStoreVolatile(o, o1); + Asserts.assertEquals(o1, o.f); + } + { + Object o1 = new Object(); + Outer o = testStoreOnNewObject(o1); + Asserts.assertEquals(o1, o.f); + } + { + Outer o = testStoreNullOnNewObject(); + Asserts.assertNull(o.f); + } + { + Object o1 = new Object(); + Outer o = testStoreNotNullOnNewObject(o1); + Asserts.assertEquals(o1, o.f); + } + { + Object o1 = new Object(); + Outer o = 
testStoreOnNewObjectInTwoPaths(o1, ThreadLocalRandom.current().nextBoolean()); + Asserts.assertEquals(o1, o.f); + } + } + + @Test + @IR(applyIf = {"UseCompressedOops", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_AND_POST, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIf = {"UseCompressedOops", "true"}, + counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, PRE_AND_POST, "1"}, + phase = CompilePhase.FINAL_CODE) + public static void testArrayStore(Object[] a, int index, Object o1) { + a[index] = o1; + } + + @Test + @IR(applyIf = {"UseCompressedOops", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_ONLY, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIf = {"UseCompressedOops", "true"}, + counts = {IRNode.G1_STORE_N_WITH_BARRIER_FLAG, PRE_ONLY, "1"}, + phase = CompilePhase.FINAL_CODE) + public static void testArrayStoreNull(Object[] a, int index) { + a[index] = null; + } + + @Test + @IR(applyIf = {"UseCompressedOops", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_AND_POST_NOT_NULL, "1"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIf = {"UseCompressedOops", "true"}, + counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, PRE_AND_POST_NOT_NULL, "1"}, + phase = CompilePhase.FINAL_CODE) + public static void testArrayStoreNotNull(Object[] a, int index, Object o1) { + if (o1.hashCode() == 42) { + return; + } + a[index] = o1; + } + + @Test + @IR(applyIf = {"UseCompressedOops", "false"}, + counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_AND_POST, "2"}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIf = {"UseCompressedOops", "true"}, + counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, PRE_AND_POST, "2"}, + phase = CompilePhase.FINAL_CODE) + public static void testArrayStoreTwice(Object[] a, Object[] b, int index, Object o1) { + a[index] = o1; + b[index] = o1; + } + + @Test + @IR(applyIfAnd = {"UseCompressedOops", "false", "ReduceInitialCardMarks", "true"}, + failOn = {IRNode.G1_STORE_P}, + phase = CompilePhase.FINAL_CODE) + @IR(applyIfAnd = {"UseCompressedOops", "true", "ReduceInitialCardMarks", "true"}, + failOn = {IRNode.G1_STORE_N, IRNode.G1_ENCODE_P_AND_STORE_N}, + phase = CompilePhase.FINAL_CODE) + public static Object[] testStoreOnNewArray(Object o1) { + Object[] a = new Object[10]; + // The index needs to be concrete for C2 to detect that it is safe to + // remove the pre-barrier. 
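        // (That is, the pre-barrier is only elided when C2 can prove that the
        //  previous value of the stored-to slot is null; with a non-constant
        //  index it cannot prove that the slot of the fresh array is still
        //  unwritten.)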
+        a[4] = o1;
+        return a;
+    }
+
+    @Run(test = {"testArrayStore",
+                 "testArrayStoreNull",
+                 "testArrayStoreNotNull",
+                 "testArrayStoreTwice",
+                 "testStoreOnNewArray"})
+    public void runArrayStoreTests() {
+        {
+            Object[] a = new Object[10];
+            Object o1 = new Object();
+            testArrayStore(a, 4, o1);
+            Asserts.assertEquals(o1, a[4]);
+        }
+        {
+            Object[] a = new Object[10];
+            testArrayStoreNull(a, 4);
+            Asserts.assertNull(a[4]);
+        }
+        {
+            Object[] a = new Object[10];
+            Object o1 = new Object();
+            testArrayStoreNotNull(a, 4, o1);
+            Asserts.assertEquals(o1, a[4]);
+        }
+        {
+            Object[] a = new Object[10];
+            Object[] b = new Object[10];
+            Object o1 = new Object();
+            testArrayStoreTwice(a, b, 4, o1);
+            Asserts.assertEquals(o1, a[4]);
+            Asserts.assertEquals(o1, b[4]);
+        }
+        {
+            Object o1 = new Object();
+            Object[] a = testStoreOnNewArray(o1);
+            Asserts.assertEquals(o1, a[4]);
+        }
+    }
+
+    @Test
+    public static Object[] testCloneArrayOfObjects(Object[] a) {
+        Object[] a1 = null;
+        try {
+            a1 = a.clone();
+        } catch (Exception e) {}
+        return a1;
+    }
+
+    @Test
+    @IR(applyIf = {"ReduceInitialCardMarks", "true"},
+        failOn = {IRNode.G1_STORE_P, IRNode.G1_STORE_N, IRNode.G1_ENCODE_P_AND_STORE_N},
+        phase = CompilePhase.FINAL_CODE)
+    @IR(applyIfAnd = {"ReduceInitialCardMarks", "false", "UseCompressedOops", "false"},
+        counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, POST_ONLY, "2"},
+        phase = CompilePhase.FINAL_CODE)
+    @IR(applyIfAnd = {"ReduceInitialCardMarks", "false", "UseCompressedOops", "true"},
+        counts = {IRNode.G1_STORE_N_WITH_BARRIER_FLAG, POST_ONLY, "2"},
+        phase = CompilePhase.FINAL_CODE)
+    public static OuterWithFewFields testCloneObjectWithFewFields(OuterWithFewFields o) {
+        Object o1 = null;
+        try {
+            o1 = o.clone();
+        } catch (Exception e) {}
+        return (OuterWithFewFields)o1;
+    }
+
+    @Test
+    @IR(applyIf = {"ReduceInitialCardMarks", "true"},
+        counts = {IRNode.CALL_OF, "jlong_disjoint_arraycopy", "1"})
+    @IR(applyIf = {"ReduceInitialCardMarks", "false"},
+        counts = {IRNode.CALL_OF, "G1BarrierSetRuntime::clone", "1"})
+    public static OuterWithManyFields testCloneObjectWithManyFields(OuterWithManyFields o) {
+        Object o1 = null;
+        try {
+            o1 = o.clone();
+        } catch (Exception e) {}
+        return (OuterWithManyFields)o1;
+    }
+
+    @Run(test = {"testCloneArrayOfObjects",
+                 "testCloneObjectWithFewFields",
+                 "testCloneObjectWithManyFields"})
+    public void runCloneTests() {
+        {
+            Object o1 = new Object();
+            Object[] a = new Object[4];
+            for (int i = 0; i < 4; i++) {
+                a[i] = o1;
+            }
+            Object[] a1 = testCloneArrayOfObjects(a);
+            for (int i = 0; i < 4; i++) {
+                Asserts.assertEquals(o1, a1[i]);
+            }
+        }
+        {
+            Object a = new Object();
+            Object b = new Object();
+            OuterWithFewFields o = new OuterWithFewFields();
+            o.f1 = a;
+            o.f2 = b;
+            OuterWithFewFields o1 = testCloneObjectWithFewFields(o);
+            Asserts.assertEquals(a, o1.f1);
+            Asserts.assertEquals(b, o1.f2);
+        }
+        {
+            Object a = new Object();
+            Object b = new Object();
+            Object c = new Object();
+            Object d = new Object();
+            Object e = new Object();
+            Object f = new Object();
+            Object g = new Object();
+            Object h = new Object();
+            Object i = new Object();
+            Object j = new Object();
+            OuterWithManyFields o = new OuterWithManyFields();
+            o.f1 = a;
+            o.f2 = b;
+            o.f3 = c;
+            o.f4 = d;
+            o.f5 = e;
+            o.f6 = f;
+            o.f7 = g;
+            o.f8 = h;
+            o.f9 = i;
+            o.f10 = j;
+            OuterWithManyFields o1 = testCloneObjectWithManyFields(o);
+            Asserts.assertEquals(a, o1.f1);
+            Asserts.assertEquals(b, o1.f2);
+            Asserts.assertEquals(c, o1.f3);
+            Asserts.assertEquals(d, o1.f4);
+            Asserts.assertEquals(e, o1.f5);
+            Asserts.assertEquals(f, o1.f6);
+            Asserts.assertEquals(g, o1.f7);
+            Asserts.assertEquals(h, o1.f8);
+            Asserts.assertEquals(i, o1.f9);
+            Asserts.assertEquals(j, o1.f10);
+        }
+    }
+
+    @Test
+    @IR(applyIf = {"UseCompressedOops", "false"},
+        counts = {IRNode.G1_COMPARE_AND_EXCHANGE_P_WITH_BARRIER_FLAG, PRE_AND_POST, "1"},
+        phase = CompilePhase.FINAL_CODE)
+    @IR(applyIf = {"UseCompressedOops", "true"},
+        counts = {IRNode.G1_COMPARE_AND_EXCHANGE_N_WITH_BARRIER_FLAG, PRE_AND_POST, "1"},
+        phase = CompilePhase.FINAL_CODE)
+    static Object testCompareAndExchange(Outer o, Object oldVal, Object newVal) {
+        return fVarHandle.compareAndExchange(o, oldVal, newVal);
+    }
+
+    @Test
+    @IR(applyIf = {"UseCompressedOops", "false"},
+        counts = {IRNode.G1_COMPARE_AND_SWAP_P_WITH_BARRIER_FLAG, PRE_AND_POST, "1"},
+        phase = CompilePhase.FINAL_CODE)
+    @IR(applyIf = {"UseCompressedOops", "true"},
+        counts = {IRNode.G1_COMPARE_AND_SWAP_N_WITH_BARRIER_FLAG, PRE_AND_POST, "1"},
+        phase = CompilePhase.FINAL_CODE)
+    static boolean testCompareAndSwap(Outer o, Object oldVal, Object newVal) {
+        return fVarHandle.compareAndSet(o, oldVal, newVal);
+    }
+
+    @Test
+    @IR(applyIf = {"UseCompressedOops", "false"},
+        counts = {IRNode.G1_GET_AND_SET_P_WITH_BARRIER_FLAG, PRE_AND_POST, "1"},
+        phase = CompilePhase.FINAL_CODE)
+    @IR(applyIf = {"UseCompressedOops", "true"},
+        counts = {IRNode.G1_GET_AND_SET_N_WITH_BARRIER_FLAG, PRE_AND_POST, "1"},
+        phase = CompilePhase.FINAL_CODE)
+    static Object testGetAndSet(Outer o, Object newVal) {
+        return fVarHandle.getAndSet(o, newVal);
+    }
+
+    @Run(test = {"testCompareAndExchange",
+                 "testCompareAndSwap",
+                 "testGetAndSet"})
+    public void runAtomicTests() {
+        {
+            Outer o = new Outer();
+            Object oldVal = new Object();
+            o.f = oldVal;
+            Object newVal = new Object();
+            Object oldVal2 = testCompareAndExchange(o, oldVal, newVal);
+            Asserts.assertEquals(oldVal, oldVal2);
+            Asserts.assertEquals(o.f, newVal);
+        }
+        {
+            Outer o = new Outer();
+            Object oldVal = new Object();
+            o.f = oldVal;
+            Object newVal = new Object();
+            boolean b = testCompareAndSwap(o, oldVal, newVal);
+            Asserts.assertTrue(b);
+            Asserts.assertEquals(o.f, newVal);
+        }
+        {
+            Outer o = new Outer();
+            Object oldVal = new Object();
+            o.f = oldVal;
+            Object newVal = new Object();
+            Object oldVal2 = testGetAndSet(o, newVal);
+            Asserts.assertEquals(oldVal, oldVal2);
+            Asserts.assertEquals(o.f, newVal);
+        }
+    }
+
+    @Test
+    @IR(applyIf = {"UseCompressedOops", "false"},
+        counts = {IRNode.G1_LOAD_P_WITH_BARRIER_FLAG, PRE_ONLY, "1"},
+        phase = CompilePhase.FINAL_CODE)
+    @IR(applyIf = {"UseCompressedOops", "true"},
+        counts = {IRNode.G1_LOAD_N_WITH_BARRIER_FLAG, PRE_ONLY, "1"},
+        phase = CompilePhase.FINAL_CODE)
+    static Object testLoadSoftReference(SoftReference ref) {
+        return ref.get();
+    }
+
+    @Test
+    @IR(applyIf = {"UseCompressedOops", "false"},
+        counts = {IRNode.G1_LOAD_P_WITH_BARRIER_FLAG, PRE_ONLY, "1"},
+        phase = CompilePhase.FINAL_CODE)
+    @IR(applyIf = {"UseCompressedOops", "true"},
+        counts = {IRNode.G1_LOAD_N_WITH_BARRIER_FLAG, PRE_ONLY, "1"},
+        phase = CompilePhase.FINAL_CODE)
+    static Object testLoadWeakReference(WeakReference ref) {
+        return ref.get();
+    }
+
+    @Run(test = {"testLoadSoftReference",
+                 "testLoadWeakReference"})
+    public void runReferenceTests() {
+        {
+            Object o1 = new Object();
+            SoftReference sref = new SoftReference(o1);
+            Object o2 = testLoadSoftReference(sref);
+            Asserts.assertTrue(o2 == o1 || o2 == null);
+        }
+        {
+            Object o1 = new Object();
+            WeakReference wref = new WeakReference(o1);
+            Object o2 = testLoadWeakReference(wref);
+            Asserts.assertTrue(o2 == o1 || o2 == null);
+        }
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
index 16f56012d3d..a7c61f71050 100644
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@@ -358,6 +358,11 @@ public class IRNode {
         beforeMatchingNameRegex(CALL, "Call.*Java");
     }
 
+    public static final String CALL_OF = COMPOSITE_PREFIX + "CALL_OF" + POSTFIX;
+    static {
+        callOfNodes(CALL_OF, "Call.*");
+    }
+
     public static final String CALL_OF_METHOD = COMPOSITE_PREFIX + "CALL_OF_METHOD" + POSTFIX;
     static {
         callOfNodes(CALL_OF_METHOD, "Call.*Java");
@@ -581,6 +586,92 @@ public class IRNode {
         vectorNode(FMA_VD, "FmaVD", TYPE_DOUBLE);
     }
 
+    public static final String G1_COMPARE_AND_EXCHANGE_N_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_COMPARE_AND_EXCHANGE_N_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1CompareAndExchangeN\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_COMPARE_AND_EXCHANGE_N_WITH_BARRIER_FLAG, regex);
+    }
+
+    public static final String G1_COMPARE_AND_EXCHANGE_P_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_COMPARE_AND_EXCHANGE_P_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1CompareAndExchangeP\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_COMPARE_AND_EXCHANGE_P_WITH_BARRIER_FLAG, regex);
+    }
+
+    public static final String G1_COMPARE_AND_SWAP_N_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_COMPARE_AND_SWAP_N_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1CompareAndSwapN\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_COMPARE_AND_SWAP_N_WITH_BARRIER_FLAG, regex);
+    }
+
+    public static final String G1_COMPARE_AND_SWAP_P_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_COMPARE_AND_SWAP_P_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1CompareAndSwapP\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_COMPARE_AND_SWAP_P_WITH_BARRIER_FLAG, regex);
+    }
+
+    public static final String G1_ENCODE_P_AND_STORE_N = PREFIX + "G1_ENCODE_P_AND_STORE_N" + POSTFIX;
+    static {
+        machOnlyNameRegex(G1_ENCODE_P_AND_STORE_N, "g1EncodePAndStoreN");
+    }
+
+    public static final String G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1EncodePAndStoreN\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, regex);
+    }
+
+    public static final String G1_GET_AND_SET_N_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_GET_AND_SET_N_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1GetAndSetN\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_GET_AND_SET_N_WITH_BARRIER_FLAG, regex);
+    }
+
+    public static final String G1_GET_AND_SET_P_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_GET_AND_SET_P_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1GetAndSetP\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_GET_AND_SET_P_WITH_BARRIER_FLAG, regex);
+    }
+
+    public static final String G1_LOAD_N = PREFIX + "G1_LOAD_N" + POSTFIX;
+    static {
+        machOnlyNameRegex(G1_LOAD_N, "g1LoadN");
+    }
+
+    public static final String G1_LOAD_N_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_LOAD_N_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1LoadN\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_LOAD_N_WITH_BARRIER_FLAG, regex);
+    }
+
+    public static final String G1_LOAD_P_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_LOAD_P_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1LoadP\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_LOAD_P_WITH_BARRIER_FLAG, regex);
+    }
+
+    public static final String G1_STORE_N = PREFIX + "G1_STORE_N" + POSTFIX;
+    static {
+        machOnlyNameRegex(G1_STORE_N, "g1StoreN");
+    }
+
+    public static final String G1_STORE_N_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_STORE_N_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1StoreN\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_STORE_N_WITH_BARRIER_FLAG, regex);
+    }
+
+    public static final String G1_STORE_P = PREFIX + "G1_STORE_P" + POSTFIX;
+    static {
+        machOnlyNameRegex(G1_STORE_P, "g1StoreP");
+    }
+
+    public static final String G1_STORE_P_WITH_BARRIER_FLAG = COMPOSITE_PREFIX + "G1_STORE_P_WITH_BARRIER_FLAG" + POSTFIX;
+    static {
+        String regex = START + "g1StoreP\\S*" + MID + "barrier\\(\\s*" + IS_REPLACED + "\\s*\\)" + END;
+        machOnly(G1_STORE_P_WITH_BARRIER_FLAG, regex);
+    }
+
     public static final String IF = PREFIX + "IF" + POSTFIX;
     static {
         beforeMatchingNameRegex(IF, "If\\b");
@@ -852,6 +943,11 @@ public class IRNode {
         vectorNode(LSHIFT_VL, "LShiftVL", TYPE_LONG);
     }
 
+    public static final String MACH_TEMP = PREFIX + "MACH_TEMP" + POSTFIX;
+    static {
+        machOnlyNameRegex(MACH_TEMP, "MachTemp");
+    }
+
     public static final String MACRO_LOGIC_V = PREFIX + "MACRO_LOGIC_V" + POSTFIX;
     static {
         afterBarrierExpansionToBeforeMatching(MACRO_LOGIC_V, "MacroLogicV");
@@ -1148,6 +1244,12 @@ public class IRNode {
         trapNodes(NULL_CHECK_TRAP, "null_check");
     }
 
+    public static final String OOPMAP_WITH = COMPOSITE_PREFIX + "OOPMAP_WITH" + POSTFIX;
+    static {
+        String regex = "(#\\s*OopMap\\s*\\{.*" + IS_REPLACED + ".*\\})";
+        optoOnly(OOPMAP_WITH, regex);
+    }
+
     public static final String OR_VB = VECTOR_PREFIX + "OR_VB" + POSTFIX;
     static {
         vectorNode(OR_VB, "OrV", TYPE_BYTE);
diff --git a/test/hotspot/jtreg/compiler/runtime/safepoints/TestMachTempsAcrossSafepoints.java b/test/hotspot/jtreg/compiler/runtime/safepoints/TestMachTempsAcrossSafepoints.java
new file mode 100644
index 00000000000..ecd8f58c5ed
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/runtime/safepoints/TestMachTempsAcrossSafepoints.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.runtime.safepoints;
+
+import compiler.lib.ir_framework.*;
+import java.lang.ref.SoftReference;
+
+/**
+ * @test
+ * @summary Test that undefined values generated by MachTemp nodes (in this
+ *          case, derived from G1 barriers) are not included in OopMaps.
+ *          Extracted from java.lang.invoke.LambdaFormEditor::getInCache.
+ * @key randomness
+ * @library /test/lib /
+ * @requires vm.gc.G1 & vm.bits == 64 & vm.opt.final.UseCompressedOops == true
+ * @run driver compiler.runtime.safepoints.TestMachTempsAcrossSafepoints
+ */
+
+public class TestMachTempsAcrossSafepoints {
+
+    static class RefWithKey extends SoftReference {
+        final int key;
+
+        public RefWithKey(int key) {
+            super(new Object());
+            this.key = key;
+        }
+
+        @DontInline
+        @Override
+        public boolean equals(Object obj) {
+            return obj instanceof RefWithKey that && this.key == that.key;
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        String inlineCmd = "-XX:CompileCommand=inline,java.lang.ref.SoftReference::get";
+        TestFramework.runWithFlags(inlineCmd, "-XX:+StressGCM", "-XX:+StressLCM", "-XX:StressSeed=1");
+        TestFramework.runWithFlags(inlineCmd, "-XX:+StressGCM", "-XX:+StressLCM");
+    }
+
+    @Test
+    @IR(counts = {IRNode.G1_LOAD_N, "1"}, phase = CompilePhase.FINAL_CODE)
+    @IR(counts = {IRNode.MACH_TEMP, ">= 1"}, phase = CompilePhase.FINAL_CODE)
+    @IR(counts = {IRNode.STATIC_CALL_OF_METHOD, "equals", "2"})
+    @IR(failOn = {IRNode.OOPMAP_WITH, "NarrowOop"})
+    static private Object test(RefWithKey key, RefWithKey[] refs) {
+        RefWithKey k = null;
+        // This loop causes the register allocator to not "rematerialize" all
+        // MachTemp nodes generated for the reference g1LoadN instruction below.
+        for (int i = 0; i < refs.length; i++) {
+            RefWithKey k0 = refs[0];
+            if (k0.equals(key)) {
+                k = k0;
+            }
+        }
+        if (k != null && !key.equals(k)) {
+            return null;
+        }
+        // The MachTemp node implementing the dst TEMP operand in the g1LoadN
+        // instruction corresponding to k.get() can be scheduled across the
+        // above call to RefWithKey::equals(), due to an unfortunate interaction
+        // of inaccurate basic block frequency estimation (emulated in this test
+        // by randomizing the GCM and LCM heuristics) and call-catch cleanup.
+        // Since narrow pointer MachTemp nodes are typed as narrow OOPs, this
+        // causes the oopmap builder to include the MachTemp node definition in
+        // the RefWithKey::equals() return oopmap.
+        return (k != null) ? k.get() : null;
+    }
+
+    @Run(test = "test")
+    @Warmup(0)
+    public void run() {
+        RefWithKey ref = new RefWithKey(42);
+        test(ref, new RefWithKey[]{ref});
+    }
+}
diff --git a/test/hotspot/jtreg/testlibrary/ctw/src/sun/hotspot/tools/ctw/CtwRunner.java b/test/hotspot/jtreg/testlibrary/ctw/src/sun/hotspot/tools/ctw/CtwRunner.java
index df4f9063586..d62e286c68d 100644
--- a/test/hotspot/jtreg/testlibrary/ctw/src/sun/hotspot/tools/ctw/CtwRunner.java
+++ b/test/hotspot/jtreg/testlibrary/ctw/src/sun/hotspot/tools/ctw/CtwRunner.java
@@ -304,7 +304,10 @@ public class CtwRunner {
                 "-XX:+StressMacroExpansion",
                 "-XX:+StressIncrementalInlining",
                 // StressSeed is uint
-                "-XX:StressSeed=" + rng.nextInt(Integer.MAX_VALUE)));
+                "-XX:StressSeed=" + rng.nextInt(Integer.MAX_VALUE),
+                // Do not fail on huge methods where StressGCM makes register
+                // allocation allocate lots of memory
+                "-XX:CompileCommand=memlimit,*.*,0"));
 
         for (String arg : CTW_EXTRA_ARGS.split(",")) {
             Args.add(arg);
diff --git a/test/jdk/java/lang/invoke/BigArityTest.java b/test/jdk/java/lang/invoke/BigArityTest.java
index 338903f3163..2dba056a183 100644
--- a/test/jdk/java/lang/invoke/BigArityTest.java
+++ b/test/jdk/java/lang/invoke/BigArityTest.java
@@ -24,7 +24,7 @@
 /* @test
  * @summary High arity invocations
  * @compile BigArityTest.java
- * @run junit/othervm/timeout=2500 -XX:+IgnoreUnrecognizedVMOptions -XX:-VerifyDependencies -esa -DBigArityTest.ITERATION_COUNT=1 test.java.lang.invoke.BigArityTest
+ * @run junit/othervm/timeout=2500 -XX:+IgnoreUnrecognizedVMOptions -XX:-VerifyDependencies -XX:CompileCommand=memlimit,*.*,0 -esa -DBigArityTest.ITERATION_COUNT=1 test.java.lang.invoke.BigArityTest
  */
 
 package test.java.lang.invoke;
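
For reference, the IRNode constants added above are intended to be used from @IR annotations in the same way TestG1BarrierGeneration uses them. The sketch below (not part of the patch) illustrates the pattern; it assumes the Outer helper class, its reference field f, and the PRE_AND_POST barrier-flag string are the helpers defined earlier in that test.

    // Illustrative sketch only: checks that the matched G1 store instruction
    // carries a barrier flag indicating both pre- and post-barrier were kept
    // after late (post-matching) barrier expansion.
    @Test
    @IR(applyIf = {"UseCompressedOops", "false"},
        counts = {IRNode.G1_STORE_P_WITH_BARRIER_FLAG, PRE_AND_POST, "1"},
        phase = CompilePhase.FINAL_CODE)
    @IR(applyIf = {"UseCompressedOops", "true"},
        counts = {IRNode.G1_ENCODE_P_AND_STORE_N_WITH_BARRIER_FLAG, PRE_AND_POST, "1"},
        phase = CompilePhase.FINAL_CODE)
    public static void exampleFieldStore(Outer o, Object o1) {
        o.f = o1;
    }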