From f906a432e9fbac0e8680d60a5a6b0674b70e2f17 Mon Sep 17 00:00:00 2001
From: Zhengyu Gu <zgu@openjdk.org>
Date: Mon, 28 Oct 2019 11:33:28 -0400
Subject: [PATCH] 8232992: Shenandoah: Implement self-fixing interpreter LRB

Reviewed-by: shade
---
 .../shenandoahBarrierSetAssembler_aarch64.cpp | 49 ++++++++++---
 .../shenandoahBarrierSetAssembler_aarch64.hpp |  4 +-
 .../shenandoahBarrierSetAssembler_x86.cpp     | 70 ++++++++++++++-----
 .../shenandoahBarrierSetAssembler_x86.hpp     |  4 +-
 .../shenandoah/c2/shenandoahBarrierSetC2.cpp  |  3 +-
 .../gc/shenandoah/c2/shenandoahSupport.cpp    |  4 +-
 .../share/gc/shenandoah/shenandoahRuntime.cpp |  8 +--
 .../share/gc/shenandoah/shenandoahRuntime.hpp |  5 +-
 8 files changed, 100 insertions(+), 47 deletions(-)

diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
index af23561f131..13fb8a5f8fe 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
@@ -233,9 +233,10 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb
   }
 }
 
-void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Register tmp) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr) {
   assert(ShenandoahLoadRefBarrier, "Should be enabled");
   assert(dst != rscratch2, "need rscratch2");
+  assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);
 
   Label done;
   __ enter();
@@ -245,17 +246,38 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
   // Check for heap stability
   __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
 
-  RegSet to_save = RegSet::of(r0);
+  // use r1 for load address
+  Register result_dst = dst;
+  if (dst == r1) {
+    __ mov(rscratch1, dst);
+    dst = rscratch1;
+  }
+
+  RegSet to_save_r1 = RegSet::of(r1);
+  // If outgoing register is r1, we can clobber it
+  if (result_dst != r1) {
+    __ push(to_save_r1, sp);
+  }
+  __ lea(r1, load_addr);
+
+  RegSet to_save_r0 = RegSet::of(r0);
   if (dst != r0) {
-    __ push(to_save, sp);
+    __ push(to_save_r0, sp);
     __ mov(r0, dst);
   }
 
   __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
 
+  if (result_dst != r0) {
+    __ mov(result_dst, r0);
+  }
+
   if (dst != r0) {
-    __ mov(dst, r0);
-    __ pop(to_save, sp);
+    __ pop(to_save_r0, sp);
+  }
+
+  if (result_dst != r1) {
+    __ pop(to_save_r1, sp);
   }
 
   __ bind(done);
@@ -315,11 +337,11 @@ void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Regis
   }
 }
 
-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Register tmp) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) {
   if (ShenandoahLoadRefBarrier) {
     Label is_null;
     __ cbz(dst, is_null);
-    load_reference_barrier_not_null(masm, dst, tmp);
+    load_reference_barrier_not_null(masm, dst, load_addr);
     __ bind(is_null);
   }
 }
@@ -349,7 +371,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
     if (not_in_heap && !is_traversal_mode) {
       load_reference_barrier_native(masm, dst, src);
     } else {
-      load_reference_barrier(masm, dst, tmp1);
+      load_reference_barrier(masm, dst, src);
     }
 
     if (dst != result_dst) {
@@ -619,9 +641,9 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
   __ load_parameter(0, r0);
   __ load_parameter(1, r1);
   if (UseCompressedOops) {
-    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow));
+    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
   } else {
-    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup));
+    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
   }
   __ blr(lr);
   __ mov(rscratch1, r0);
@@ -646,6 +668,7 @@ address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
 //
 // Input:
 //   r0: OOP to evacuate.  Not null.
+//   r1: load address
 //
 // Output:
 //   r0: Pointer to evacuated OOP.
@@ -681,7 +704,11 @@ address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator
 
   __ push_call_clobbered_registers();
 
-  __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+  if (UseCompressedOops) {
+    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
+  } else {
+    __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+  }
   __ blr(lr);
   __ mov(rscratch1, r0);
   __ pop_call_clobbered_registers();
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
index f71e5d83d1a..fbea8ec21a2 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
@@ -56,8 +56,8 @@ private:
 
   void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg);
   void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg);
-  void load_reference_barrier(MacroAssembler* masm, Register dst, Register tmp);
-  void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Register tmp);
+  void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr);
+  void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr);
   void load_reference_barrier_native(MacroAssembler* masm, Register dst, Address load_addr);
 
   address generate_shenandoah_lrb(StubCodeGenerator* cgen);
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
index b6bc10bf6cc..2aca956ba22 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
@@ -247,7 +247,7 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
   __ bind(done);
 }
 
-void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) {
   assert(ShenandoahLoadRefBarrier, "Should be enabled");
 
   Label done;
@@ -262,26 +262,51 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
   __ push(thread);
   __ get_thread(thread);
 #endif
-  assert_different_registers(dst, thread);
 
   Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
   __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
   __ jccb(Assembler::zero, done);
 
-   if (dst != rax) {
-     __ xchgptr(dst, rax); // Move obj into rax and save rax into obj.
-   }
+  // Use rsi for src address
+  const Register src_addr = rsi;
+  // Setup address parameter first, if it does not clobber oop in dst
+  bool need_addr_setup = (src_addr != dst);
 
-   __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
+  if (need_addr_setup) {
+    __ push(src_addr);
+    __ lea(src_addr, src);
 
-   if (dst != rax) {
-     __ xchgptr(rax, dst); // Swap back obj with rax.
-   }
+    if (dst != rax) {
+      // Move obj into rax and save rax
+      __ push(rax);
+      __ movptr(rax, dst);
+    }
+  } else {
+    // dst == rsi
+    __ push(rax);
+    __ movptr(rax, dst);
+
+    // we can clobber it, since it is outgoing register
+    __ lea(src_addr, src);
+  }
+
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
+
+  if (need_addr_setup) {
+    if (dst != rax) {
+      __ movptr(dst, rax);
+      __ pop(rax);
+    }
+    __ pop(src_addr);
+  } else {
+    __ movptr(dst, rax);
+    __ pop(rax);
+  }
 
   __ bind(done);
 
 #ifndef _LP64
-  __ pop(thread);
+    __ pop(thread);
 #endif
 }
 
@@ -410,12 +435,12 @@ void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm,
   }
 }
 
-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) {
   if (ShenandoahLoadRefBarrier) {
     Label done;
     __ testptr(dst, dst);
     __ jcc(Assembler::zero, done);
-    load_reference_barrier_not_null(masm, dst);
+    load_reference_barrier_not_null(masm, dst, src);
     __ bind(done);
   }
 }
@@ -454,7 +479,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
     if (not_in_heap && !is_traversal_mode) {
       load_reference_barrier_native(masm, dst, src);
     } else {
-      load_reference_barrier(masm, dst);
+      load_reference_barrier(masm, dst, src);
     }
 
     if (dst != result_dst) {
@@ -864,14 +889,14 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
   __ load_parameter(0, c_rarg0);
   __ load_parameter(1, c_rarg1);
   if (UseCompressedOops) {
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow), c_rarg0, c_rarg1);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
   } else {
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup), c_rarg0, c_rarg1);
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
   }
 #else
   __ load_parameter(0, rax);
   __ load_parameter(1, rbx);
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup), rax, rbx);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
 #endif
 
   __ restore_live_registers_except_rax(true);
@@ -890,6 +915,11 @@ address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
 
 #define __ cgen->assembler()->
 
+/*
+ *  Incoming parameters:
+ *  rax: oop
+ *  rsi: load address
+ */
 address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
   __ align(CodeEntryAlignment);
   StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
@@ -941,7 +971,6 @@ address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator
   __ push(rcx);
   __ push(rdx);
   __ push(rdi);
-  __ push(rsi);
 #ifdef _LP64
   __ push(r8);
   __ push(r9);
@@ -956,7 +985,11 @@ address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator
   __ movptr(rbp, rsp);
   __ andptr(rsp, -StackAlignmentInBytes);
   __ push_FPU_state();
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax);
+  if (UseCompressedOops) {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi);
+  } else {
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi);
+  }
   __ pop_FPU_state();
   __ movptr(rsp, rbp);
   __ pop(rbp);
@@ -970,7 +1003,6 @@ address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator
   __ pop(r9);
   __ pop(r8);
 #endif
-  __ pop(rsi);
   __ pop(rdi);
   __ pop(rdx);
   __ pop(rcx);
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
index 23f20977fbb..edaa5103e6e 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
@@ -55,7 +55,7 @@ private:
                                     bool tosca_live,
                                     bool expand_call);
 
-  void load_reference_barrier_not_null(MacroAssembler* masm, Register dst);
+  void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src);
 
   void storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp);
 
@@ -72,7 +72,7 @@ public:
   void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm);
 #endif
 
-  void load_reference_barrier(MacroAssembler* masm, Register dst);
+  void load_reference_barrier(MacroAssembler* masm, Register dst, Address src);
   void load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src);
 
   void cmpxchg_oop(MacroAssembler* masm,
diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
index efdbf291f42..b47497ab60a 100644
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
@@ -303,8 +303,7 @@ bool ShenandoahBarrierSetC2::is_shenandoah_lrb_call(Node* call) {
 
   address entry_point = call->as_CallLeaf()->entry_point();
   return (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)) ||
-         (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup)) ||
-         (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow)) ||
+         (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)) ||
          (entry_point == CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native));
 }
 
diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
index 11b7de7a491..54eeeb2ba43 100644
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
@@ -1028,8 +1028,8 @@ void ShenandoahBarrierC2Support::call_lrb_stub(Node*& ctrl, Node*& val, Node* lo
   phase->register_new_node(mm, ctrl);
 
   address target = LP64_ONLY(UseCompressedOops) NOT_LP64(false) ?
-          CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup_narrow) :
-          CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_fixup);
+          CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow) :
+          CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier);
 
   address calladdr = is_native ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native)
                                : target;
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp
index 824de2a611a..c83cec57c94 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.cpp
@@ -62,15 +62,11 @@ JRT_LEAF(void, ShenandoahRuntime::write_ref_field_pre_entry(oopDesc* orig, JavaT
   ShenandoahThreadLocalData::satb_mark_queue(thread).enqueue_known_active(orig);
 JRT_END
 
-JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier(oopDesc* src))
-  return ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src, (oop*)NULL);
-JRT_END
-
-JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_fixup(oopDesc* src, oop* load_addr))
+JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier(oopDesc* src, oop* load_addr))
   return ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src, load_addr);
 JRT_END
 
-JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_fixup_narrow(oopDesc* src, narrowOop* load_addr))
+JRT_LEAF(oopDesc*, ShenandoahRuntime::load_reference_barrier_narrow(oopDesc* src, narrowOop* load_addr))
   return ShenandoahBarrierSet::barrier_set()->load_reference_barrier_mutator(src, load_addr);
 JRT_END
 
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp
index 3ce9d63cae1..d4671f3aeb8 100644
--- a/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahRuntime.hpp
@@ -38,9 +38,8 @@ public:
   static void write_ref_array_pre_duinit_narrow_oop_entry(narrowOop* src, narrowOop* dst, size_t length);
   static void write_ref_field_pre_entry(oopDesc* orig, JavaThread* thread);
 
-  static oopDesc* load_reference_barrier(oopDesc* src);
-  static oopDesc* load_reference_barrier_fixup(oopDesc* src, oop* load_addr);
-  static oopDesc* load_reference_barrier_fixup_narrow(oopDesc* src, narrowOop* load_addr);
+  static oopDesc* load_reference_barrier(oopDesc* src, oop* load_addr);
+  static oopDesc* load_reference_barrier_narrow(oopDesc* src, narrowOop* load_addr);
 
   static oopDesc* load_reference_barrier_native(oopDesc* src, oop* load_addr);