From 48f5220c8018f9c166df8d49ce1eb2070695d961 Mon Sep 17 00:00:00 2001
From: Martin Doerr <mdoerr@openjdk.org>
Date: Fri, 5 Feb 2021 12:58:23 +0000
Subject: [PATCH] 8260369: [PPC64] Add support for JDK-8200555

Reviewed-by: lucy
---
 src/hotspot/cpu/ppc/interp_masm_ppc.hpp       |  3 +-
 src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp    | 17 ++--
 src/hotspot/cpu/ppc/macroAssembler_ppc.cpp    | 22 +++--
 src/hotspot/cpu/ppc/macroAssembler_ppc.hpp    |  6 +-
 .../ppc/templateInterpreterGenerator_ppc.cpp  | 59 +++++++------
 src/hotspot/cpu/ppc/templateTable_ppc.hpp     |  3 +-
 src/hotspot/cpu/ppc/templateTable_ppc_64.cpp  | 88 ++++++++++---------
 7 files changed, 111 insertions(+), 87 deletions(-)

diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
index fa896a2918d..f289b808150 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
@@ -77,7 +77,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
                          Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype);
 
   // Load object from cpool->resolved_references(index).
-  void load_resolved_reference_at_index(Register result, Register index, Register tmp1, Label *L_handle_null = NULL);
+  void load_resolved_reference_at_index(Register result, Register index, Register tmp1, Register tmp2,
+                                        Label *L_handle_null = NULL);
 
   // load cpool->resolved_klass_at(index)
   void load_resolved_klass_at_offset(Register Rcpool, Register Roffset, Register Rklass);
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index de1e41d9b92..11e4cc2a3f7 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -477,33 +477,34 @@ void InterpreterMacroAssembler::get_u4(Register Rdst, Register Rsrc, int offset,
 // Load object from cpool->resolved_references(index).
 // Kills:
 //   - index
-void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index, Register tmp1,
+void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index,
+                                                                 Register tmp1, Register tmp2,
                                                                  Label *L_handle_null) {
-  assert_different_registers(result, index);
+  assert_different_registers(result, index, tmp1, tmp2);
+  assert(index->is_nonvolatile(), "needs to survive C-call in resolve_oop_handle");
   get_constant_pool(result);
 
   // Convert from field index to resolved_references() index and from
   // word index to byte offset. Since this is a java object, it can be compressed.
-  Register tmp2 = index;  // reuse
-  sldi(tmp1, index, LogBytesPerHeapOop);
+  sldi(index, index, LogBytesPerHeapOop);
   // Load pointer for resolved_references[] objArray.
   ld(result, ConstantPool::cache_offset_in_bytes(), result);
   ld(result, ConstantPoolCache::resolved_references_offset_in_bytes(), result);
-  resolve_oop_handle(result);
+  resolve_oop_handle(result, tmp1, tmp2, MacroAssembler::PRESERVATION_NONE);
 #ifdef ASSERT
   Label index_ok;
   lwa(R0, arrayOopDesc::length_offset_in_bytes(), result);
   sldi(R0, R0, LogBytesPerHeapOop);
-  cmpd(CCR0, tmp1, R0);
+  cmpd(CCR0, index, R0);
   blt(CCR0, index_ok);
   stop("resolved reference index out of bounds");
   bind(index_ok);
 #endif
   // Add in the index.
-  add(result, tmp1, result);
+  add(result, index, result);
   load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result,
                 tmp1, tmp2,
-                MacroAssembler::PRESERVATION_FRAME_LR,
+                MacroAssembler::PRESERVATION_NONE,
                 0, L_handle_null);
 }
 
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index 8ced76f08a0..4fdd764c743 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -3233,16 +3233,22 @@ void MacroAssembler::load_klass(Register dst, Register src) {
 }
 
 // ((OopHandle)result).resolve();
-void MacroAssembler::resolve_oop_handle(Register result) {
-  // OopHandle::resolve is an indirection.
-  ld(result, 0, result);
+void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2,
+                                        MacroAssembler::PreservationLevel preservation_level) {
+  access_load_at(T_OBJECT, IN_NATIVE, result, noreg, result, tmp1, tmp2, preservation_level);
 }
 
-void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) {
-  ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method);
-  ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
-  ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);
-  resolve_oop_handle(mirror);
+void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2,
+                                         MacroAssembler::PreservationLevel preservation_level) {
+  Label resolved;
+
+  // A null weak handle resolves to null.
+  cmpdi(CCR0, result, 0);
+  beq(CCR0, resolved);
+
+  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, result, noreg, result, tmp1, tmp2,
+                 preservation_level);
+  bind(resolved);
 }
 
 void MacroAssembler::load_method_holder(Register holder, Register method) {
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index 4dd0ba6c34d..439d045c13f 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -743,8 +743,10 @@ class MacroAssembler: public Assembler {
   void store_klass(Register dst_oop, Register klass, Register tmp = R0);
   void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
 
-  void resolve_oop_handle(Register result);
-  void load_mirror_from_const_method(Register mirror, Register const_method);
+  void resolve_oop_handle(Register result, Register tmp1, Register tmp2,
+                          MacroAssembler::PreservationLevel preservation_level);
+  void resolve_weak_handle(Register result, Register tmp1, Register tmp2,
+                           MacroAssembler::PreservationLevel preservation_level);
   void load_method_holder(Register holder, Register method);
 
   static int instr_size_for_decode_klass_not_null();
diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
index 515831bf58a..5a74b8ca1b0 100644
--- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
@@ -933,11 +933,14 @@ void TemplateInterpreterGenerator::lock_method(Register Rflags, Register Rscratc
 // state_size: We save the current state of the interpreter to this area.
 //
 void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Register Rsize_of_parameters, Register Rsize_of_locals) {
-  Register parent_frame_resize = R6_ARG4, // Frame will grow by this number of bytes.
-           top_frame_size      = R7_ARG5,
-           Rconst_method       = R8_ARG6;
+  Register Rparent_frame_resize = R6_ARG4, // Frame will grow by this number of bytes.
+           Rtop_frame_size      = R7_ARG5,
+           Rconst_method        = R8_ARG6,
+           Rconst_pool          = R9_ARG7,
+           Rmirror              = R10_ARG8;
 
-  assert_different_registers(Rsize_of_parameters, Rsize_of_locals, parent_frame_resize, top_frame_size);
+  assert_different_registers(Rsize_of_parameters, Rsize_of_locals, Rparent_frame_resize, Rtop_frame_size,
+                             Rconst_method, Rconst_pool);
 
   __ ld(Rconst_method, method_(const));
   __ lhz(Rsize_of_parameters /* number of params */,
@@ -948,35 +951,35 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist
     // We add two slots to the parameter_count, one for the jni
     // environment and one for a possible native mirror.
     Label skip_native_calculate_max_stack;
-    __ addi(top_frame_size, Rsize_of_parameters, 2);
-    __ cmpwi(CCR0, top_frame_size, Argument::n_register_parameters);
+    __ addi(Rtop_frame_size, Rsize_of_parameters, 2);
+    __ cmpwi(CCR0, Rtop_frame_size, Argument::n_register_parameters);
     __ bge(CCR0, skip_native_calculate_max_stack);
-    __ li(top_frame_size, Argument::n_register_parameters);
+    __ li(Rtop_frame_size, Argument::n_register_parameters);
     __ bind(skip_native_calculate_max_stack);
     __ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize);
-    __ sldi(top_frame_size, top_frame_size, Interpreter::logStackElementSize);
-    __ sub(parent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize!
+    __ sldi(Rtop_frame_size, Rtop_frame_size, Interpreter::logStackElementSize);
+    __ sub(Rparent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize!
     assert(Rsize_of_locals == noreg, "Rsize_of_locals not initialized"); // Only relevant value is Rsize_of_parameters.
   } else {
     __ lhz(Rsize_of_locals /* number of params */, in_bytes(ConstMethod::size_of_locals_offset()), Rconst_method);
     __ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize);
     __ sldi(Rsize_of_locals, Rsize_of_locals, Interpreter::logStackElementSize);
-    __ lhz(top_frame_size, in_bytes(ConstMethod::max_stack_offset()), Rconst_method);
+    __ lhz(Rtop_frame_size, in_bytes(ConstMethod::max_stack_offset()), Rconst_method);
     __ sub(R11_scratch1, Rsize_of_locals, Rsize_of_parameters); // >=0
-    __ sub(parent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize!
-    __ sldi(top_frame_size, top_frame_size, Interpreter::logStackElementSize);
-    __ add(parent_frame_resize, parent_frame_resize, R11_scratch1);
+    __ sub(Rparent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize!
+    __ sldi(Rtop_frame_size, Rtop_frame_size, Interpreter::logStackElementSize);
+    __ add(Rparent_frame_resize, Rparent_frame_resize, R11_scratch1);
   }
 
   // Compute top frame size.
-  __ addi(top_frame_size, top_frame_size, frame::abi_reg_args_size + frame::ijava_state_size);
+  __ addi(Rtop_frame_size, Rtop_frame_size, frame::abi_reg_args_size + frame::ijava_state_size);
 
   // Cut back area between esp and max_stack.
-  __ addi(parent_frame_resize, parent_frame_resize, frame::abi_minframe_size - Interpreter::stackElementSize);
+  __ addi(Rparent_frame_resize, Rparent_frame_resize, frame::abi_minframe_size - Interpreter::stackElementSize);
 
-  __ round_to(top_frame_size, frame::alignment_in_bytes);
-  __ round_to(parent_frame_resize, frame::alignment_in_bytes);
-  // parent_frame_resize = (locals-parameters) - (ESP-SP-ABI48) Rounded to frame alignment size.
+  __ round_to(Rtop_frame_size, frame::alignment_in_bytes);
+  __ round_to(Rparent_frame_resize, frame::alignment_in_bytes);
+  // Rparent_frame_resize = (locals-parameters) - (ESP-SP-ABI48) Rounded to frame alignment size.
   // Enlarge by locals-parameters (not in case of native_call), shrink by ESP-SP-ABI48.
 
   if (!native_call) {
@@ -984,15 +987,15 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist
     // Native calls don't need the stack size check since they have no
     // expression stack and the arguments are already on the stack and
     // we only add a handful of words to the stack.
-    __ add(R11_scratch1, parent_frame_resize, top_frame_size);
+    __ add(R11_scratch1, Rparent_frame_resize, Rtop_frame_size);
     generate_stack_overflow_check(R11_scratch1, R12_scratch2);
   }
 
   // Set up interpreter state registers.
 
   __ add(R18_locals, R15_esp, Rsize_of_parameters);
-  __ ld(R27_constPoolCache, in_bytes(ConstMethod::constants_offset()), Rconst_method);
-  __ ld(R27_constPoolCache, ConstantPool::cache_offset_in_bytes(), R27_constPoolCache);
+  __ ld(Rconst_pool, in_bytes(ConstMethod::constants_offset()), Rconst_method);
+  __ ld(R27_constPoolCache, ConstantPool::cache_offset_in_bytes(), Rconst_pool);
 
   // Set method data pointer.
   if (ProfileInterpreter) {
@@ -1012,19 +1015,21 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist
 
   // Resize parent frame.
   __ mflr(R12_scratch2);
-  __ neg(parent_frame_resize, parent_frame_resize);
-  __ resize_frame(parent_frame_resize, R11_scratch1);
+  __ neg(Rparent_frame_resize, Rparent_frame_resize);
+  __ resize_frame(Rparent_frame_resize, R11_scratch1);
   __ std(R12_scratch2, _abi0(lr), R1_SP);
 
   // Get mirror and store it in the frame as GC root for this Method*.
-  __ load_mirror_from_const_method(R12_scratch2, Rconst_method);
+  __ ld(Rmirror, ConstantPool::pool_holder_offset_in_bytes(), Rconst_pool);
+  __ ld(Rmirror, in_bytes(Klass::java_mirror_offset()), Rmirror);
+  __ resolve_oop_handle(Rmirror, R11_scratch1, R12_scratch2, MacroAssembler::PRESERVATION_FRAME_LR_GP_REGS);
 
   __ addi(R26_monitor, R1_SP, -frame::ijava_state_size);
   __ addi(R15_esp, R26_monitor, -Interpreter::stackElementSize);
 
   // Store values.
   __ std(R19_method, _ijava_state_neg(method), R1_SP);
-  __ std(R12_scratch2, _ijava_state_neg(mirror), R1_SP);
+  __ std(Rmirror, _ijava_state_neg(mirror), R1_SP);
   __ std(R18_locals, _ijava_state_neg(locals), R1_SP);
   __ std(R27_constPoolCache, _ijava_state_neg(cpoolCache), R1_SP);
 
@@ -1046,12 +1051,12 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist
   __ std(R0, _ijava_state_neg(oop_tmp), R1_SP); // only used for native_call
 
   // Store sender's SP and this frame's top SP.
-  __ subf(R12_scratch2, top_frame_size, R1_SP);
+  __ subf(R12_scratch2, Rtop_frame_size, R1_SP);
   __ std(R21_sender_SP, _ijava_state_neg(sender_sp), R1_SP);
   __ std(R12_scratch2, _ijava_state_neg(top_frame_sp), R1_SP);
 
   // Push top frame.
-  __ push_frame(top_frame_size, R11_scratch1);
+  __ push_frame(Rtop_frame_size, R11_scratch1);
 }
 
 // End of helpers
diff --git a/src/hotspot/cpu/ppc/templateTable_ppc.hpp b/src/hotspot/cpu/ppc/templateTable_ppc.hpp
index 0ab9814ebae..afe10c2d6fd 100644
--- a/src/hotspot/cpu/ppc/templateTable_ppc.hpp
+++ b/src/hotspot/cpu/ppc/templateTable_ppc.hpp
@@ -26,7 +26,8 @@
 #ifndef CPU_PPC_TEMPLATETABLE_PPC_HPP
 #define CPU_PPC_TEMPLATETABLE_PPC_HPP
 
-  static void prepare_invoke(int byte_no, Register Rmethod, Register Rret_addr, Register Rindex, Register Rrecv, Register Rflags, Register Rscratch);
+  static void prepare_invoke(int byte_no, Register Rmethod, Register Rret_addr, Register Rindex, Register Rrecv, Register Rflags,
+                             Register Rscratch1, Register Rscratch2);
   static void invokevfinal_helper(Register Rmethod, Register Rflags, Register Rscratch1, Register Rscratch2);
   static void generate_vtable_call(Register Rrecv_klass, Register Rindex, Register Rret, Register Rtemp);
   static void invokeinterface_object_method(Register Rrecv_klass, Register Rret, Register Rflags, Register Rindex, Register Rtemp, Register Rtemp2);
diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
index d5916eb0a73..70a805d3484 100644
--- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
@@ -309,14 +309,14 @@ void TemplateTable::fast_aldc(bool wide) {
 
   // We are resolved if the resolved reference cache entry contains a
   // non-null object (CallSite, etc.)
-  __ get_cache_index_at_bcp(R11_scratch1, 1, index_size);  // Load index.
-  __ load_resolved_reference_at_index(R17_tos, R11_scratch1, R12_scratch2, &is_null);
+  __ get_cache_index_at_bcp(R31, 1, index_size);  // Load index.
+  __ load_resolved_reference_at_index(R17_tos, R31, R11_scratch1, R12_scratch2, &is_null);
 
   // Convert null sentinel to NULL
   int simm16_rest = __ load_const_optimized(R11_scratch1, Universe::the_null_sentinel_addr(), R0, true);
-  __ ld(R11_scratch1, simm16_rest, R11_scratch1);
-  __ resolve_oop_handle(R11_scratch1);
-  __ cmpld(CCR0, R17_tos, R11_scratch1);
+  __ ld(R31, simm16_rest, R11_scratch1);
+  __ resolve_oop_handle(R31, R11_scratch1, R12_scratch2, MacroAssembler::PRESERVATION_NONE);
+  __ cmpld(CCR0, R17_tos, R31);
   if (VM_Version::has_isel()) {
     __ isel_0(R17_tos, CCR0, Assembler::equal);
   } else {
@@ -2252,14 +2252,16 @@ void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Regist
 //   - Rcache, Rindex
 // Output:
 //   - Robj, Roffset, Rflags
+// Kills:
+//   - R11, R12
 void TemplateTable::load_field_cp_cache_entry(Register Robj,
                                               Register Rcache,
                                               Register Rindex /* unused on PPC64 */,
                                               Register Roffset,
                                               Register Rflags,
-                                              bool is_static = false) {
-  assert_different_registers(Rcache, Rflags, Roffset);
-  // assert(Rindex == noreg, "parameter not used on PPC64");
+                                              bool is_static) {
+  assert_different_registers(Rcache, Rflags, Roffset, R11_scratch1, R12_scratch2);
+  assert(Rindex == noreg, "parameter not used on PPC64");
 
   ByteSize cp_base_offset = ConstantPoolCache::base_offset();
   __ ld(Rflags, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcache);
@@ -2267,7 +2269,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
   if (is_static) {
     __ ld(Robj, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f1_offset()), Rcache);
     __ ld(Robj, in_bytes(Klass::java_mirror_offset()), Robj);
-    __ resolve_oop_handle(Robj);
+    __ resolve_oop_handle(Robj, R11_scratch1, R12_scratch2, MacroAssembler::PRESERVATION_NONE);
     // Acquire not needed here. Following access has an address dependency on this value.
   }
 }
@@ -2427,7 +2429,8 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
                  Rflags        = R31,
                  Rbtable       = R5_ARG3,
                  Rbc           = R30,
-                 Rscratch      = R12_scratch2;
+                 Rscratch      = R11_scratch1; // used by load_field_cp_cache_entry
+                 // R12_scratch2 used by load_field_cp_cache_entry
 
   static address field_branch_table[number_of_states],
                  static_branch_table[number_of_states];
@@ -2441,7 +2444,7 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr
   jvmti_post_field_access(Rcache, Rscratch, is_static, false);
 
   // Load after possible GC.
-  load_field_cp_cache_entry(Rclass_or_obj, Rcache, noreg, Roffset, Rflags, is_static);
+  load_field_cp_cache_entry(Rclass_or_obj, Rcache, noreg, Roffset, Rflags, is_static); // Uses R11, R12
 
   // Load pointer to branch table.
   __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);
@@ -2758,10 +2761,10 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
   const Register Rcache        = R5_ARG3,  // Do not use ARG1/2 (causes trouble in jvmti_post_field_mod).
                  Rclass_or_obj = R31,      // Needs to survive C call.
                  Roffset       = R22_tmp2, // Needs to survive C call.
-                 Rflags        = R3_ARG1,
+                 Rflags        = R30,
                  Rbtable       = R4_ARG2,
-                 Rscratch      = R11_scratch1,
-                 Rscratch2     = R12_scratch2,
+                 Rscratch      = R11_scratch1, // used by load_field_cp_cache_entry
+                 Rscratch2     = R12_scratch2, // used by load_field_cp_cache_entry
                  Rscratch3     = R6_ARG4,
                  Rbc           = Rscratch3;
   const ConditionRegister CR_is_vol = CCR2; // Non-volatile condition register (survives runtime call in do_oop_store).
@@ -2780,7 +2783,7 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteContr
   // Load the field offset.
   resolve_cache_and_index(byte_no, Rcache, Rscratch, sizeof(u2));
   jvmti_post_field_mod(Rcache, Rscratch, is_static);
-  load_field_cp_cache_entry(Rclass_or_obj, Rcache, noreg, Roffset, Rflags, is_static);
+  load_field_cp_cache_entry(Rclass_or_obj, Rcache, noreg, Roffset, Rflags, is_static); // Uses R11, R12
 
   // Load pointer to branch table.
   __ load_const_optimized(Rbtable, (address)branch_table, Rscratch);
@@ -3007,15 +3010,15 @@ void TemplateTable::fast_storefield(TosState state) {
                  Rclass_or_obj = R31,      // Needs to survive C call.
                  Roffset       = R22_tmp2, // Needs to survive C call.
                  Rflags        = R3_ARG1,
-                 Rscratch      = R11_scratch1,
-                 Rscratch2     = R12_scratch2,
+                 Rscratch      = R11_scratch1, // used by load_field_cp_cache_entry
+                 Rscratch2     = R12_scratch2, // used by load_field_cp_cache_entry
                  Rscratch3     = R4_ARG2;
   const ConditionRegister CR_is_vol = CCR2; // Non-volatile condition register (survives runtime call in do_oop_store).
 
   // Constant pool already resolved => Load flags and offset of field.
   __ get_cache_and_index_at_bcp(Rcache, 1);
   jvmti_post_field_mod(Rcache, Rscratch, false /* not static */);
-  load_field_cp_cache_entry(noreg, Rcache, noreg, Roffset, Rflags, false);
+  load_field_cp_cache_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12
 
   // Get the obj and the final store addr.
   pop_and_check_object(Rclass_or_obj); // Kills R11_scratch1.
@@ -3090,11 +3093,12 @@ void TemplateTable::fast_accessfield(TosState state) {
                  Rclass_or_obj = R17_tos,
                  Roffset       = R22_tmp2,
                  Rflags        = R23_tmp3,
-                 Rscratch      = R12_scratch2;
+                 Rscratch      = R11_scratch1; // used by load_field_cp_cache_entry
+                 // R12_scratch2 used by load_field_cp_cache_entry
 
   // Constant pool already resolved. Get the field offset.
   __ get_cache_and_index_at_bcp(Rcache, 1);
-  load_field_cp_cache_entry(noreg, Rcache, noreg, Roffset, Rflags, false);
+  load_field_cp_cache_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12
 
   // JVMTI support
   jvmti_post_field_access(Rcache, Rscratch, false, true);
@@ -3226,13 +3230,14 @@ void TemplateTable::fast_xaccess(TosState state) {
                  Rclass_or_obj = R17_tos,
                  Roffset       = R22_tmp2,
                  Rflags        = R23_tmp3,
-                 Rscratch      = R12_scratch2;
+                 Rscratch      = R11_scratch1;
+                 // R12_scratch2 used by load_field_cp_cache_entry
 
   __ ld(Rclass_or_obj, 0, R18_locals);
 
   // Constant pool already resolved. Get the field offset.
   __ get_cache_and_index_at_bcp(Rcache, 2);
-  load_field_cp_cache_entry(noreg, Rcache, noreg, Roffset, Rflags, false);
+  load_field_cp_cache_entry(noreg, Rcache, noreg, Roffset, Rflags, false); // Uses R11, R12
 
   // JVMTI support not needed, since we switch back to single bytecode as soon as debugger attaches.
 
@@ -3317,7 +3322,8 @@ void TemplateTable::prepare_invoke(int byte_no,
                                    Register Rindex,   // itable index, MethodType, Method, etc.
                                    Register Rrecv,    // If caller wants to see it.
                                    Register Rflags,   // If caller wants to test it.
-                                   Register Rscratch
+                                   Register Rscratch1,
+                                   Register Rscratch2
                                    ) {
   // Determine flags.
   const Bytecodes::Code code = bytecode();
@@ -3329,10 +3335,9 @@ void TemplateTable::prepare_invoke(int byte_no,
   const bool load_receiver       = (Rrecv != noreg);
   assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
 
-  assert_different_registers(Rmethod, Rindex, Rflags, Rscratch);
-  assert_different_registers(Rmethod, Rrecv, Rflags, Rscratch);
-  // Rret_addr and Rindex have to be distinct as Rret_addr is used as a second temp register
-  assert_different_registers(Rret_addr, Rindex, Rscratch);
+  assert_different_registers(Rmethod, Rindex, Rflags, Rscratch1);
+  assert_different_registers(Rmethod, Rrecv, Rflags, Rscratch1);
+  assert_different_registers(Rret_addr, Rscratch1);
 
   load_invoke_cp_cache_entry(byte_no, Rmethod, Rindex, Rflags, is_invokevirtual, false, is_invokedynamic);
 
@@ -3341,22 +3346,23 @@ void TemplateTable::prepare_invoke(int byte_no,
   // Maybe push "appendix" to arguments.
   if (is_invokedynamic || is_invokehandle) {
     Label Ldone;
-    Register reference = Rret_addr; // safe to use here; first use comes later
+    Register reference = Rscratch1;
 
     __ rldicl_(R0, Rflags, 64-ConstantPoolCacheEntry::has_appendix_shift, 63);
     __ beq(CCR0, Ldone);
     // Push "appendix" (MethodType, CallSite, etc.).
     // This must be done before we get the receiver,
     // since the parameter_size includes it.
-    __ load_resolved_reference_at_index(reference, Rindex, Rscratch);
+    __ load_resolved_reference_at_index(reference, Rindex, /* temp */ Rret_addr, Rscratch2);
     __ verify_oop(reference);
     __ push_ptr(reference);
+
     __ bind(Ldone);
   }
 
   // Load receiver if needed (after appendix is pushed so parameter size is correct).
   if (load_receiver) {
-    const Register Rparam_count = Rscratch;
+    Register Rparam_count = Rscratch1;
     __ andi(Rparam_count, Rflags, ConstantPoolCacheEntry::parameter_size_mask);
     __ load_receiver(Rparam_count, Rrecv);
     __ verify_oop(Rrecv);
@@ -3364,7 +3370,7 @@ void TemplateTable::prepare_invoke(int byte_no,
 
   // Get return address.
   {
-    Register Rtable_addr = Rscratch;
+    Register Rtable_addr = Rscratch1;
     Register Rret_type = Rret_addr;
     address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
 
@@ -3495,7 +3501,7 @@ void TemplateTable::invokespecial(int byte_no) {
            Rreceiver   = R6_ARG4,
            Rmethod     = R31;
 
-  prepare_invoke(byte_no, Rmethod, Rret_addr, noreg, Rreceiver, Rflags, R11_scratch1);
+  prepare_invoke(byte_no, Rmethod, Rret_addr, noreg, Rreceiver, Rflags, R11_scratch1, R12_scratch2);
 
   // Receiver NULL check.
   __ null_check_throw(Rreceiver, -1, R11_scratch1);
@@ -3514,7 +3520,7 @@ void TemplateTable::invokestatic(int byte_no) {
            Rret_addr   = R4_ARG2,
            Rflags      = R5_ARG3;
 
-  prepare_invoke(byte_no, R19_method, Rret_addr, noreg, noreg, Rflags, R11_scratch1);
+  prepare_invoke(byte_no, R19_method, Rret_addr, noreg, noreg, Rflags, R11_scratch1, R12_scratch2);
 
   __ profile_call(R11_scratch1, R12_scratch2);
   // Argument and return type profiling.
@@ -3566,7 +3572,7 @@ void TemplateTable::invokeinterface(int byte_no) {
                  Rrecv_klass      = R4_ARG2,
                  Rflags           = R7_ARG5;
 
-  prepare_invoke(byte_no, Rinterface_klass, Rret_addr, Rmethod, Rreceiver, Rflags, Rscratch1);
+  prepare_invoke(byte_no, Rinterface_klass, Rret_addr, Rmethod, Rreceiver, Rflags, Rscratch1, /* temp */ Rrecv_klass);
 
   // First check for Object case, then private interface method,
   // then regular interface method.
@@ -3653,10 +3659,11 @@ void TemplateTable::invokedynamic(int byte_no) {
   const Register Rret_addr = R3_ARG1,
                  Rflags    = R31,
                  Rmethod   = R22_tmp2,
-                 Rscratch1 = R11_scratch1,
-                 Rscratch2 = R12_scratch2;
+                 Rscratch1 = R30,
+                 Rscratch2 = R11_scratch1,
+                 Rscratch3 = R12_scratch2;
 
-  prepare_invoke(byte_no, Rmethod, Rret_addr, Rscratch1, noreg, Rflags, Rscratch2);
+  prepare_invoke(byte_no, Rmethod, Rret_addr, Rscratch1, noreg, Rflags, Rscratch2, Rscratch3);
 
   // Profile this call.
   __ profile_call(Rscratch1, Rscratch2);
@@ -3678,10 +3685,11 @@ void TemplateTable::invokehandle(int byte_no) {
                  Rflags    = R31,
                  Rrecv     = R5_ARG3,
                  Rmethod   = R22_tmp2,
-                 Rscratch1 = R11_scratch1,
-                 Rscratch2 = R12_scratch2;
+                 Rscratch1 = R30,
+                 Rscratch2 = R11_scratch1,
+                 Rscratch3 = R12_scratch2;
 
-  prepare_invoke(byte_no, Rmethod, Rret_addr, Rscratch1, Rrecv, Rflags, Rscratch2);
+  prepare_invoke(byte_no, Rmethod, Rret_addr, Rscratch1, Rrecv, Rflags, Rscratch2, Rscratch3);
   __ verify_method_ptr(Rmethod);
   __ null_check_throw(Rrecv, -1, Rscratch2);