8287061: Support for rematerializing scalar replaced objects participating in allocation merges

Reviewed-by: kvn, vlivanov
Cesar Soares Lucas 2023-07-17 23:01:35 +00:00 committed by Vladimir Kozlov
parent 3236ba0be4
commit a53345ad03
26 changed files with 2631 additions and 254 deletions

View File

@ -29,6 +29,7 @@
#include "gc/shared/collectedHeap.hpp"
#include "memory/universe.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/stackValue.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaThread.hpp"
@ -80,6 +81,20 @@ ScopeValue* DebugInfoReadStream::read_object_value(bool is_auto_box) {
return result;
}
ScopeValue* DebugInfoReadStream::read_object_merge_value() {
int id = read_int();
#ifdef ASSERT
assert(_obj_pool != nullptr, "object pool does not exist");
for (int i = _obj_pool->length() - 1; i >= 0; i--) {
assert(_obj_pool->at(i)->as_ObjectValue()->id() != id, "should not be read twice");
}
#endif
ObjectMergeValue* result = new ObjectMergeValue(id);
_obj_pool->push(result);
result->read_object(this);
return result;
}
ScopeValue* DebugInfoReadStream::get_cached_object() {
int id = read_int();
assert(_obj_pool != nullptr, "object pool does not exist");
@ -98,7 +113,8 @@ ScopeValue* DebugInfoReadStream::get_cached_object() {
enum { LOCATION_CODE = 0, CONSTANT_INT_CODE = 1, CONSTANT_OOP_CODE = 2,
CONSTANT_LONG_CODE = 3, CONSTANT_DOUBLE_CODE = 4,
OBJECT_CODE = 5, OBJECT_ID_CODE = 6,
AUTO_BOX_OBJECT_CODE = 7, MARKER_CODE = 8 };
AUTO_BOX_OBJECT_CODE = 7, MARKER_CODE = 8,
OBJECT_MERGE_CODE = 9 };
ScopeValue* ScopeValue::read_from(DebugInfoReadStream* stream) {
ScopeValue* result = nullptr;
@ -110,6 +126,7 @@ ScopeValue* ScopeValue::read_from(DebugInfoReadStream* stream) {
case CONSTANT_DOUBLE_CODE: result = new ConstantDoubleValue(stream); break;
case OBJECT_CODE: result = stream->read_object_value(false /*is_auto_box*/); break;
case AUTO_BOX_OBJECT_CODE: result = stream->read_object_value(true /*is_auto_box*/); break;
case OBJECT_MERGE_CODE: result = stream->read_object_merge_value(); break;
case OBJECT_ID_CODE: result = stream->get_cached_object(); break;
case MARKER_CODE: result = new MarkerValue(); break;
default: ShouldNotReachHere();
@ -149,6 +166,7 @@ void ObjectValue::set_value(oop value) {
}
void ObjectValue::read_object(DebugInfoReadStream* stream) {
_is_root = stream->read_bool();
_klass = read_from(stream);
assert(_klass->is_constant_oop(), "should be constant java mirror oop");
int length = stream->read_int();
@ -166,6 +184,7 @@ void ObjectValue::write_on(DebugInfoWriteStream* stream) {
set_visited(true);
stream->write_int(is_auto_box() ? AUTO_BOX_OBJECT_CODE : OBJECT_CODE);
stream->write_int(_id);
stream->write_bool(_is_root);
_klass->write_on(stream);
int length = _field_values.length();
stream->write_int(length);
@ -176,21 +195,106 @@ void ObjectValue::write_on(DebugInfoWriteStream* stream) {
}
void ObjectValue::print_on(outputStream* st) const {
st->print("%s[%d]", is_auto_box() ? "box_obj" : "obj", _id);
st->print("%s[%d]", is_auto_box() ? "box_obj" : is_object_merge() ? "merge_obj" : "obj", _id);
}
void ObjectValue::print_fields_on(outputStream* st) const {
#ifndef PRODUCT
if (_field_values.length() > 0) {
_field_values.at(0)->print_on(st);
}
for (int i = 1; i < _field_values.length(); i++) {
st->print(", ");
_field_values.at(i)->print_on(st);
if (is_object_merge()) {
ObjectMergeValue* omv = (ObjectMergeValue*)this;
st->print("selector=\"");
omv->selector()->print_on(st);
st->print("\"");
ScopeValue* merge_pointer = omv->merge_pointer();
if (!(merge_pointer->is_object() && merge_pointer->as_ObjectValue()->value()() == nullptr) &&
!(merge_pointer->is_constant_oop() && merge_pointer->as_ConstantOopReadValue()->value()() == nullptr)) {
st->print(", merge_pointer=\"");
merge_pointer->print_on(st);
st->print("\"");
}
GrowableArray<ScopeValue*>* possible_objects = omv->possible_objects();
st->print(", candidate_objs=[%d", possible_objects->at(0)->as_ObjectValue()->id());
int ncandidates = possible_objects->length();
for (int i = 1; i < ncandidates; i++) {
st->print(", %d", possible_objects->at(i)->as_ObjectValue()->id());
}
st->print("]");
} else {
st->print("\n Fields: ");
if (_field_values.length() > 0) {
_field_values.at(0)->print_on(st);
}
for (int i = 1; i < _field_values.length(); i++) {
st->print(", ");
_field_values.at(i)->print_on(st);
}
}
#endif
}
// ObjectMergeValue
// Returns the ObjectValue that should be used for the local that this
// ObjectMergeValue represents. ObjectMergeValue represents allocation
// merges in C2. This method selects which path the allocation merge took
// during the execution that reached the Trap which triggered the
// rematerialization of the object.
ObjectValue* ObjectMergeValue::select(frame& fr, RegisterMap& reg_map) {
StackValue* sv_selector = StackValue::create_stack_value(&fr, &reg_map, _selector);
jint selector = sv_selector->get_int();
// If the selector is '-1' it means that execution followed the path
// where no scalar replacement happened.
// Otherwise, it is the index into the _possible_objects array that holds
// the description of the scalar replaced object.
if (selector == -1) {
StackValue* sv_merge_pointer = StackValue::create_stack_value(&fr, &reg_map, _merge_pointer);
_selected = new ObjectValue(id());
// Retrieve the pointer to the real object and use it as if we had
// allocated it during the deoptimization
_selected->set_value(sv_merge_pointer->get_obj()());
// No need to rematerialize
return nullptr;
} else {
assert(selector < _possible_objects.length(), "sanity");
_selected = (ObjectValue*) _possible_objects.at(selector);
return _selected;
}
}
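For illustration only, here is a minimal standalone C++ sketch (not HotSpot code; 'Candidate' and 'select_candidate' are hypothetical names) of the selection rule implemented above: a selector of -1 means the already-allocated object behind the merge pointer is used, while any other value indexes the candidate list and names the scalar-replaced object that must be rematerialized.

#include <cassert>
#include <cstdio>
#include <vector>

struct Candidate { int debug_info_id; };   // stand-in for an ObjectValue description

// Returns the candidate to rematerialize, or nullptr when the merge pointer
// already refers to a real heap object (selector == -1).
const Candidate* select_candidate(int selector, const std::vector<Candidate>& candidates) {
  if (selector == -1) {
    return nullptr;                         // no rematerialization needed
  }
  assert(selector < (int)candidates.size());
  return &candidates[selector];
}

int main() {
  std::vector<Candidate> candidates = { {7}, {8} };
  const Candidate* c = select_candidate(1, candidates);
  std::printf("%s\n", c == nullptr ? "use merge pointer" : "rematerialize selected candidate");
  return 0;
}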
void ObjectMergeValue::read_object(DebugInfoReadStream* stream) {
_selector = read_from(stream);
_merge_pointer = read_from(stream);
int ncandidates = stream->read_int();
for (int i = 0; i < ncandidates; i++) {
ScopeValue* result = read_from(stream);
assert(result->is_object(), "Candidate is not an object!");
ObjectValue* obj = result->as_ObjectValue();
_possible_objects.append(obj);
}
}
void ObjectMergeValue::write_on(DebugInfoWriteStream* stream) {
if (is_visited()) {
stream->write_int(OBJECT_ID_CODE);
stream->write_int(_id);
} else {
set_visited(true);
stream->write_int(OBJECT_MERGE_CODE);
stream->write_int(_id);
_selector->write_on(stream);
_merge_pointer->write_on(stream);
int ncandidates = _possible_objects.length();
stream->write_int(ncandidates);
for (int i = 0; i < ncandidates; i++) {
_possible_objects.at(i)->as_ObjectValue()->write_on(stream);
}
}
}
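As a rough, hedged sketch of the record layout produced by write_on/read_object above (the real code writes nested ScopeValues through a CompressedWriteStream and writes OBJECT_ID_CODE instead for already-visited objects; here every field is flattened to a plain int and all names are hypothetical): an OBJECT_MERGE_CODE record carries the id, the selector, the merge pointer, and then the candidate count followed by the candidates.

#include <cassert>
#include <vector>

struct MergeRecord {
  int id;
  int selector;                 // stands in for the serialized selector ScopeValue
  int merge_pointer;            // stands in for the serialized merge-pointer ScopeValue
  std::vector<int> candidates;  // stands in for the nested ObjectValue records
};

const int OBJECT_MERGE_CODE = 9;

void write_merge(std::vector<int>& s, const MergeRecord& r) {
  s.push_back(OBJECT_MERGE_CODE);
  s.push_back(r.id);
  s.push_back(r.selector);
  s.push_back(r.merge_pointer);
  s.push_back((int)r.candidates.size());
  for (int c : r.candidates) s.push_back(c);
}

MergeRecord read_merge(const std::vector<int>& s, size_t& pos) {
  assert(s[pos] == OBJECT_MERGE_CODE);
  pos++;
  MergeRecord r;
  r.id = s[pos++];
  r.selector = s[pos++];
  r.merge_pointer = s[pos++];
  int n = s[pos++];
  for (int i = 0; i < n; i++) r.candidates.push_back(s[pos++]);
  return r;
}

int main() {
  std::vector<int> stream;
  write_merge(stream, {42, -1, 7, {1, 2}});
  size_t pos = 0;
  MergeRecord back = read_merge(stream, pos);
  assert(back.id == 42 && back.selector == -1 && back.candidates.size() == 2);
  return 0;
}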
// ConstantIntValue
ConstantIntValue::ConstantIntValue(DebugInfoReadStream* stream) {

View File

@ -44,12 +44,14 @@ class ConstantOopReadValue;
class ConstantOopWriteValue;
class LocationValue;
class ObjectValue;
class ObjectMergeValue;
class ScopeValue: public AnyObj {
public:
// Testers
virtual bool is_location() const { return false; }
virtual bool is_object() const { return false; }
virtual bool is_object_merge() const { return false; }
virtual bool is_auto_box() const { return false; }
virtual bool is_marker() const { return false; }
virtual bool is_constant_int() const { return false; }
@ -73,6 +75,11 @@ class ScopeValue: public AnyObj {
return (ObjectValue*)this;
}
ObjectMergeValue* as_ObjectMergeValue() {
assert(is_object_merge(), "must be");
return (ObjectMergeValue*)this;
}
LocationValue* as_LocationValue() {
assert(is_location(), "must be");
return (LocationValue*)this;
@ -126,13 +133,18 @@ class ObjectValue: public ScopeValue {
GrowableArray<ScopeValue*> _field_values;
Handle _value;
bool _visited;
bool _is_root; // Will be true if this object is referred to
// as a local/expression/monitor in the JVMs.
// Otherwise false, meaning it's just a candidate
// in an object allocation merge.
public:
ObjectValue(int id, ScopeValue* klass)
: _id(id)
, _klass(klass)
, _field_values()
, _value()
, _visited(false) {
, _visited(false)
, _is_root(true) {
assert(klass->is_constant_oop(), "should be constant java mirror oop");
}
@ -141,20 +153,24 @@ class ObjectValue: public ScopeValue {
, _klass(nullptr)
, _field_values()
, _value()
, _visited(false) {}
, _visited(false)
, _is_root(true) {}
// Accessors
bool is_object() const { return true; }
int id() const { return _id; }
ScopeValue* klass() const { return _klass; }
GrowableArray<ScopeValue*>* field_values() { return &_field_values; }
ScopeValue* field_at(int i) const { return _field_values.at(i); }
int field_size() { return _field_values.length(); }
Handle value() const { return _value; }
bool is_visited() const { return _visited; }
bool is_object() const { return true; }
int id() const { return _id; }
virtual ScopeValue* klass() const { return _klass; }
virtual GrowableArray<ScopeValue*>* field_values() { return &_field_values; }
virtual ScopeValue* field_at(int i) const { return _field_values.at(i); }
virtual int field_size() { return _field_values.length(); }
virtual Handle value() const { return _value; }
bool is_visited() const { return _visited; }
bool is_root() const { return _is_root; }
void set_value(oop value);
void set_visited(bool visited) { _visited = visited; }
void set_id(int id) { _id = id; }
virtual void set_value(oop value);
void set_visited(bool visited) { _visited = visited; }
void set_root(bool root) { _is_root = root; }
// Serialization of debugging information
void read_object(DebugInfoReadStream* stream);
@ -165,6 +181,65 @@ class ObjectValue: public ScopeValue {
void print_fields_on(outputStream* st) const;
};
// An ObjectMergeValue describes objects that were inputs to a Phi in C2 and at
// least one of them was scalar replaced.
// '_selector' is an integer value that will be '-1' if during the execution of
// the C2 compiled code the path taken was that of the Phi input that was NOT
// scalar replaced. In that case '_merge_pointer' is a pointer to an already
// allocated object. If '_selector' is not '-1' then it should be the index of
// an object in '_possible_objects'. That object is an ObjectValue describing an
// object that was scalar replaced.
class ObjectMergeValue: public ObjectValue {
protected:
ScopeValue* _selector;
ScopeValue* _merge_pointer;
GrowableArray<ScopeValue*> _possible_objects;
// This holds the ObjectValue that should be used in place of this
// ObjectMergeValue. I.e., it's the ScopeValue from _possible_objects that was
// selected by 'select()' or is an on-the-fly created ScopeValue representing
// the _merge_pointer if _selector is -1.
//
// We need to keep this reference around because there will be entries in
// ScopeDesc that reference this ObjectMergeValue directly. After
// rematerialization ObjectMergeValue will be just a wrapper for the
// ObjectValue pointed to by _selected.
ObjectValue* _selected;
public:
ObjectMergeValue(int id, ScopeValue* merge_pointer, ScopeValue* selector)
: ObjectValue(id)
, _selector(selector)
, _merge_pointer(merge_pointer)
, _possible_objects()
, _selected(nullptr) {}
ObjectMergeValue(int id)
: ObjectValue(id)
, _selector(nullptr)
, _merge_pointer(nullptr)
, _possible_objects()
, _selected(nullptr) {}
bool is_object_merge() const { return true; }
ScopeValue* selector() const { return _selector; }
ScopeValue* merge_pointer() const { return _merge_pointer; }
GrowableArray<ScopeValue*>* possible_objects() { return &_possible_objects; }
ObjectValue* select(frame& fr, RegisterMap& reg_map);
ScopeValue* klass() const { ShouldNotReachHere(); return nullptr; }
GrowableArray<ScopeValue*>* field_values() { ShouldNotReachHere(); return nullptr; }
ScopeValue* field_at(int i) const { ShouldNotReachHere(); return nullptr; }
int field_size() { ShouldNotReachHere(); return -1; }
Handle value() const { assert(_selected != nullptr, "Should call select() first."); return _selected->value(); }
void set_value(oop value) { assert(_selected != nullptr, "Should call select() first."); _selected->set_value(value); }
// Serialization of debugging information
void read_object(DebugInfoReadStream* stream);
void write_on(DebugInfoWriteStream* stream);
};
class AutoBoxObjectValue : public ObjectValue {
bool _cached;
public:
@ -316,6 +391,7 @@ class DebugInfoReadStream : public CompressedReadStream {
return o;
}
ScopeValue* read_object_value(bool is_auto_box);
ScopeValue* read_object_merge_value();
ScopeValue* get_cached_object();
// BCI encoding is mostly unsigned, but -1 is a distinguished value
int read_bci() { return read_int() + InvocationEntryBci; }

View File

@ -114,7 +114,6 @@ GrowableArray<ScopeValue*>* ScopeDesc::decode_object_values(int decode_offset) {
// object's fields could reference it (OBJECT_ID_CODE).
(void)ScopeValue::read_from(stream);
}
assert(result->length() == length, "inconsistent debug information");
return result;
}
@ -130,6 +129,38 @@ GrowableArray<MonitorValue*>* ScopeDesc::decode_monitor_values(int decode_offset
return result;
}
GrowableArray<ScopeValue*>* ScopeDesc::objects_to_rematerialize(frame& frm, RegisterMap& map) {
if (_objects == nullptr) {
return nullptr;
}
GrowableArray<ScopeValue*>* result = new GrowableArray<ScopeValue*>();
for (int i = 0; i < _objects->length(); i++) {
assert(_objects->at(i)->is_object(), "invalid debug information");
ObjectValue* sv = _objects->at(i)->as_ObjectValue();
// If the object is not referenced in the current JVM state, then it's only
// a candidate in an ObjectMergeValue and we don't need to rematerialize it
// unless/until it's returned by 'select()' below.
if (!sv->is_root()) {
continue;
}
if (sv->is_object_merge()) {
sv = sv->as_ObjectMergeValue()->select(frm, map);
// If select() returns nullptr, then the object doesn't need to be
// rematerialized.
if (sv == nullptr) {
continue;
}
}
result->append_if_missing(sv);
}
return result;
}
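A compact standalone model of the filtering above (hypothetical types, not HotSpot code): non-root objects are only candidates inside some merge and are skipped, merges are resolved through their selector first, and only what survives is queued for rematerialization. In the real code a selected merge contributes the candidate ObjectValue returned by select() rather than itself; the sketch only captures the skip conditions.

#include <cstdio>
#include <vector>

struct DebugObject {
  bool is_root;    // referenced directly as a local/expression/monitor
  bool is_merge;   // describes an allocation merge
  int  selector;   // only meaningful when is_merge: -1 means a real object already exists
};

// Returns the indices of the objects that actually need to be rematerialized.
std::vector<int> objects_to_rematerialize(const std::vector<DebugObject>& objs) {
  std::vector<int> result;
  for (int i = 0; i < (int)objs.size(); i++) {
    const DebugObject& o = objs[i];
    if (!o.is_root) continue;                     // only a candidate inside some merge
    if (o.is_merge && o.selector == -1) continue; // execution took the non-scalarized path
    result.push_back(i);
  }
  return result;
}

int main() {
  std::vector<DebugObject> objs = { {true, false, 0}, {false, false, 0}, {true, true, -1} };
  std::printf("%zu object(s) to rematerialize\n", objects_to_rematerialize(objs).size()); // prints 1
  return 0;
}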
DebugInfoReadStream* ScopeDesc::stream_at(int decode_offset) const {
return new DebugInfoReadStream(_code, decode_offset, _objects);
}
@ -238,8 +269,12 @@ void ScopeDesc::print_on(outputStream* st, PcDesc* pd) const {
st->print_cr(" Objects");
for (int i = 0; i < _objects->length(); i++) {
ObjectValue* sv = (ObjectValue*) _objects->at(i);
st->print(" - %d: ", sv->id());
st->print("%s ", java_lang_Class::as_Klass(sv->klass()->as_ConstantOopReadValue()->value()())->external_name());
st->print(" - %d: %c ", i, sv->is_root() ? 'R' : ' ');
sv->print_on(st);
st->print(", ");
if (!sv->is_object_merge()) {
st->print("%s", java_lang_Class::as_Klass(sv->klass()->as_ConstantOopReadValue()->value()())->external_name());
}
sv->print_fields_on(st);
st->cr();
}

View File

@ -134,6 +134,7 @@ class ScopeDesc : public ResourceObj {
public:
// Verification
void verify();
GrowableArray<ScopeValue*>* objects_to_rematerialize(frame& frm, RegisterMap& map);
#ifndef PRODUCT
public:

View File

@ -1498,6 +1498,7 @@ C2V_VMENTRY_NULL(jobject, iterateFrames, (JNIEnv* env, jobject compilerToVM, job
GrowableArray<ScopeValue*>* local_values = scope->locals();
for (int i = 0; i < local_values->length(); i++) {
ScopeValue* value = local_values->at(i);
assert(!value->is_object_merge(), "Should not be.");
if (value->is_object()) {
if (localIsVirtual_h.is_null()) {
typeArrayOop array_oop = oopFactory::new_boolArray(local_values->length(), CHECK_NULL);
@ -1740,6 +1741,7 @@ C2V_VMENTRY(void, materializeVirtualObjects, (JNIEnv* env, jobject, jobject _hs_
if (locals != nullptr) {
for (int i2 = 0; i2 < locals->size(); i2++) {
StackValue* var = locals->at(i2);
assert(!scopedValues->at(i2)->is_object_merge(), "Should not be.");
if (var->type() == T_OBJECT && scopedValues->at(i2)->is_object()) {
jvalue val;
val.l = cast_from_oop<jobject>(locals->at(i2)->get_obj()());
@ -1753,6 +1755,7 @@ C2V_VMENTRY(void, materializeVirtualObjects, (JNIEnv* env, jobject, jobject _hs_
if (expressions != nullptr) {
for (int i2 = 0; i2 < expressions->size(); i2++) {
StackValue* var = expressions->at(i2);
assert(!scopeExpressions->at(i2)->is_object_merge(), "Should not be.");
if (var->type() == T_OBJECT && scopeExpressions->at(i2)->is_object()) {
jvalue val;
val.l = cast_from_oop<jobject>(expressions->at(i2)->get_obj()());

View File

@ -467,6 +467,12 @@
develop(bool, TracePostallocExpand, false, "Trace expanding nodes after" \
" register allocation.") \
\
product(bool, ReduceAllocationMerges, true, DIAGNOSTIC, \
"Try to simplify allocation merges before Scalar Replacement") \
\
notproduct(bool, TraceReduceAllocationMerges, false, \
"Trace decision for simplifying allocation merges.") \
\
product(bool, DoEscapeAnalysis, true, \
"Perform escape analysis") \
\

View File

@ -52,6 +52,9 @@ const char* C2Compiler::retry_no_locks_coarsening() {
const char* C2Compiler::retry_no_iterative_escape_analysis() {
return "retry without iterative escape analysis";
}
const char* C2Compiler::retry_no_reduce_allocation_merges() {
return "retry without reducing allocation merges";
}
void compiler_stubs_init(bool in_compiler_thread);
@ -106,12 +109,13 @@ void C2Compiler::compile_method(ciEnv* env, ciMethod* target, int entry_bci, boo
bool subsume_loads = SubsumeLoads;
bool do_escape_analysis = DoEscapeAnalysis;
bool do_iterative_escape_analysis = DoEscapeAnalysis;
bool do_reduce_allocation_merges = ReduceAllocationMerges;
bool eliminate_boxing = EliminateAutoBox;
bool do_locks_coarsening = EliminateLocks;
while (!env->failing()) {
// Attempt to compile while subsuming loads into machine instructions.
Options options(subsume_loads, do_escape_analysis, do_iterative_escape_analysis, eliminate_boxing, do_locks_coarsening, install_code);
Options options(subsume_loads, do_escape_analysis, do_iterative_escape_analysis, do_reduce_allocation_merges, eliminate_boxing, do_locks_coarsening, install_code);
Compile C(env, target, entry_bci, options, directive);
// Check result and retry if appropriate.
@ -134,6 +138,12 @@ void C2Compiler::compile_method(ciEnv* env, ciMethod* target, int entry_bci, boo
env->report_failure(C.failure_reason());
continue; // retry
}
if (C.failure_reason_is(retry_no_reduce_allocation_merges())) {
assert(do_reduce_allocation_merges, "must make progress");
do_reduce_allocation_merges = false;
env->report_failure(C.failure_reason());
continue; // retry
}
if (C.failure_reason_is(retry_no_locks_coarsening())) {
assert(do_locks_coarsening, "must make progress");
do_locks_coarsening = false;

View File

@ -50,6 +50,7 @@ public:
static const char* retry_no_subsuming_loads();
static const char* retry_no_escape_analysis();
static const char* retry_no_iterative_escape_analysis();
static const char* retry_no_reduce_allocation_merges();
static const char* retry_no_locks_coarsening();
// Print compilation timers and statistics

View File

@ -1103,13 +1103,16 @@ Node* CallStaticJavaNode::Ideal(PhaseGVN* phase, bool can_reshape) {
return CallNode::Ideal(phase, can_reshape);
}
//----------------------------is_uncommon_trap----------------------------
// Returns true if this is an uncommon trap.
bool CallStaticJavaNode::is_uncommon_trap() const {
return (_name != nullptr && !strcmp(_name, "uncommon_trap"));
}
//----------------------------uncommon_trap_request----------------------------
// If this is an uncommon trap, return the request code, else zero.
int CallStaticJavaNode::uncommon_trap_request() const {
if (_name != nullptr && !strcmp(_name, "uncommon_trap")) {
return extract_uncommon_trap_request(this);
}
return 0;
return is_uncommon_trap() ? extract_uncommon_trap_request(this) : 0;
}
int CallStaticJavaNode::extract_uncommon_trap_request(const Node* call) {
#ifndef PRODUCT
@ -1460,22 +1463,14 @@ void SafePointNode::disconnect_from_root(PhaseIterGVN *igvn) {
//============== SafePointScalarObjectNode ==============
SafePointScalarObjectNode::SafePointScalarObjectNode(const TypeOopPtr* tp,
#ifdef ASSERT
Node* alloc,
#endif
uint first_index,
uint n_fields) :
SafePointScalarObjectNode::SafePointScalarObjectNode(const TypeOopPtr* tp, Node* alloc, uint first_index, uint n_fields) :
TypeNode(tp, 1), // 1 control input -- seems required. Get from root.
_first_index(first_index),
_n_fields(n_fields)
#ifdef ASSERT
, _alloc(alloc)
#endif
_n_fields(n_fields),
_alloc(alloc)
{
#ifdef ASSERT
if (!alloc->is_Allocate()
&& !(alloc->Opcode() == Op_VectorBox)) {
if (!alloc->is_Allocate() && !(alloc->Opcode() == Op_VectorBox)) {
alloc->dump();
assert(false, "unexpected call node");
}
@ -1521,10 +1516,58 @@ SafePointScalarObjectNode::clone(Dict* sosn_map, bool& new_node) const {
#ifndef PRODUCT
void SafePointScalarObjectNode::dump_spec(outputStream *st) const {
st->print(" # fields@[%d..%d]", first_index(),
first_index() + n_fields() - 1);
st->print(" # fields@[%d..%d]", first_index(), first_index() + n_fields() - 1);
}
#endif
//============== SafePointScalarMergeNode ==============
SafePointScalarMergeNode::SafePointScalarMergeNode(const TypeOopPtr* tp, int merge_pointer_idx) :
TypeNode(tp, 1), // 1 control input -- seems required. Get from root.
_merge_pointer_idx(merge_pointer_idx)
{
init_class_id(Class_SafePointScalarMerge);
}
// Do not allow value-numbering for SafePointScalarMerge node.
uint SafePointScalarMergeNode::hash() const { return NO_HASH; }
bool SafePointScalarMergeNode::cmp( const Node &n ) const {
return (&n == this); // Always fail except on self
}
uint SafePointScalarMergeNode::ideal_reg() const {
return 0; // No matching to machine instruction
}
const RegMask &SafePointScalarMergeNode::in_RegMask(uint idx) const {
return *(Compile::current()->matcher()->idealreg2debugmask[in(idx)->ideal_reg()]);
}
const RegMask &SafePointScalarMergeNode::out_RegMask() const {
return RegMask::Empty;
}
uint SafePointScalarMergeNode::match_edge(uint idx) const {
return 0;
}
SafePointScalarMergeNode*
SafePointScalarMergeNode::clone(Dict* sosn_map, bool& new_node) const {
void* cached = (*sosn_map)[(void*)this];
if (cached != nullptr) {
new_node = false;
return (SafePointScalarMergeNode*)cached;
}
new_node = true;
SafePointScalarMergeNode* res = (SafePointScalarMergeNode*)Node::clone();
sosn_map->Insert((void*)this, (void*)res);
return res;
}
#ifndef PRODUCT
void SafePointScalarMergeNode::dump_spec(outputStream *st) const {
st->print(" # merge_pointer_idx=%d, scalarized_objects=%d", _merge_pointer_idx, req()-1);
}
#endif
//=============================================================================

View File

@ -505,25 +505,22 @@ public:
//------------------------------SafePointScalarObjectNode----------------------
// A SafePointScalarObjectNode represents the state of a scalarized object
// at a safepoint.
class SafePointScalarObjectNode: public TypeNode {
uint _first_index; // First input edge relative index of a SafePoint node where
// states of the scalarized object fields are collected.
// It is relative to the last (youngest) jvms->_scloff.
uint _n_fields; // Number of non-static fields of the scalarized object.
DEBUG_ONLY(Node* _alloc;)
uint _first_index; // First input edge relative index of a SafePoint node where
// states of the scalarized object fields are collected.
// It is relative to the last (youngest) jvms->_scloff.
uint _n_fields; // Number of non-static fields of the scalarized object.
virtual uint hash() const ; // { return NO_HASH; }
Node* _alloc; // Just for debugging purposes.
virtual uint hash() const;
virtual bool cmp( const Node &n ) const;
uint first_index() const { return _first_index; }
public:
SafePointScalarObjectNode(const TypeOopPtr* tp,
#ifdef ASSERT
Node* alloc,
#endif
uint first_index, uint n_fields);
SafePointScalarObjectNode(const TypeOopPtr* tp, Node* alloc, uint first_index, uint n_fields);
virtual int Opcode() const;
virtual uint ideal_reg() const;
virtual const RegMask &in_RegMask(uint) const;
@ -556,6 +553,92 @@ public:
#endif
};
//------------------------------SafePointScalarMergeNode----------------------
//
// This class represents an allocation merge that is used as debug information
// and had at least one of its inputs scalar replaced.
//
// The required inputs of this node, except the control, are pointers to
// SafePointScalarObjectNodes that describe scalarized inputs of the original
// allocation merge. The other properties of the class are described below.
//
// _merge_pointer_idx : index in the SafePointNode's input array where the
// description of the _allocation merge_ starts. The index is zero based and
// relative to the SafePoint's scloff. The two entries in the SafePointNode's
// input array starting at '_merge_pointer_idx' are Phi nodes representing:
//
// 1) The original merge Phi. During rematerialization this input will only be
// used if the "selector Phi" (see below) indicates that execution took the
// path of a non-scalarized input.
//
// 2) A "selector Phi". The output of this Phi will be '-1' if execution of the
// method took a non-scalarized input of the original Phi. Otherwise, the
// output will be >= 0, and it is one less than the index in the
// SafePointScalarMergeNode input array where the description of the
// scalarized object that should be used is found.
//
// As an example, consider a Phi merging 3 inputs, of which the last 2 are
// scalar replaceable.
//
// Phi(Region, NSR, SR, SR)
//
// During scalar replacement the SR inputs will be changed to null:
//
// Phi(Region, NSR, nullptr, nullptr)
//
// A corresponding selector Phi will be created with a configuration like this:
//
// Phi(Region, -1, 0, 1)
//
// During execution of the compiled method, if the execution reaches a Trap, the
// output of the selector Phi will tell if we need to rematerialize one of the
// scalar replaced inputs or if we should just use the pointer returned by the
// original Phi.
class SafePointScalarMergeNode: public TypeNode {
int _merge_pointer_idx; // This is the first input edge relative
// index of a SafePoint node where metadata information relative
// to restoring the merge is stored. The corresponding input
// in the associated SafePoint will point to a Phi representing
// potential non-scalar replaced objects.
virtual uint hash() const;
virtual bool cmp( const Node &n ) const;
public:
SafePointScalarMergeNode(const TypeOopPtr* tp, int merge_pointer_idx);
virtual int Opcode() const;
virtual uint ideal_reg() const;
virtual const RegMask &in_RegMask(uint) const;
virtual const RegMask &out_RegMask() const;
virtual uint match_edge(uint idx) const;
virtual uint size_of() const { return sizeof(*this); }
int merge_pointer_idx(JVMState* jvms) const {
assert(jvms != nullptr, "JVMS reference is null.");
return jvms->scloff() + _merge_pointer_idx;
}
int selector_idx(JVMState* jvms) const {
assert(jvms != nullptr, "JVMS reference is null.");
return jvms->scloff() + _merge_pointer_idx + 1;
}
// Assumes that "this" is an argument to a safepoint node "s", and that
// "new_call" is being created to correspond to "s". But the difference
// between the start index of the jvmstates of "new_call" and "s" is
// "jvms_adj". Produce and return a SafePointScalarObjectNode that
// corresponds appropriately to "this" in "new_call". Assumes that
// "sosn_map" is a map, specific to the translation of "s" to "new_call",
// mapping old SafePointScalarObjectNodes to new, to avoid multiple copies.
SafePointScalarMergeNode* clone(Dict* sosn_map, bool& new_node) const;
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
#endif
};
// Simple container for the outgoing projections of a call. Useful
// for serious surgery on calls.
@ -735,6 +818,7 @@ public:
// If this is an uncommon trap, return the request code, else zero.
int uncommon_trap_request() const;
bool is_uncommon_trap() const;
static int extract_uncommon_trap_request(const Node* call);
bool is_boxing_method() const {

View File

@ -312,6 +312,7 @@ macro(RotateRight)
macro(RotateRightV)
macro(SafePoint)
macro(SafePointScalarObject)
macro(SafePointScalarMerge)
#if INCLUDE_SHENANDOAHGC
#define shmacro(x) macro(x)
#else

View File

@ -519,6 +519,12 @@ void Compile::print_compile_messages() {
tty->print_cr("** Bailout: Recompile without iterative escape analysis**");
tty->print_cr("*********************************************************");
}
if (do_reduce_allocation_merges() != ReduceAllocationMerges && PrintOpto) {
// Recompiling without reducing allocation merges
tty->print_cr("*********************************************************");
tty->print_cr("** Bailout: Recompile without reduce allocation merges **");
tty->print_cr("*********************************************************");
}
if ((eliminate_boxing() != EliminateAutoBox) && PrintOpto) {
// Recompiling without boxing elimination
tty->print_cr("*********************************************************");
@ -2301,10 +2307,10 @@ void Compile::Optimize() {
// Cleanup graph (remove dead nodes).
TracePhase tp("idealLoop", &timers[_t_idealLoop]);
PhaseIdealLoop::optimize(igvn, LoopOptsMaxUnroll);
if (major_progress()) print_method(PHASE_PHASEIDEAL_BEFORE_EA, 2);
if (failing()) return;
}
bool progress;
print_method(PHASE_PHASEIDEAL_BEFORE_EA, 2);
do {
ConnectionGraph::do_analysis(this, &igvn);
@ -2326,9 +2332,11 @@ void Compile::Optimize() {
igvn.optimize();
print_method(PHASE_ITER_GVN_AFTER_ELIMINATION, 2);
if (failing()) return;
}
ConnectionGraph::verify_ram_nodes(this, root());
if (failing()) return;
progress = do_iterative_escape_analysis() &&
(macro_count() < mcount) &&
ConnectionGraph::has_candidates(this);

View File

@ -176,17 +176,20 @@ class Options {
const bool _subsume_loads; // Load can be matched as part of a larger op.
const bool _do_escape_analysis; // Do escape analysis.
const bool _do_iterative_escape_analysis; // Do iterative escape analysis.
const bool _do_reduce_allocation_merges; // Do try to reduce allocation merges.
const bool _eliminate_boxing; // Do boxing elimination.
const bool _do_locks_coarsening; // Do locks coarsening
const bool _install_code; // Install the code that was compiled
public:
Options(bool subsume_loads, bool do_escape_analysis,
bool do_iterative_escape_analysis,
bool do_reduce_allocation_merges,
bool eliminate_boxing, bool do_locks_coarsening,
bool install_code) :
_subsume_loads(subsume_loads),
_do_escape_analysis(do_escape_analysis),
_do_iterative_escape_analysis(do_iterative_escape_analysis),
_do_reduce_allocation_merges(do_reduce_allocation_merges),
_eliminate_boxing(eliminate_boxing),
_do_locks_coarsening(do_locks_coarsening),
_install_code(install_code) {
@ -197,6 +200,7 @@ class Options {
/* subsume_loads = */ true,
/* do_escape_analysis = */ false,
/* do_iterative_escape_analysis = */ false,
/* do_reduce_allocation_merges = */ false,
/* eliminate_boxing = */ false,
/* do_lock_coarsening = */ false,
/* install_code = */ true
@ -565,6 +569,7 @@ private:
/** Do escape analysis. */
bool do_escape_analysis() const { return _options._do_escape_analysis; }
bool do_iterative_escape_analysis() const { return _options._do_iterative_escape_analysis; }
bool do_reduce_allocation_merges() const { return _options._do_reduce_allocation_merges; }
/** Do boxing elimination. */
bool eliminate_boxing() const { return _options._eliminate_boxing; }
/** Do aggressive boxing elimination. */

View File

@ -36,13 +36,19 @@
#include "opto/cfgnode.hpp"
#include "opto/compile.hpp"
#include "opto/escape.hpp"
#include "opto/macro.hpp"
#include "opto/phaseX.hpp"
#include "opto/movenode.hpp"
#include "opto/rootnode.hpp"
#include "utilities/macros.hpp"
ConnectionGraph::ConnectionGraph(Compile * C, PhaseIterGVN *igvn, int invocation) :
_nodes(C->comp_arena(), C->unique(), C->unique(), nullptr),
// If ReduceAllocationMerges is enabled we might call split_through_phi during
// split_unique_types and that will create additional nodes that need to be
// pushed to the ConnectionGraph. The code below bumps the initial capacity of
// _nodes by 10% to account for these additional nodes. If capacity is exceeded
// the array will be reallocated.
_nodes(C->comp_arena(), ReduceAllocationMerges ? C->unique()*1.10 : C->unique(), C->unique(), nullptr),
_in_worklist(C->comp_arena()),
_next_pidx(0),
_collecting(true),
@ -56,11 +62,13 @@ ConnectionGraph::ConnectionGraph(Compile * C, PhaseIterGVN *igvn, int invocation
// Add unknown java object.
add_java_object(C->top(), PointsToNode::GlobalEscape);
phantom_obj = ptnode_adr(C->top()->_idx)->as_JavaObject();
set_not_scalar_replaceable(phantom_obj NOT_PRODUCT(COMMA "Phantom object"));
// Add ConP and ConN null oop nodes
Node* oop_null = igvn->zerocon(T_OBJECT);
assert(oop_null->_idx < nodes_size(), "should be created already");
add_java_object(oop_null, PointsToNode::NoEscape);
null_obj = ptnode_adr(oop_null->_idx)->as_JavaObject();
set_not_scalar_replaceable(null_obj NOT_PRODUCT(COMMA "Null object"));
if (UseCompressedOops) {
Node* noop_null = igvn->zerocon(T_NARROWOOP);
assert(noop_null->_idx < nodes_size(), "should be created already");
@ -124,6 +132,7 @@ bool ConnectionGraph::compute_escape() {
// Worklists used by EA.
Unique_Node_List delayed_worklist;
Unique_Node_List reducible_merges;
GrowableArray<Node*> alloc_worklist;
GrowableArray<Node*> ptr_cmp_worklist;
GrowableArray<MemBarStoreStoreNode*> storestore_worklist;
@ -292,7 +301,7 @@ bool ConnectionGraph::compute_escape() {
n->as_Allocate()->_is_non_escaping = noescape;
}
if (noescape && ptn->scalar_replaceable()) {
adjust_scalar_replaceable_state(ptn);
adjust_scalar_replaceable_state(ptn, reducible_merges);
if (ptn->scalar_replaceable()) {
jobj_worklist.push(ptn);
} else {
@ -306,6 +315,15 @@ bool ConnectionGraph::compute_escape() {
find_scalar_replaceable_allocs(jobj_worklist);
}
// alloc_worklist will be processed in reverse push order.
// Therefore the reducible Phis will be processed last, and that's what we
// want because by then the scalarizable inputs of the merge will already have
// a unique instance type.
for (uint i = 0; i < reducible_merges.size(); i++ ) {
Node* n = reducible_merges.at(i);
alloc_worklist.append(n);
}
for (int next = 0; next < jobj_worklist.length(); ++next) {
JavaObjectNode* jobj = jobj_worklist.at(next);
if (jobj->scalar_replaceable()) {
@ -359,7 +377,7 @@ bool ConnectionGraph::compute_escape() {
assert(C->do_aliasing(), "Aliasing should be enabled");
// Now use the escape information to create unique types for
// scalar replaceable objects.
split_unique_types(alloc_worklist, arraycopy_worklist, mergemem_worklist);
split_unique_types(alloc_worklist, arraycopy_worklist, mergemem_worklist, reducible_merges);
if (C->failing()) {
NOT_PRODUCT(escape_state_statistics(java_objects_worklist);)
return false;
@ -379,6 +397,21 @@ bool ConnectionGraph::compute_escape() {
#endif
}
// 6. Remove reducible allocation merges from ideal graph
if (ReduceAllocationMerges && reducible_merges.size() > 0) {
bool delay = _igvn->delay_transform();
_igvn->set_delay_transform(true);
for (uint i = 0; i < reducible_merges.size(); i++ ) {
Node* n = reducible_merges.at(i);
reduce_phi(n->as_Phi());
if (C->failing()) {
NOT_PRODUCT(escape_state_statistics(java_objects_worklist);)
return false;
}
}
_igvn->set_delay_transform(delay);
}
// Annotate at safepoints if they have <= ArgEscape objects in their scope and at
// java calls if they pass ArgEscape objects as parameters.
if (has_non_escaping_obj &&
@ -401,6 +434,345 @@ bool ConnectionGraph::compute_escape() {
return has_non_escaping_obj;
}
// Check if it's profitable to reduce the Phi passed as parameter. Returns true
// if at least one scalar replaceable allocation participates in the merge and
// no input to the Phi is nullable.
bool ConnectionGraph::can_reduce_phi_check_inputs(PhiNode* ophi) const {
// Check if there is a scalar replaceable allocate in the Phi
bool found_sr_allocate = false;
for (uint i = 1; i < ophi->req(); i++) {
// Right now we can't restore a "null" pointer during deoptimization
const Type* inp_t = _igvn->type(ophi->in(i));
if (inp_t == nullptr || inp_t->make_oopptr() == nullptr || inp_t->make_oopptr()->maybe_null()) {
NOT_PRODUCT(if (TraceReduceAllocationMerges) tty->print_cr("Can NOT reduce Phi %d on invocation %d. Input %d is nullable.", ophi->_idx, _invocation, i);)
return false;
}
// We are looking for at least one SR object in the merge
JavaObjectNode* ptn = unique_java_object(ophi->in(i));
if (ptn != nullptr && ptn->scalar_replaceable()) {
assert(ptn->ideal_node() != nullptr && ptn->ideal_node()->is_Allocate(), "sanity");
AllocateNode* alloc = ptn->ideal_node()->as_Allocate();
if (PhaseMacroExpand::can_eliminate_allocation(_igvn, alloc, nullptr)) {
found_sr_allocate = true;
} else {
ptn->set_scalar_replaceable(false);
}
}
}
NOT_PRODUCT(if (TraceReduceAllocationMerges && !found_sr_allocate) tty->print_cr("Can NOT reduce Phi %d on invocation %d. No SR Allocate as input.", ophi->_idx, _invocation);)
return found_sr_allocate;
}
// Check if we are able to untangle the merge. Right now we only reduce Phis
// which are only used as debug information.
bool ConnectionGraph::can_reduce_phi_check_users(PhiNode* ophi) const {
for (DUIterator_Fast imax, i = ophi->fast_outs(imax); i < imax; i++) {
Node* use = ophi->fast_out(i);
if (use->is_SafePoint()) {
if (use->is_Call() && use->as_Call()->has_non_debug_use(ophi)) {
NOT_PRODUCT(if (TraceReduceAllocationMerges) tty->print_cr("Can NOT reduce Phi %d on invocation %d. Call has non_debug_use().", ophi->_idx, _invocation);)
return false;
}
} else if (use->is_AddP()) {
Node* addp = use;
for (DUIterator_Fast jmax, j = addp->fast_outs(jmax); j < jmax; j++) {
Node* use_use = addp->fast_out(j);
if (!use_use->is_Load() || !use_use->as_Load()->can_split_through_phi_base(_igvn)) {
NOT_PRODUCT(if (TraceReduceAllocationMerges) tty->print_cr("Can NOT reduce Phi %d on invocation %d. AddP user isn't a [splittable] Load(): %s", ophi->_idx, _invocation, use_use->Name());)
return false;
}
}
} else {
NOT_PRODUCT(if (TraceReduceAllocationMerges) tty->print_cr("Can NOT reduce Phi %d on invocation %d. One of the uses is: %d %s", ophi->_idx, _invocation, use->_idx, use->Name());)
return false;
}
}
return true;
}
// Returns true if: 1) It's profitable to reduce the merge, and 2) The Phi is
// only used in certain code shapes. Check comments in
// 'can_reduce_phi_check_inputs' and 'can_reduce_phi_check_users' for more
// details.
bool ConnectionGraph::can_reduce_phi(PhiNode* ophi) const {
// If there was an error attempting to reduce allocation merges for this
// method we might have disabled the compilation and be retrying
// with RAM disabled.
if (!_compile->do_reduce_allocation_merges()) {
return false;
}
const Type* phi_t = _igvn->type(ophi);
if (phi_t == nullptr || phi_t->make_ptr() == nullptr ||
phi_t->make_ptr()->isa_instptr() == nullptr ||
!phi_t->make_ptr()->isa_instptr()->klass_is_exact()) {
NOT_PRODUCT(if (TraceReduceAllocationMerges) { tty->print_cr("Can NOT reduce Phi %d during invocation %d because it's nullable.", ophi->_idx, _invocation); })
return false;
}
if (!can_reduce_phi_check_inputs(ophi) || !can_reduce_phi_check_users(ophi)) {
return false;
}
NOT_PRODUCT(if (TraceReduceAllocationMerges) { tty->print_cr("Can reduce Phi %d during invocation %d: ", ophi->_idx, _invocation); })
return true;
}
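Putting the three checks together, the reducibility test is essentially a predicate over the merge: an exact, provably non-null instance type, no nullable input, at least one scalar-replaceable allocation among the inputs, and no user other than safepoint debug info or field loads that can be split through the Phi. A simplified standalone predicate capturing that shape (hypothetical flags, not the real node model):

#include <cstdio>
#include <vector>

struct MergeInput { bool maybe_null; bool is_sr_allocation; };
struct MergeUse   { bool safepoint_debug_only; bool splittable_field_load; };

bool can_reduce_merge(const std::vector<MergeInput>& inputs,
                      const std::vector<MergeUse>& uses) {
  bool found_sr_allocation = false;
  for (const MergeInput& in : inputs) {
    if (in.maybe_null) return false;              // cannot restore a null pointer at deopt
    found_sr_allocation |= in.is_sr_allocation;
  }
  if (!found_sr_allocation) return false;         // nothing would be scalar replaced
  for (const MergeUse& u : uses) {
    if (!u.safepoint_debug_only && !u.splittable_field_load) return false;
  }
  return true;
}

int main() {
  std::vector<MergeInput> in   = { {false, false}, {false, true} };  // NSR + SR inputs
  std::vector<MergeUse>   uses = { {true, false} };                  // only debug info
  std::printf("%s\n", can_reduce_merge(in, uses) ? "reducible" : "not reducible");
  return 0;
}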
void ConnectionGraph::reduce_phi_on_field_access(PhiNode* ophi, GrowableArray<Node *> &alloc_worklist) {
// We'll pass this to 'split_through_phi' so that it'll do the split even
// though the load doesn't have a unique instance type.
bool ignore_missing_instance_id = true;
// Iterate over Phi outputs looking for an AddP
for (int j = ophi->outcnt()-1; j >= 0;) {
Node* previous_addp = ophi->raw_out(j);
uint num_edges = 1;
if (previous_addp->is_AddP()) {
// All AddPs are present in the connection graph
FieldNode* fn = ptnode_adr(previous_addp->_idx)->as_Field();
num_edges = previous_addp->in(AddPNode::Address) == previous_addp->in(AddPNode::Base) ? 2 : 1;
// Iterate over AddP looking for a Load
for (int k = previous_addp->outcnt()-1; k >= 0;) {
Node* previous_load = previous_addp->raw_out(k);
if (previous_load->is_Load()) {
Node* data_phi = previous_load->as_Load()->split_through_phi(_igvn, ignore_missing_instance_id);
_igvn->replace_node(previous_load, data_phi);
assert(data_phi != nullptr, "Output of split_through_phi is null.");
assert(data_phi != previous_load, "Output of split_through_phi is same as input.");
// Push the newly created AddP on alloc_worklist and patch
// the connection graph. Note that the changes in the CG below
// won't affect the ES of objects since the new nodes have the
// same status as the old ones.
if (data_phi != nullptr && data_phi->is_Phi()) {
for (uint i = 1; i < data_phi->req(); i++) {
Node* new_load = data_phi->in(i);
if (new_load->is_Load()) {
Node* new_addp = new_load->in(MemNode::Address);
Node* base = get_addp_base(new_addp);
// The base might not be something that we can create a unique
// type for. If that's the case we are done with that input.
PointsToNode* jobj_ptn = unique_java_object(base);
if (jobj_ptn == nullptr || !jobj_ptn->scalar_replaceable()) {
continue;
}
// Push to alloc_worklist since the base has a unique type
alloc_worklist.append_if_missing(new_addp);
// Now let's add the node to the connection graph
_nodes.at_grow(new_addp->_idx, nullptr);
add_field(new_addp, fn->escape_state(), fn->offset());
add_base(ptnode_adr(new_addp->_idx)->as_Field(), ptnode_adr(base->_idx));
// If the load doesn't load an object then it won't be
// part of the connection graph
PointsToNode* curr_load_ptn = ptnode_adr(previous_load->_idx);
if (curr_load_ptn != nullptr) {
_nodes.at_grow(new_load->_idx, nullptr);
add_local_var(new_load, curr_load_ptn->escape_state());
add_edge(ptnode_adr(new_load->_idx), ptnode_adr(new_addp->_idx)->as_Field());
}
}
}
}
}
--k;
k = MIN2(k, (int)previous_addp->outcnt()-1);
}
// Remove the old AddP from the processing list because it's dead now
alloc_worklist.remove_if_existing(previous_addp);
}
j -= num_edges;
j = MIN2(j, (int)ophi->outcnt()-1);
}
}
// This method will create a SafePointScalarObjectNode for each combination of
// scalar replaceable allocation in 'ophi' and SafePoint node in 'safepoints'.
// The method will create a SafePointScalarMergeNode for each combination of
// 'ophi' and SafePoint node in 'safepoints'.
// Each SafePointScalarMergeNode created here may describe multiple scalar
// replaced objects - check detailed description in SafePointScalarMergeNode
// class header.
//
// This method will set entries in the Phi that are scalar replaceable to 'null'.
void ConnectionGraph::reduce_phi_on_safepoints(PhiNode* ophi, Unique_Node_List* safepoints) {
Node* minus_one = _igvn->register_new_node_with_optimizer(ConINode::make(-1));
Node* selector = _igvn->register_new_node_with_optimizer(PhiNode::make(ophi->region(), minus_one, TypeInt::INT));
Node* null_ptr = _igvn->makecon(TypePtr::NULL_PTR);
const TypeOopPtr* merge_t = _igvn->type(ophi)->make_oopptr();
uint number_of_sr_objects = 0;
PhaseMacroExpand mexp(*_igvn);
_igvn->hash_delete(ophi);
// Fill in the 'selector' Phi. If index 'i' of the selector is:
// -> a '-1' constant, the i'th input of the original Phi is NSR.
// -> an 'x' constant >= 0, the i'th input of the original Phi will be SR and the
// info about the scalarized object will be at index x of
// ObjectMergeValue::possible_objects
for (uint i = 1; i < ophi->req(); i++) {
Node* base = ophi->in(i);
JavaObjectNode* ptn = unique_java_object(base);
if (ptn != nullptr && ptn->scalar_replaceable()) {
Node* sr_obj_idx = _igvn->register_new_node_with_optimizer(ConINode::make(number_of_sr_objects));
selector->set_req(i, sr_obj_idx);
number_of_sr_objects++;
}
}
// Update the debug information of all safepoints in turn
for (uint spi = 0; spi < safepoints->size(); spi++) {
SafePointNode* sfpt = safepoints->at(spi)->as_SafePoint();
JVMState *jvms = sfpt->jvms();
uint merge_idx = (sfpt->req() - jvms->scloff());
int debug_start = jvms->debug_start();
SafePointScalarMergeNode* smerge = new SafePointScalarMergeNode(merge_t, merge_idx);
smerge->init_req(0, _compile->root());
_igvn->register_new_node_with_optimizer(smerge);
// The next two inputs are:
// (1) A copy of the original pointer to NSR objects.
// (2) A selector, used to decide if we need to rematerialize an object
// or use the pointer to a NSR object.
// See more details of these fields in the declaration of SafePointScalarMergeNode
sfpt->add_req(ophi);
sfpt->add_req(selector);
for (uint i = 1; i < ophi->req(); i++) {
Node* base = ophi->in(i);
JavaObjectNode* ptn = unique_java_object(base);
// If the base is not scalar replaceable we don't need to register information about
// it at this time.
if (ptn == nullptr || !ptn->scalar_replaceable()) {
continue;
}
AllocateNode* alloc = ptn->ideal_node()->as_Allocate();
SafePointScalarObjectNode* sobj = mexp.create_scalarized_object_description(alloc, sfpt);
if (sobj == nullptr) {
_compile->record_failure(C2Compiler::retry_no_reduce_allocation_merges());
return;
}
// Now make a pass over the debug information replacing any references
// to the allocated object with "sobj"
Node* ccpp = alloc->result_cast();
sfpt->replace_edges_in_range(ccpp, sobj, debug_start, jvms->debug_end(), _igvn);
// Register the scalarized object as a candidate for reallocation
smerge->add_req(sobj);
}
// Replaces debug information references to "ophi" in "sfpt" with references to "smerge"
sfpt->replace_edges_in_range(ophi, smerge, debug_start, jvms->debug_end(), _igvn);
// The call to 'replace_edges_in_range' above might have removed the
// reference to ophi that we need at _merge_pointer_idx. The line below makes
// sure the reference is maintained.
sfpt->set_req(smerge->merge_pointer_idx(jvms), ophi);
_igvn->_worklist.push(sfpt);
}
// Now we can change ophi since we don't need to know the types
// of the input allocations anymore.
const Type* new_t = merge_t->meet(TypePtr::NULL_PTR);
Node* new_phi = _igvn->register_new_node_with_optimizer(PhiNode::make(ophi->region(), null_ptr, new_t));
for (uint i = 1; i < ophi->req(); i++) {
Node* base = ophi->in(i);
JavaObjectNode* ptn = unique_java_object(base);
if (ptn != nullptr && ptn->scalar_replaceable()) {
new_phi->set_req(i, null_ptr);
} else {
new_phi->set_req(i, ophi->in(i));
}
}
_igvn->replace_node(ophi, new_phi);
_igvn->hash_insert(ophi);
_igvn->_worklist.push(ophi);
}
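To make the selector construction above concrete: for a merge Phi(Region, NSR, SR, SR), the loop assigns -1 to the NSR input and consecutive indices to the SR inputs, so the selector becomes Phi(Region, -1, 0, 1) and the rewritten merge becomes Phi(Region, NSR, null, null). A tiny standalone sketch of that numbering (hypothetical names, not HotSpot code):

#include <cstdio>
#include <vector>

int main() {
  // true  => Phi input is a scalar-replaceable allocation (gets a candidate index)
  // false => Phi input is not scalar replaced (selector entry stays -1)
  std::vector<bool> is_sr_input = { false, true, true };  // NSR, SR, SR
  std::vector<int> selector(is_sr_input.size(), -1);
  int number_of_sr_objects = 0;
  for (size_t i = 0; i < is_sr_input.size(); i++) {
    if (is_sr_input[i]) {
      selector[i] = number_of_sr_objects++;  // index into ObjectMergeValue::possible_objects
    }
  }
  for (int s : selector) std::printf("%d ", s);  // prints: -1 0 1
  std::printf("\n");
  return 0;
}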
void ConnectionGraph::reduce_phi(PhiNode* ophi) {
Unique_Node_List safepoints;
for (uint i = 0; i < ophi->outcnt(); i++) {
Node* use = ophi->raw_out(i);
// All SafePoint nodes using the same Phi node use the same debug
// information (regarding the Phi). Furthermore, reducing the Phi used by a
// SafePoint requires changing the Phi. Therefore, I collect all safepoints
// and patch them all at once later.
if (use->is_SafePoint()) {
safepoints.push(use->as_SafePoint());
} else {
assert(false, "Unexpected use of reducible Phi.");
}
}
if (safepoints.size() > 0) {
reduce_phi_on_safepoints(ophi, &safepoints);
}
}
void ConnectionGraph::verify_ram_nodes(Compile* C, Node* root) {
Unique_Node_List ideal_nodes;
ideal_nodes.map(C->live_nodes(), nullptr); // preallocate space
ideal_nodes.push(root);
for (uint next = 0; next < ideal_nodes.size(); ++next) {
Node* n = ideal_nodes.at(next);
if (n->is_SafePointScalarMerge()) {
SafePointScalarMergeNode* merge = n->as_SafePointScalarMerge();
// Validate inputs of merge
for (uint i = 1; i < merge->req(); i++) {
if (merge->in(i) != nullptr && !merge->in(i)->is_top() && !merge->in(i)->is_SafePointScalarObject()) {
assert(false, "SafePointScalarMerge inputs should be null/top or SafePointScalarObject.");
C->record_failure(C2Compiler::retry_no_reduce_allocation_merges());
}
}
// Validate users of merge
for (DUIterator_Fast imax, i = merge->fast_outs(imax); i < imax; i++) {
Node* sfpt = merge->fast_out(i);
if (sfpt->is_SafePoint()) {
int merge_idx = merge->merge_pointer_idx(sfpt->as_SafePoint()->jvms());
if (sfpt->in(merge_idx) != nullptr && sfpt->in(merge_idx)->is_SafePointScalarMerge()) {
assert(false, "SafePointScalarMerge nodes can't be nested.");
C->record_failure(C2Compiler::retry_no_reduce_allocation_merges());
}
} else {
assert(false, "Only safepoints can use SafePointScalarMerge nodes.");
C->record_failure(C2Compiler::retry_no_reduce_allocation_merges());
}
}
}
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* m = n->fast_out(i);
ideal_nodes.push(m);
}
}
}
// Returns true if there is an object in the scope of sfn that does not escape globally.
bool ConnectionGraph::has_ea_local_in_scope(SafePointNode* sfn) {
Compile* C = _compile;
@ -580,7 +952,8 @@ void ConnectionGraph::add_node_to_connection_graph(Node *n, Unique_Node_List *de
} else {
es = PointsToNode::GlobalEscape;
}
add_java_object(n, es);
PointsToNode* ptn_con = add_java_object(n, es);
set_not_scalar_replaceable(ptn_con NOT_PRODUCT(COMMA "Constant pointer"));
break;
}
case Op_CreateEx: {
@ -670,7 +1043,8 @@ void ConnectionGraph::add_node_to_connection_graph(Node *n, Unique_Node_List *de
break;
}
case Op_ThreadLocal: {
add_java_object(n, PointsToNode::ArgEscape);
PointsToNode* ptn_thr = add_java_object(n, PointsToNode::ArgEscape);
set_not_scalar_replaceable(ptn_thr NOT_PRODUCT(COMMA "Constant pointer"));
break;
}
case Op_Blackhole: {
@ -1048,6 +1422,9 @@ void ConnectionGraph::add_call_node(CallNode* call) {
es = PointsToNode::GlobalEscape;
}
add_java_object(call, es);
if (es == PointsToNode::GlobalEscape) {
set_not_scalar_replaceable(ptnode_adr(call->_idx) NOT_PRODUCT(COMMA "object can be loaded from boxing cache"));
}
} else {
BCEscapeAnalyzer* call_analyzer = meth->get_bcea();
call_analyzer->copy_dependencies(_compile->dependencies());
@ -1861,7 +2238,15 @@ int ConnectionGraph::find_init_values_null(JavaObjectNode* pta, PhaseValues* pha
}
// Adjust scalar_replaceable state after Connection Graph is built.
void ConnectionGraph::adjust_scalar_replaceable_state(JavaObjectNode* jobj) {
void ConnectionGraph::adjust_scalar_replaceable_state(JavaObjectNode* jobj, Unique_Node_List &reducible_merges) {
// A Phi 'x' is a _candidate_ to be reducible if 'can_reduce_phi(x)'
// returns true. If one of the constraints in this method sets 'jobj' to NSR
// then the candidate Phi is discarded. If the Phi has another SR 'jobj' as
// input, 'adjust_scalar_replaceable_state' will eventually be called with
// that other object and the Phi will become a reducible Phi.
// There could be multiple merges involving the same jobj.
Unique_Node_List candidates;
// Search for non-escaping objects which are not scalar replaceable
// and mark them to propagate the state to referenced objects.
@ -1896,13 +2281,28 @@ void ConnectionGraph::adjust_scalar_replaceable_state(JavaObjectNode* jobj) {
}
}
assert(use->is_Field() || use->is_LocalVar(), "sanity");
// 3. An object is not scalar replaceable if it is merged with other objects.
// 3. An object is not scalar replaceable if it is merged with other objects
// and we can't remove the merge
for (EdgeIterator j(use); j.has_next(); j.next()) {
PointsToNode* ptn = j.get();
if (ptn->is_JavaObject() && ptn != jobj) {
// Mark all objects.
set_not_scalar_replaceable(jobj NOT_PRODUCT(COMMA trace_merged_message(ptn)));
set_not_scalar_replaceable(ptn NOT_PRODUCT(COMMA trace_merged_message(jobj)));
Node* use_n = use->ideal_node();
// If it's already a candidate or confirmed reducible merge we can skip verification
if (candidates.member(use_n)) {
continue;
} else if (reducible_merges.member(use_n)) {
candidates.push(use_n);
continue;
}
if (ReduceAllocationMerges && use_n->is_Phi() && can_reduce_phi(use_n->as_Phi())) {
candidates.push(use_n);
} else {
// Mark all objects as NSR if we can't remove the merge
set_not_scalar_replaceable(jobj NOT_PRODUCT(COMMA trace_merged_message(ptn)));
set_not_scalar_replaceable(ptn NOT_PRODUCT(COMMA trace_merged_message(jobj)));
}
}
}
if (!jobj->scalar_replaceable()) {
@ -1965,7 +2365,7 @@ void ConnectionGraph::adjust_scalar_replaceable_state(JavaObjectNode* jobj) {
// Point p[] = new Point[1];
// if ( x ) p[0] = new Point(); // Will be not scalar replaced
//
if (field->base_count() > 1) {
if (field->base_count() > 1 && candidates.size() == 0) {
for (BaseIterator i(field); i.has_next(); i.next()) {
PointsToNode* base = i.get();
// Don't take into account LocalVar nodes which
@ -1977,8 +2377,21 @@ void ConnectionGraph::adjust_scalar_replaceable_state(JavaObjectNode* jobj) {
set_not_scalar_replaceable(base NOT_PRODUCT(COMMA "may point to more than one object"));
}
}
if (!jobj->scalar_replaceable()) {
return;
}
}
}
// The candidate is truly a reducible merge only if none of the other
// constraints ruled it as NSR. There could be multiple merges involving the
// same jobj.
assert(jobj->scalar_replaceable(), "sanity");
for (uint i = 0; i < candidates.size(); i++ ) {
Node* candidate = candidates.at(i);
reducible_merges.push(candidate);
}
}
// Propagate NSR (Not scalar replaceable) state.
@ -2244,15 +2657,16 @@ void ConnectionGraph::add_local_var(Node *n, PointsToNode::EscapeState es) {
map_ideal_node(n, ptadr);
}
void ConnectionGraph::add_java_object(Node *n, PointsToNode::EscapeState es) {
PointsToNode* ConnectionGraph::add_java_object(Node *n, PointsToNode::EscapeState es) {
PointsToNode* ptadr = _nodes.at(n->_idx);
if (ptadr != nullptr) {
assert(ptadr->is_JavaObject() && ptadr->ideal_node() == n, "sanity");
return;
return ptadr;
}
Compile* C = _compile;
ptadr = new (C->comp_arena()) JavaObjectNode(this, n, es);
map_ideal_node(n, ptadr);
return ptadr;
}
void ConnectionGraph::add_field(Node *n, PointsToNode::EscapeState es, int offset) {
@ -2343,8 +2757,7 @@ bool ConnectionGraph::is_oop_field(Node* n, int offset, bool* unsafe) {
}
// Returns unique pointed java object or null.
JavaObjectNode* ConnectionGraph::unique_java_object(Node *n) {
assert(!_collecting, "should not call when constructed graph");
JavaObjectNode* ConnectionGraph::unique_java_object(Node *n) const {
// If the node was created after the escape computation we can't answer.
uint idx = n->_idx;
if (idx >= nodes_size()) {
@ -3183,7 +3596,8 @@ Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArra
//
void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist,
GrowableArray<ArrayCopyNode*> &arraycopy_worklist,
GrowableArray<MergeMemNode*> &mergemem_worklist) {
GrowableArray<MergeMemNode*> &mergemem_worklist,
Unique_Node_List &reducible_merges) {
GrowableArray<Node *> memnode_worklist;
GrowableArray<PhiNode *> orig_phis;
PhaseIterGVN *igvn = _igvn;
@ -3330,7 +3744,12 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist,
}
}
} else if (n->is_AddP()) {
JavaObjectNode* jobj = unique_java_object(get_addp_base(n));
Node* addp_base = get_addp_base(n);
if (addp_base != nullptr && reducible_merges.member(addp_base)) {
// This AddP will go away when we reduce the Phi
continue;
}
JavaObjectNode* jobj = unique_java_object(addp_base);
if (jobj == nullptr || jobj == phantom_obj) {
#ifdef ASSERT
ptnode_adr(get_addp_base(n)->_idx)->dump();
@ -3351,6 +3770,12 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist,
assert(n->is_Phi(), "loops only through Phi's");
continue; // already processed
}
// Reducible Phis will be removed from the graph after split_unique_types finishes
if (reducible_merges.member(n)) {
// Split loads through phi
reduce_phi_on_field_access(n->as_Phi(), alloc_worklist);
continue;
}
JavaObjectNode* jobj = unique_java_object(n);
if (jobj == nullptr || jobj == phantom_obj) {
#ifdef ASSERT
@ -3493,7 +3918,6 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist,
// New alias types were created in split_AddP().
uint new_index_end = (uint) _compile->num_alias_types();
assert(unique_old == _compile->unique(), "there should be no new ideal nodes after Phase 1");
// Phase 2: Process MemNode's from memnode_worklist. compute new address type and
// compute new values for Memory inputs (the Memory inputs are not

View File

@ -361,7 +361,7 @@ private:
// Add nodes to ConnectionGraph.
void add_local_var(Node* n, PointsToNode::EscapeState es);
void add_java_object(Node* n, PointsToNode::EscapeState es);
PointsToNode* add_java_object(Node* n, PointsToNode::EscapeState es);
void add_field(Node* n, PointsToNode::EscapeState es, int offset);
void add_arraycopy(Node* n, PointsToNode::EscapeState es, PointsToNode* src, PointsToNode* dst);
@ -442,6 +442,10 @@ private:
NOT_PRODUCT(trace_es_update_helper(ptn, esc, true, reason));
ptn->set_fields_escape_state(esc);
}
if (esc != PointsToNode::NoEscape) {
ptn->set_scalar_replaceable(false);
}
}
}
void set_fields_escape_state(PointsToNode* ptn, PointsToNode::EscapeState esc
@ -452,6 +456,10 @@ private:
NOT_PRODUCT(trace_es_update_helper(ptn, esc, true, reason));
ptn->set_fields_escape_state(esc);
}
if (esc != PointsToNode::NoEscape) {
ptn->set_scalar_replaceable(false);
}
}
}
@ -461,7 +469,7 @@ private:
GrowableArray<JavaObjectNode*>& non_escaped_worklist);
// Adjust scalar_replaceable state after Connection Graph is built.
void adjust_scalar_replaceable_state(JavaObjectNode* jobj);
void adjust_scalar_replaceable_state(JavaObjectNode* jobj, Unique_Node_List &reducible_merges);
// Propagate NSR (Not scalar replaceable) state.
void find_scalar_replaceable_allocs(GrowableArray<JavaObjectNode*>& jobj_worklist);
@@ -473,7 +481,7 @@ private:
const TypeInt* optimize_ptr_compare(Node* n);
// Returns unique corresponding java object or null.
JavaObjectNode* unique_java_object(Node *n);
JavaObjectNode* unique_java_object(Node *n) const;
// Add an edge of the specified type pointing to the specified target.
bool add_edge(PointsToNode* from, PointsToNode* to) {
@@ -533,7 +541,8 @@ private:
// Propagate unique types created for non-escaped allocated objects through the graph
void split_unique_types(GrowableArray<Node *> &alloc_worklist,
GrowableArray<ArrayCopyNode*> &arraycopy_worklist,
GrowableArray<MergeMemNode*> &mergemem_worklist);
GrowableArray<MergeMemNode*> &mergemem_worklist,
Unique_Node_List &reducible_merges);
// Helper methods for unique types split.
bool split_AddP(Node *addp, Node *base);
@@ -578,6 +587,17 @@ private:
// Compute the escape information
bool compute_escape();
// -------------------------------------------
// Methods related to Reduce Allocation Merges
bool can_reduce_phi(PhiNode* ophi) const;
bool can_reduce_phi_check_users(PhiNode* ophi) const;
bool can_reduce_phi_check_inputs(PhiNode* ophi) const;
void reduce_phi_on_field_access(PhiNode* ophi, GrowableArray<Node *> &alloc_worklist);
void reduce_phi_on_safepoints(PhiNode* ophi, Unique_Node_List* safepoints);
void reduce_phi(PhiNode* ophi);
void set_not_scalar_replaceable(PointsToNode* ptn NOT_PRODUCT(COMMA const char* reason)) const {
#ifndef PRODUCT
if (_compile->directive()->TraceEscapeAnalysisOption) {
@@ -599,6 +619,9 @@ private:
public:
ConnectionGraph(Compile *C, PhaseIterGVN *igvn, int iteration);
// Verify that SafePointScalarMerge nodes are correctly connected
static void verify_ram_nodes(Compile* C, Node* root);
// Check for non-escaping candidates
static bool has_candidates(Compile *C);

View File

@@ -285,7 +285,7 @@ Node* PhaseMacroExpand::make_arraycopy_load(ArrayCopyNode* ac, intptr_t offset,
if (ac->is_clonebasic()) {
assert(ac->in(ArrayCopyNode::Src) != ac->in(ArrayCopyNode::Dest), "clone source equals destination");
Node* base = ac->in(ArrayCopyNode::Src);
Node* adr = _igvn.transform(new AddPNode(base, base, MakeConX(offset)));
Node* adr = _igvn.transform(new AddPNode(base, base, _igvn.MakeConX(offset)));
const TypePtr* adr_type = _igvn.type(base)->is_ptr()->add_offset(offset);
MergeMemNode* mergemen = _igvn.transform(MergeMemNode::make(mem))->as_MergeMem();
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
@@ -304,7 +304,7 @@ Node* PhaseMacroExpand::make_arraycopy_load(ArrayCopyNode* ac, intptr_t offset,
if (src_pos_t->is_con() && dest_pos_t->is_con()) {
intptr_t off = ((src_pos_t->get_con() - dest_pos_t->get_con()) << shift) + offset;
Node* base = ac->in(ArrayCopyNode::Src);
adr = _igvn.transform(new AddPNode(base, base, MakeConX(off)));
adr = _igvn.transform(new AddPNode(base, base, _igvn.MakeConX(off)));
adr_type = _igvn.type(base)->is_ptr()->add_offset(off);
if (ac->in(ArrayCopyNode::Src) == ac->in(ArrayCopyNode::Dest)) {
// Don't emit a new load from src if src == dst but try to get the value from memory instead
@@ -315,9 +315,9 @@ Node* PhaseMacroExpand::make_arraycopy_load(ArrayCopyNode* ac, intptr_t offset,
#ifdef _LP64
diff = _igvn.transform(new ConvI2LNode(diff));
#endif
diff = _igvn.transform(new LShiftXNode(diff, intcon(shift)));
diff = _igvn.transform(new LShiftXNode(diff, _igvn.intcon(shift)));
Node* off = _igvn.transform(new AddXNode(MakeConX(offset), diff));
Node* off = _igvn.transform(new AddXNode(_igvn.MakeConX(offset), diff));
Node* base = ac->in(ArrayCopyNode::Src);
adr = _igvn.transform(new AddPNode(base, base, off));
adr_type = _igvn.type(base)->is_ptr()->add_offset(Type::OffsetBot);
@@ -550,12 +550,13 @@ Node *PhaseMacroExpand::value_from_mem(Node *sfpt_mem, Node *sfpt_ctl, BasicType
}
// Check the possibility of scalar replacement.
bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints) {
bool PhaseMacroExpand::can_eliminate_allocation(PhaseIterGVN* igvn, AllocateNode *alloc, GrowableArray <SafePointNode *>* safepoints) {
// Scan the uses of the allocation to check for anything that would
// prevent us from eliminating it.
NOT_PRODUCT( const char* fail_eliminate = nullptr; )
DEBUG_ONLY( Node* disq_node = nullptr; )
bool can_eliminate = true;
bool can_eliminate = true;
bool reduce_merge_precheck = (safepoints == nullptr);
Node* res = alloc->result_cast();
const TypeOopPtr* res_type = nullptr;
@@ -565,7 +566,7 @@ bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArr
NOT_PRODUCT(fail_eliminate = "Allocation does not have unique CheckCastPP";)
can_eliminate = false;
} else {
res_type = _igvn.type(res)->isa_oopptr();
res_type = igvn->type(res)->isa_oopptr();
if (res_type == nullptr) {
NOT_PRODUCT(fail_eliminate = "Neither instance or array allocation";)
can_eliminate = false;
@@ -585,7 +586,7 @@ bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArr
Node* use = res->fast_out(j);
if (use->is_AddP()) {
const TypePtr* addp_type = _igvn.type(use)->is_ptr();
const TypePtr* addp_type = igvn->type(use)->is_ptr();
int offset = addp_type->offset();
if (offset == Type::OffsetTop || offset == Type::OffsetBot) {
@@ -626,9 +627,11 @@ bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArr
DEBUG_ONLY(disq_node = use;)
NOT_PRODUCT(fail_eliminate = "null or TOP memory";)
can_eliminate = false;
} else {
safepoints.append_if_missing(sfpt);
} else if (!reduce_merge_precheck) {
safepoints->append_if_missing(sfpt);
}
} else if (reduce_merge_precheck && (use->is_Phi() || use->is_EncodeP() || use->Opcode() == Op_MemBarRelease)) {
// Nothing to do
} else if (use->Opcode() != Op_CastP2X) { // CastP2X is used by card mark
if (use->is_Phi()) {
if (use->outcnt() == 1 && use->unique_out()->Opcode() == Op_Return) {
@@ -640,7 +643,7 @@ bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArr
} else {
if (use->Opcode() == Op_Return) {
NOT_PRODUCT(fail_eliminate = "Object is return value";)
}else {
} else {
NOT_PRODUCT(fail_eliminate = "Object is referenced by node";)
}
DEBUG_ONLY(disq_node = use;)
@@ -651,7 +654,7 @@ bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArr
}
#ifndef PRODUCT
if (PrintEliminateAllocations) {
if (PrintEliminateAllocations && safepoints != nullptr) {
if (can_eliminate) {
tty->print("Scalar ");
if (res == nullptr)
@@ -676,25 +679,73 @@ bool PhaseMacroExpand::can_eliminate_allocation(AllocateNode *alloc, GrowableArr
return can_eliminate;
}
// Do scalar replacement.
bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints) {
GrowableArray <SafePointNode *> safepoints_done;
ciInstanceKlass* iklass = nullptr;
int nfields = 0;
int array_base = 0;
int element_size = 0;
BasicType basic_elem_type = T_ILLEGAL;
const Type* field_type = nullptr;
void PhaseMacroExpand::undo_previous_scalarizations(GrowableArray <SafePointNode *> safepoints_done, AllocateNode* alloc) {
Node* res = alloc->result_cast();
int nfields = 0;
assert(res == nullptr || res->is_CheckCastPP(), "unexpected AllocateNode result");
const TypeOopPtr* res_type = nullptr;
if (res != nullptr) { // Could be null when there are no users
res_type = _igvn.type(res)->isa_oopptr();
}
if (res != nullptr) {
const TypeOopPtr* res_type = _igvn.type(res)->isa_oopptr();
if (res_type->isa_instptr()) {
// find the fields of the class which will be needed for safepoint debug information
ciInstanceKlass* iklass = res_type->is_instptr()->instance_klass();
nfields = iklass->nof_nonstatic_fields();
} else {
// find the array's elements which will be needed for safepoint debug information
nfields = alloc->in(AllocateNode::ALength)->find_int_con(-1);
assert(nfields >= 0, "must be an array klass.");
}
}
// rollback processed safepoints
while (safepoints_done.length() > 0) {
SafePointNode* sfpt_done = safepoints_done.pop();
// remove any extra entries we added to the safepoint
uint last = sfpt_done->req() - 1;
for (int k = 0; k < nfields; k++) {
sfpt_done->del_req(last--);
}
JVMState *jvms = sfpt_done->jvms();
jvms->set_endoff(sfpt_done->req());
// Now make a pass over the debug information replacing any references
// to SafePointScalarObjectNode with the allocated object.
int start = jvms->debug_start();
int end = jvms->debug_end();
for (int i = start; i < end; i++) {
if (sfpt_done->in(i)->is_SafePointScalarObject()) {
SafePointScalarObjectNode* scobj = sfpt_done->in(i)->as_SafePointScalarObject();
if (scobj->first_index(jvms) == sfpt_done->req() &&
scobj->n_fields() == (uint)nfields) {
assert(scobj->alloc() == alloc, "sanity");
sfpt_done->set_req(i, res);
}
}
}
_igvn._worklist.push(sfpt_done);
}
}
SafePointScalarObjectNode* PhaseMacroExpand::create_scalarized_object_description(AllocateNode *alloc, SafePointNode* sfpt) {
// Fields of scalar objs are referenced only at the end
// of regular debuginfo at the last (youngest) JVMS.
// Record relative start index.
ciInstanceKlass* iklass = nullptr;
BasicType basic_elem_type = T_ILLEGAL;
const Type* field_type = nullptr;
const TypeOopPtr* res_type = nullptr;
int nfields = 0;
int array_base = 0;
int element_size = 0;
uint first_ind = (sfpt->req() - sfpt->jvms()->scloff());
Node* res = alloc->result_cast();
assert(res == nullptr || res->is_CheckCastPP(), "unexpected AllocateNode result");
assert(sfpt->jvms() != nullptr, "missed JVMS");
if (res != nullptr) { // Could be null when there are no users
res_type = _igvn.type(res)->isa_oopptr();
if (res_type->isa_instptr()) {
// find the fields of the class which will be needed for safepoint debug information
iklass = res_type->is_instptr()->instance_klass();
@@ -709,141 +760,122 @@ bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray <Sa
field_type = res_type->is_aryptr()->elem();
}
}
//
// Process the safepoint uses
//
while (safepoints.length() > 0) {
SafePointNode* sfpt = safepoints.pop();
Node* mem = sfpt->memory();
Node* ctl = sfpt->control();
assert(sfpt->jvms() != nullptr, "missed JVMS");
// Fields of scalar objs are referenced only at the end
// of regular debuginfo at the last (youngest) JVMS.
// Record relative start index.
uint first_ind = (sfpt->req() - sfpt->jvms()->scloff());
SafePointScalarObjectNode* sobj = new SafePointScalarObjectNode(res_type,
#ifdef ASSERT
alloc,
#endif
first_ind, nfields);
sobj->init_req(0, C->root());
transform_later(sobj);
// Scan object's fields adding an input to the safepoint for each field.
for (int j = 0; j < nfields; j++) {
intptr_t offset;
ciField* field = nullptr;
if (iklass != nullptr) {
field = iklass->nonstatic_field_at(j);
offset = field->offset_in_bytes();
ciType* elem_type = field->type();
basic_elem_type = field->layout_type();
SafePointScalarObjectNode* sobj = new SafePointScalarObjectNode(res_type, alloc, first_ind, nfields);
sobj->init_req(0, C->root());
transform_later(sobj);
// The next code is taken from Parse::do_get_xxx().
if (is_reference_type(basic_elem_type)) {
if (!elem_type->is_loaded()) {
field_type = TypeInstPtr::BOTTOM;
} else if (field != nullptr && field->is_static_constant()) {
ciObject* con = field->constant_value().as_object();
// Do not "join" in the previous type; it doesn't add value,
// and may yield a vacuous result if the field is of interface type.
field_type = TypeOopPtr::make_from_constant(con)->isa_oopptr();
assert(field_type != nullptr, "field singleton type must be consistent");
} else {
field_type = TypeOopPtr::make_from_klass(elem_type->as_klass());
}
if (UseCompressedOops) {
field_type = field_type->make_narrowoop();
basic_elem_type = T_NARROWOOP;
}
// Scan object's fields adding an input to the safepoint for each field.
for (int j = 0; j < nfields; j++) {
intptr_t offset;
ciField* field = nullptr;
if (iklass != nullptr) {
field = iklass->nonstatic_field_at(j);
offset = field->offset_in_bytes();
ciType* elem_type = field->type();
basic_elem_type = field->layout_type();
// The next code is taken from Parse::do_get_xxx().
if (is_reference_type(basic_elem_type)) {
if (!elem_type->is_loaded()) {
field_type = TypeInstPtr::BOTTOM;
} else if (field != nullptr && field->is_static_constant()) {
ciObject* con = field->constant_value().as_object();
// Do not "join" in the previous type; it doesn't add value,
// and may yield a vacuous result if the field is of interface type.
field_type = TypeOopPtr::make_from_constant(con)->isa_oopptr();
assert(field_type != nullptr, "field singleton type must be consistent");
} else {
field_type = Type::get_const_basic_type(basic_elem_type);
field_type = TypeOopPtr::make_from_klass(elem_type->as_klass());
}
if (UseCompressedOops) {
field_type = field_type->make_narrowoop();
basic_elem_type = T_NARROWOOP;
}
} else {
offset = array_base + j * (intptr_t)element_size;
field_type = Type::get_const_basic_type(basic_elem_type);
}
const TypeOopPtr *field_addr_type = res_type->add_offset(offset)->isa_oopptr();
Node *field_val = value_from_mem(mem, ctl, basic_elem_type, field_type, field_addr_type, alloc);
if (field_val == nullptr) {
// We weren't able to find a value for this field,
// give up on eliminating this allocation.
// Remove any extra entries we added to the safepoint.
uint last = sfpt->req() - 1;
for (int k = 0; k < j; k++) {
sfpt->del_req(last--);
}
_igvn._worklist.push(sfpt);
// rollback processed safepoints
while (safepoints_done.length() > 0) {
SafePointNode* sfpt_done = safepoints_done.pop();
// remove any extra entries we added to the safepoint
last = sfpt_done->req() - 1;
for (int k = 0; k < nfields; k++) {
sfpt_done->del_req(last--);
}
JVMState *jvms = sfpt_done->jvms();
jvms->set_endoff(sfpt_done->req());
// Now make a pass over the debug information replacing any references
// to SafePointScalarObjectNode with the allocated object.
int start = jvms->debug_start();
int end = jvms->debug_end();
for (int i = start; i < end; i++) {
if (sfpt_done->in(i)->is_SafePointScalarObject()) {
SafePointScalarObjectNode* scobj = sfpt_done->in(i)->as_SafePointScalarObject();
if (scobj->first_index(jvms) == sfpt_done->req() &&
scobj->n_fields() == (uint)nfields) {
assert(scobj->alloc() == alloc, "sanity");
sfpt_done->set_req(i, res);
}
}
}
_igvn._worklist.push(sfpt_done);
}
#ifndef PRODUCT
if (PrintEliminateAllocations) {
if (field != nullptr) {
tty->print("=== At SafePoint node %d can't find value of Field: ",
sfpt->_idx);
field->print();
int field_idx = C->get_alias_index(field_addr_type);
tty->print(" (alias_idx=%d)", field_idx);
} else { // Array's element
tty->print("=== At SafePoint node %d can't find value of array element [%d]",
sfpt->_idx, j);
}
tty->print(", which prevents elimination of: ");
if (res == nullptr)
alloc->dump();
else
res->dump();
}
#endif
return false;
}
if (UseCompressedOops && field_type->isa_narrowoop()) {
// Enable "DecodeN(EncodeP(Allocate)) --> Allocate" transformation
// to be able to scalar replace the allocation.
if (field_val->is_EncodeP()) {
field_val = field_val->in(1);
} else {
field_val = transform_later(new DecodeNNode(field_val, field_val->get_ptr_type()));
}
}
sfpt->add_req(field_val);
} else {
offset = array_base + j * (intptr_t)element_size;
}
JVMState *jvms = sfpt->jvms();
jvms->set_endoff(sfpt->req());
const TypeOopPtr *field_addr_type = res_type->add_offset(offset)->isa_oopptr();
Node *field_val = value_from_mem(sfpt->memory(), sfpt->control(), basic_elem_type, field_type, field_addr_type, alloc);
// We weren't able to find a value for this field,
// give up on eliminating this allocation.
if (field_val == nullptr) {
uint last = sfpt->req() - 1;
for (int k = 0; k < j; k++) {
sfpt->del_req(last--);
}
_igvn._worklist.push(sfpt);
#ifndef PRODUCT
if (PrintEliminateAllocations) {
if (field != nullptr) {
tty->print("=== At SafePoint node %d can't find value of field: ", sfpt->_idx);
field->print();
int field_idx = C->get_alias_index(field_addr_type);
tty->print(" (alias_idx=%d)", field_idx);
} else { // Array's element
tty->print("=== At SafePoint node %d can't find value of array element [%d]", sfpt->_idx, j);
}
tty->print(", which prevents elimination of: ");
if (res == nullptr)
alloc->dump();
else
res->dump();
}
#endif
return nullptr;
}
if (UseCompressedOops && field_type->isa_narrowoop()) {
// Enable "DecodeN(EncodeP(Allocate)) --> Allocate" transformation
// to be able to scalar replace the allocation.
if (field_val->is_EncodeP()) {
field_val = field_val->in(1);
} else {
field_val = transform_later(new DecodeNNode(field_val, field_val->get_ptr_type()));
}
}
sfpt->add_req(field_val);
}
sfpt->jvms()->set_endoff(sfpt->req());
return sobj;
}
// Do scalar replacement.
bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints) {
GrowableArray <SafePointNode *> safepoints_done;
Node* res = alloc->result_cast();
assert(res == nullptr || res->is_CheckCastPP(), "unexpected AllocateNode result");
// Process the safepoint uses
while (safepoints.length() > 0) {
SafePointNode* sfpt = safepoints.pop();
SafePointScalarObjectNode* sobj = create_scalarized_object_description(alloc, sfpt);
if (sobj == nullptr) {
undo_previous_scalarizations(safepoints_done, alloc);
return false;
}
// Now make a pass over the debug information replacing any references
// to the allocated object with "sobj"
int start = jvms->debug_start();
int end = jvms->debug_end();
sfpt->replace_edges_in_range(res, sobj, start, end, &_igvn);
JVMState *jvms = sfpt->jvms();
sfpt->replace_edges_in_range(res, sobj, jvms->debug_start(), jvms->debug_end(), &_igvn);
_igvn._worklist.push(sfpt);
safepoints_done.append_if_missing(sfpt); // keep it for rollback
// keep it for rollback
safepoints_done.append_if_missing(sfpt);
}
return true;
}
@@ -1030,7 +1062,7 @@ bool PhaseMacroExpand::eliminate_allocate_node(AllocateNode *alloc) {
alloc->extract_projections(&_callprojs, false /*separate_io_proj*/, false /*do_asserts*/);
GrowableArray <SafePointNode *> safepoints;
if (!can_eliminate_allocation(alloc, safepoints)) {
if (!can_eliminate_allocation(&_igvn, alloc, &safepoints)) {
return false;
}

View File

@@ -99,8 +99,8 @@ private:
bool eliminate_boxing_node(CallStaticJavaNode *boxing);
bool eliminate_allocate_node(AllocateNode *alloc);
bool can_eliminate_allocation(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints);
bool scalar_replacement(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints_done);
void undo_previous_scalarizations(GrowableArray <SafePointNode *> safepoints_done, AllocateNode* alloc);
bool scalar_replacement(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints);
void process_users_of_allocation(CallNode *alloc);
void eliminate_gc_barrier(Node *p2x);
@@ -205,6 +205,10 @@ public:
void eliminate_macro_nodes();
bool expand_macro_nodes();
SafePointScalarObjectNode* create_scalarized_object_description(AllocateNode *alloc, SafePointNode* sfpt);
static bool can_eliminate_allocation(PhaseIterGVN *igvn, AllocateNode *alloc, GrowableArray <SafePointNode *> *safepoints);
PhaseIterGVN &igvn() const { return _igvn; }
#ifndef PRODUCT

View File

@@ -1517,9 +1517,38 @@ static bool stable_phi(PhiNode* phi, PhaseGVN *phase) {
}
return true;
}
//------------------------------split_through_phi------------------------------
// Check whether a call to 'split_through_phi' would split this load through the
// Phi *base*. This method is essentially a copy of the validations performed
// by 'split_through_phi'. The first use of this method was in EA code as part
// of simplification of allocation merges.
bool LoadNode::can_split_through_phi_base(PhaseGVN* phase) {
Node* mem = in(Memory);
Node* address = in(Address);
intptr_t ignore = 0;
Node* base = AddPNode::Ideal_base_and_offset(address, phase, ignore);
bool base_is_phi = (base != nullptr) && base->is_Phi();
if (req() > 3 || !base_is_phi) {
return false;
}
if (!mem->is_Phi()) {
if (!MemNode::all_controls_dominate(mem, base->in(0)))
return false;
} else if (base->in(0) != mem->in(0)) {
if (!MemNode::all_controls_dominate(mem, base->in(0))) {
return false;
}
}
return true;
}
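// A rough sketch of the transformation this check guards (IR shapes
// simplified): a load whose address base merges several allocations, e.g.
//   LoadI(mem, AddP(Phi(Region, alloc1, alloc2), off))
// is split into per-input loads merged by a value Phi,
//   Phi(Region, LoadI(mem, AddP(alloc1, off)), LoadI(mem, AddP(alloc2, off)))
// so that each non-escaping input of the base merge can still be scalar
// replaced on its own.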
//------------------------------split_through_phi------------------------------
// Split instance or boxed field load through Phi.
Node* LoadNode::split_through_phi(PhaseGVN* phase) {
Node* LoadNode::split_through_phi(PhaseGVN* phase, bool ignore_missing_instance_id) {
if (req() > 3) {
assert(is_LoadVector() && Opcode() != Op_LoadVector, "load has too many inputs");
// LoadVector subclasses such as LoadVectorMasked have extra inputs that the logic below doesn't take into account
@@ -1530,7 +1559,8 @@ Node* LoadNode::split_through_phi(PhaseGVN* phase) {
const TypeOopPtr *t_oop = phase->type(address)->isa_oopptr();
assert((t_oop != nullptr) &&
(t_oop->is_known_instance_field() ||
(ignore_missing_instance_id ||
t_oop->is_known_instance_field() ||
t_oop->is_ptr_to_boxed_value()), "invalid conditions");
Compile* C = phase->C;
@@ -1542,8 +1572,8 @@ Node* LoadNode::split_through_phi(PhaseGVN* phase) {
phase->type(base)->higher_equal(TypePtr::NOTNULL);
if (!((mem->is_Phi() || base_is_phi) &&
(load_boxed_values || t_oop->is_known_instance_field()))) {
return nullptr; // memory is not Phi
(ignore_missing_instance_id || load_boxed_values || t_oop->is_known_instance_field()))) {
return nullptr; // Neither memory nor base is a Phi
}
if (mem->is_Phi()) {
@@ -1587,7 +1617,7 @@ Node* LoadNode::split_through_phi(PhaseGVN* phase) {
}
// Split through Phi (see original code in loopopts.cpp).
assert(C->have_alias_type(t_oop), "instance should have alias type");
assert(ignore_missing_instance_id || C->have_alias_type(t_oop), "instance should have alias type");
// Do nothing here if Identity will find a value
// (to avoid infinite chain of value phis generation).
@@ -1623,16 +1653,20 @@ Node* LoadNode::split_through_phi(PhaseGVN* phase) {
region = mem->in(0);
}
Node* phi = nullptr;
const Type* this_type = this->bottom_type();
int this_index = C->get_alias_index(t_oop);
int this_offset = t_oop->offset();
int this_iid = t_oop->instance_id();
if (!t_oop->is_known_instance() && load_boxed_values) {
// Use _idx of address base for boxed values.
this_iid = base->_idx;
}
PhaseIterGVN* igvn = phase->is_IterGVN();
Node* phi = new PhiNode(region, this_type, nullptr, mem->_idx, this_iid, this_index, this_offset);
if (t_oop != nullptr && (t_oop->is_known_instance_field() || load_boxed_values)) {
int this_index = C->get_alias_index(t_oop);
int this_offset = t_oop->offset();
int this_iid = t_oop->is_known_instance_field() ? t_oop->instance_id() : base->_idx;
phi = new PhiNode(region, this_type, nullptr, mem->_idx, this_iid, this_index, this_offset);
} else if (ignore_missing_instance_id) {
phi = new PhiNode(region, this_type, nullptr, mem->_idx);
} else {
return nullptr;
}
for (uint i = 1; i < region->req(); i++) {
Node* x;
Node* the_clone = nullptr;

View File

@@ -244,8 +244,11 @@ public:
// try to hook me up to the exact initializing store.
virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
// Return true if it's possible to split the Load through a Phi merging the bases
bool can_split_through_phi_base(PhaseGVN *phase);
// Split instance field load through Phi.
Node* split_through_phi(PhaseGVN *phase);
Node* split_through_phi(PhaseGVN *phase, bool ignore_missing_instance_id = false);
// Recover original value from boxed values
Node *eliminate_autobox(PhaseIterGVN *igvn);

View File

@@ -158,6 +158,7 @@ class RegionNode;
class RootNode;
class SafePointNode;
class SafePointScalarObjectNode;
class SafePointScalarMergeNode;
class StartNode;
class State;
class StoreNode;
@@ -726,6 +727,7 @@ public:
DEFINE_CLASS_ID(UnorderedReduction, Reduction, 0)
DEFINE_CLASS_ID(Con, Type, 8)
DEFINE_CLASS_ID(ConI, Con, 0)
DEFINE_CLASS_ID(SafePointScalarMerge, Type, 9)
DEFINE_CLASS_ID(Proj, Node, 3)
@@ -953,6 +955,7 @@ public:
DEFINE_CLASS_QUERY(Root)
DEFINE_CLASS_QUERY(SafePoint)
DEFINE_CLASS_QUERY(SafePointScalarObject)
DEFINE_CLASS_QUERY(SafePointScalarMerge)
DEFINE_CLASS_QUERY(Start)
DEFINE_CLASS_QUERY(Store)
DEFINE_CLASS_QUERY(Sub)

View File

@@ -749,7 +749,7 @@ void PhaseOutput::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
if (local->is_SafePointScalarObject()) {
SafePointScalarObjectNode* spobj = local->as_SafePointScalarObject();
ObjectValue* sv = sv_for_node_id(objs, spobj->_idx);
ObjectValue* sv = (ObjectValue*) sv_for_node_id(objs, spobj->_idx);
if (sv == nullptr) {
ciKlass* cik = t->is_oopptr()->exact_klass();
assert(cik->is_instance_klass() ||
@@ -766,6 +766,31 @@ void PhaseOutput::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
}
array->append(sv);
return;
} else if (local->is_SafePointScalarMerge()) {
SafePointScalarMergeNode* smerge = local->as_SafePointScalarMerge();
ObjectMergeValue* mv = (ObjectMergeValue*) sv_for_node_id(objs, smerge->_idx);
if (mv == nullptr) {
GrowableArray<ScopeValue*> deps;
int merge_pointer_idx = smerge->merge_pointer_idx(sfpt->jvms());
(void)FillLocArray(0, sfpt, sfpt->in(merge_pointer_idx), &deps, objs);
assert(deps.length() == 1, "missing value");
int selector_idx = smerge->selector_idx(sfpt->jvms());
(void)FillLocArray(1, nullptr, sfpt->in(selector_idx), &deps, nullptr);
assert(deps.length() == 2, "missing value");
mv = new ObjectMergeValue(smerge->_idx, deps.at(0), deps.at(1));
set_sv_for_object_node(objs, mv);
for (uint i = 1; i < smerge->req(); i++) {
Node* obj_node = smerge->in(i);
(void)FillLocArray(mv->possible_objects()->length(), sfpt, obj_node, mv->possible_objects(), objs);
}
}
array->append(mv);
return;
}
// Grab the register number for the local
@@ -931,6 +956,18 @@ bool PhaseOutput::starts_bundle(const Node *n) const {
_node_bundling_base[n->_idx].starts_bundle());
}
// Determine if there is a monitor that has 'ov' as its owner.
bool PhaseOutput::contains_as_owner(GrowableArray<MonitorValue*> *monarray, ObjectValue *ov) const {
for (int k = 0; k < monarray->length(); k++) {
MonitorValue* mv = monarray->at(k);
if (mv->owner() == ov) {
return true;
}
}
return false;
}
//--------------------------Process_OopMap_Node--------------------------------
void PhaseOutput::Process_OopMap_Node(MachNode *mach, int current_offset) {
// Handle special safepoint nodes for synchronization
@@ -1061,6 +1098,21 @@ void PhaseOutput::Process_OopMap_Node(MachNode *mach, int current_offset) {
monarray->append(new MonitorValue(scval, basic_lock, eliminated));
}
// Mark ObjectValue nodes as root nodes if they are directly
// referenced in the JVMS.
for (int i = 0; i < objs->length(); i++) {
ScopeValue* sv = objs->at(i);
if (sv->is_object_merge()) {
ObjectMergeValue* merge = sv->as_ObjectMergeValue();
for (int j = 0; j < merge->possible_objects()->length(); j++) {
ObjectValue* ov = merge->possible_objects()->at(j)->as_ObjectValue();
bool is_root = locarray->contains(ov) || exparray->contains(ov) || contains_as_owner(monarray, ov);
ov->set_root(is_root);
}
}
}
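// Note: only root ObjectValues are unconditionally rematerialized during
// deoptimization; a candidate reachable solely through a merge is only
// materialized when the merge's selector indicates that its allocation was
// the one taken on the executed path.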
// We dump the object pool first, since deoptimization reads it in first.
C->debug_info()->dump_object_pool(objs);

View File

@@ -210,6 +210,7 @@ public:
bool valid_bundle_info(const Node *n);
bool starts_bundle(const Node *n) const;
bool contains_as_owner(GrowableArray<MonitorValue*> *monarray, ObjectValue *ov) const;
// Dump formatted assembly
#if defined(SUPPORT_OPTO_ASSEMBLY)

View File

@@ -276,11 +276,7 @@ void PhaseVector::scalarize_vbox_node(VectorBoxNode* vec_box) {
SafePointNode* sfpt = safepoints.pop()->as_SafePoint();
uint first_ind = (sfpt->req() - sfpt->jvms()->scloff());
Node* sobj = new SafePointScalarObjectNode(vec_box->box_type(),
#ifdef ASSERT
vec_box,
#endif // ASSERT
first_ind, n_fields);
Node* sobj = new SafePointScalarObjectNode(vec_box->box_type(), vec_box, first_ind, n_fields);
sobj->init_req(0, C->root());
sfpt->add_req(vec_value);

View File

@@ -330,7 +330,7 @@ static bool rematerialize_objects(JavaThread* thread, int exec_mode, CompiledMet
assert(exec_mode == Deoptimization::Unpack_none || (deoptee_thread == thread),
"a frame can only be deoptimized by the owner thread");
GrowableArray<ScopeValue*>* objects = chunk->at(0)->scope()->objects();
GrowableArray<ScopeValue*>* objects = chunk->at(0)->scope()->objects_to_rematerialize(deoptee, map);
// The flag return_oop() indicates call sites which return oop
// in compiled code. Such sites include java method calls,
@@ -1564,6 +1564,7 @@ static int reassign_fields_by_klass(InstanceKlass* klass, frame* fr, RegisterMap
// restore fields of all eliminated objects and arrays
void Deoptimization::reassign_fields(frame* fr, RegisterMap* reg_map, GrowableArray<ScopeValue*>* objects, bool realloc_failures, bool skip_internal) {
for (int i = 0; i < objects->length(); i++) {
assert(objects->at(i)->is_object(), "invalid debug information");
ObjectValue* sv = (ObjectValue*) objects->at(i);
Klass* k = java_lang_Class::as_Klass(sv->klass()->as_ConstantOopReadValue()->value()());
Handle obj = sv->value();

View File

@@ -778,6 +778,13 @@ public final class AccessController {
T result = action.run();
assert isPrivileged(); // sanity check invariant
// The 'getStackAccessControlContext' call inside 'isPrivileged'
// requires that no Local was scalar replaced. However, in some
// situations, after inlining, 'result' (or part of an allocation merge
// Phi possibly leading to it) might become NonEscaping and get scalar
// replaced. The call below forces 'result' to always escape.
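// (Illustrative only; names below are made up.) After inlining, 'result'
// may effectively come from an allocation merge such as
//   Object result = cond ? new Foo() : cachedFoo;
// where the 'new Foo()' branch is now a candidate for scalar replacement.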
ensureMaterializedForStackWalk(result);
// Keep these alive across the run() call so they can be
// retrieved by getStackAccessControlContext().
Reference.reachabilityFence(context);
@@ -809,6 +816,13 @@ public final class AccessController {
T result = action.run();
assert isPrivileged(); // sanity check invariant
// The 'getStackAccessControlContext' call inside 'isPrivileged'
// requires that no Local was scalar replaced. However, in some
// situations, after inlining, 'result' (or part of an allocation merge
// Phi possibly leading to it) might become NonEscaping and get scalar
// replaced. The call below forces 'result' to always escape.
ensureMaterializedForStackWalk(result);
// Keep these alive across the run() call so they can be
// retrieved by getStackAccessControlContext().
Reference.reachabilityFence(context);