6674600: (Escape Analysis) Optimize memory graph for instance's fields

EA gives the opportunity to do more aggressive memory optimizations.

Reviewed-by: never, jrose

parent 1ba2523386
commit cdd27962da
hotspot/src/share/vm/opto/callnode.cpp
@@ -625,8 +625,8 @@ uint CallNode::match_edge(uint idx) const {
 }
 
 //
-// Determine whether the call could modify a memory value of the
-// specified address type
+// Determine whether the call could modify the field of the specified
+// instance at the specified offset.
 //
 bool CallNode::may_modify(const TypePtr *addr_t, PhaseTransform *phase) {
   const TypeOopPtr *adrInst_t = addr_t->isa_oopptr();
@@ -638,9 +638,24 @@ bool CallNode::may_modify(const TypePtr *addr_t, PhaseTransform *phase) {
   Compile *C = phase->C;
   int offset = adrInst_t->offset();
   assert(offset >= 0, "should be valid offset");
-  assert(addr_t->isa_instptr() || addr_t->isa_aryptr(), "only instances or arrays are expected");
-
+  ciKlass* adr_k = adrInst_t->klass();
+  assert(adr_k->is_loaded() &&
+         adr_k->is_java_klass() &&
+         !adr_k->is_interface(),
+         "only non-abstract classes are expected");
+
+  int base_idx = C->get_alias_index(adrInst_t);
+  int size = BytesPerLong; // If we don't know the size, assume largest.
+  if (adrInst_t->isa_instptr()) {
+    ciField* field = C->alias_type(base_idx)->field();
+    if (field != NULL) {
+      size = field->size_in_bytes();
+    }
+  } else {
+    assert(adrInst_t->isa_aryptr(), "only arrays are expected");
+    size = type2aelembytes(adr_k->as_array_klass()->element_type()->basic_type());
+  }
 
   ciMethod * meth = is_CallStaticJava() ? as_CallStaticJava()->method() : NULL;
   BCEscapeAnalyzer *bcea = (meth != NULL) ? meth->get_bcea() : NULL;
@@ -656,14 +671,19 @@ bool CallNode::may_modify(const TypePtr *addr_t, PhaseTransform *phase) {
     if (!arg->is_top() && (t->isa_oopptr() != NULL ||
                            t->isa_ptr() && at_ptr != NULL)) {
       assert(at_ptr != NULL, "expecting an OopPtr");
-      // If we have found an argument matching adr_base_t, check if the field
-      // at the specified offset is modified.  Since we don't know the size,
-      // assume 8.
-      int at_idx = C->get_alias_index(at_ptr->add_offset(offset)->isa_oopptr());
-      if (base_idx == at_idx &&
-          (bcea == NULL ||
-           bcea->is_arg_modified(i - TypeFunc::Parms, offset, 8))) {
-        return true;
+      ciKlass* at_k = at_ptr->klass();
+      if ((adrInst_t->base() == at_ptr->base()) &&
+          at_k->is_loaded() &&
+          at_k->is_java_klass() &&
+          !at_k->is_interface()) {
+        // If we have found an argument matching addr_t, check if the field
+        // at the specified offset is modified.
+        int at_idx = C->get_alias_index(at_ptr->add_offset(offset)->isa_oopptr());
+        if (base_idx == at_idx &&
+            (bcea == NULL ||
+             bcea->is_arg_modified(i - TypeFunc::Parms, offset, size))) {
+          return true;
+        }
       }
     }
   }
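The rewritten may_modify above now asks a more precise question: instead of assuming every potentially written field spans 8 bytes, it computes the real field or array-element size and hands it to BCEscapeAnalyzer::is_arg_modified. A standalone sketch of the overlap test this enables; ToyEscapeInfo and its one-write-range-per-argument model are invented for illustration and are not HotSpot code:

    #include <utility>
    #include <vector>

    struct ToyEscapeInfo {
      // Per call argument: one byte range [start, end) the callee may write.
      std::vector<std::pair<int, int>> written;

      bool is_arg_modified(int arg, int offset, int size) const {
        auto [s, e] = written[arg];
        return offset < e && offset + size > s;   // half-open interval overlap
      }
    };

    // Example: the callee writes one byte at offset 17 of argument 0.
    //   ToyEscapeInfo ea{{{17, 18}}};
    //   ea.is_arg_modified(0, 16, 1);  // false: a byte field at 16 is untouched
    //   ea.is_arg_modified(0, 16, 8);  // true: the old blanket size of 8 overlaps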
hotspot/src/share/vm/opto/callnode.hpp
@@ -419,7 +419,7 @@ class SafePointScalarObjectNode: public TypeNode {
   uint _first_index; // First input edge index of a SafePoint node where
                      // states of the scalarized object fields are collected.
   uint _n_fields;    // Number of non-static fields of the scalarized object.
-  DEBUG_ONLY(AllocateNode* _alloc);
+  DEBUG_ONLY(AllocateNode* _alloc;)
 public:
   SafePointScalarObjectNode(const TypeOopPtr* tp,
 #ifdef ASSERT
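The one-character change in callnode.hpp is a build fix: DEBUG_ONLY(code) expands to nothing in product builds, so a semicolon left outside the parentheses becomes a stray ';' at class scope there, which some compilers reject. A minimal illustration with a simplified macro definition (HotSpot's real macro lives in its shared definitions header):

    #ifdef ASSERT
    #define DEBUG_ONLY(code) code
    #else
    #define DEBUG_ONLY(code)
    #endif

    class Example {
      DEBUG_ONLY(int _debug_field;)    // expands cleanly in both build flavors
      // DEBUG_ONLY(int _debug_field);  // product build: leaves a bare ';'
    public:
      Example() { DEBUG_ONLY(_debug_field = 0;) }
    };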
hotspot/src/share/vm/opto/cfgnode.cpp
@@ -704,6 +704,61 @@ PhiNode* PhiNode::slice_memory(const TypePtr* adr_type) const {
   return mem;
 }
 
+//------------------------split_out_instance-----------------------------------
+// Split out an instance type from a bottom phi.
+PhiNode* PhiNode::split_out_instance(const TypePtr* at, PhaseIterGVN *igvn) const {
+  assert(type() == Type::MEMORY && (adr_type() == TypePtr::BOTTOM ||
+         adr_type() == TypeRawPtr::BOTTOM) , "bottom or raw memory required");
+
+  // Check if an appropriate node already exists.
+  Node *region = in(0);
+  for (DUIterator_Fast kmax, k = region->fast_outs(kmax); k < kmax; k++) {
+    Node* use = region->fast_out(k);
+    if( use->is_Phi()) {
+      PhiNode *phi2 = use->as_Phi();
+      if (phi2->type() == Type::MEMORY && phi2->adr_type() == at) {
+        return phi2;
+      }
+    }
+  }
+  Compile *C = igvn->C;
+  Arena *a = Thread::current()->resource_area();
+  Node_Array node_map = new Node_Array(a);
+  Node_Stack stack(a, C->unique() >> 4);
+  PhiNode *nphi = slice_memory(at);
+  igvn->register_new_node_with_optimizer( nphi );
+  node_map.map(_idx, nphi);
+  stack.push((Node *)this, 1);
+  while(!stack.is_empty()) {
+    PhiNode *ophi = stack.node()->as_Phi();
+    uint i = stack.index();
+    assert(i >= 1, "not control edge");
+    stack.pop();
+    nphi = node_map[ophi->_idx]->as_Phi();
+    for (; i < ophi->req(); i++) {
+      Node *in = ophi->in(i);
+      if (in == NULL || igvn->type(in) == Type::TOP)
+        continue;
+      Node *opt = MemNode::optimize_simple_memory_chain(in, at, igvn);
+      PhiNode *optphi = opt->is_Phi() ? opt->as_Phi() : NULL;
+      if (optphi != NULL && optphi->adr_type() == TypePtr::BOTTOM) {
+        opt = node_map[optphi->_idx];
+        if (opt == NULL) {
+          stack.push(ophi, i);
+          nphi = optphi->slice_memory(at);
+          igvn->register_new_node_with_optimizer( nphi );
+          node_map.map(optphi->_idx, nphi);
+          ophi = optphi;
+          i = 0; // will get incremented at top of loop
+          continue;
+        }
+      }
+      nphi->set_req(i, opt);
+    }
+  }
+  return nphi;
+}
+
 //------------------------verify_adr_type--------------------------------------
 #ifdef ASSERT
 void PhiNode::verify_adr_type(VectorSet& visited, const TypePtr* at) const {
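split_out_instance clones a slice out of a possibly cyclic graph of memory Phis without recursion: a node map remembers the clone of every Phi seen so far, and an explicit stack records where to resume in the parent, so back edges simply reuse an existing clone. A standalone model of that traversal pattern; ToyPhi and clone_slice are invented, and the real code additionally routes each input through MemNode::optimize_simple_memory_chain and only descends into bottom-memory Phis:

    #include <cstddef>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    struct ToyPhi {
      std::vector<ToyPhi*> in;  // inputs; may point back at an enclosing phi
    };

    ToyPhi* clone_slice(ToyPhi* root) {
      std::unordered_map<ToyPhi*, ToyPhi*> cloned;         // node_map analogue
      std::vector<std::pair<ToyPhi*, std::size_t>> stack;  // Node_Stack analogue
      cloned[root] = new ToyPhi{std::vector<ToyPhi*>(root->in.size())};
      stack.push_back({root, 0});
      while (!stack.empty()) {
        auto [phi, i] = stack.back();
        stack.pop_back();
        ToyPhi* nphi = cloned[phi];
        for (; i < phi->in.size(); ++i) {
          ToyPhi* input = phi->in[i];
          if (input == nullptr) continue;          // dead path, as above
          auto it = cloned.find(input);
          if (it == cloned.end()) {
            stack.push_back({phi, i});             // remember where to resume
            cloned[input] = new ToyPhi{std::vector<ToyPhi*>(input->in.size())};
            phi  = input;                          // descend into the new phi
            nphi = cloned[phi];
            i = std::size_t(-1);                   // loop ++ wraps this to 0
            continue;
          }
          nphi->in[i] = it->second;                // already cloned: reuse it
        }
      }
      return cloned[root];
    }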
@@ -1736,6 +1791,18 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
         return result;
       }
     }
+    //
+    // Other optimizations on the memory chain
+    //
+    const TypePtr* at = adr_type();
+    for( uint i=1; i<req(); ++i ) {// For all paths in
+      Node *ii = in(i);
+      Node *new_in = MemNode::optimize_memory_chain(ii, at, phase);
+      if (ii != new_in ) {
+        set_req(i, new_in);
+        progress = this;
+      }
+    }
   }
 
   return progress;              // Return any progress
hotspot/src/share/vm/opto/cfgnode.hpp
@@ -148,6 +148,7 @@ public:
   static PhiNode* make( Node* r, Node* x, const Type *t, const TypePtr* at = NULL );
   // create a new phi with narrowed memory type
   PhiNode* slice_memory(const TypePtr* adr_type) const;
+  PhiNode* split_out_instance(const TypePtr* at, PhaseIterGVN *igvn) const;
   // like make(r, x), but does not initialize the in edges to x
   static PhiNode* make_blank( Node* r, Node* x );
hotspot/src/share/vm/opto/graphKit.cpp
@@ -2922,10 +2922,22 @@ Node* GraphKit::new_instance(Node* klass_node,
   const TypeOopPtr* oop_type = tklass->as_instance_type();
 
   // Now generate allocation code
+
+  // With escape analysis, the entire memory state is needed to be able to
+  // eliminate the allocation.  If the allocations cannot be eliminated, this
+  // will be optimized to the raw slice when the allocation is expanded.
+  Node *mem;
+  if (C->do_escape_analysis()) {
+    mem = reset_memory();
+    set_all_memory(mem);
+  } else {
+    mem = memory(Compile::AliasIdxRaw);
+  }
+
   AllocateNode* alloc
     = new (C, AllocateNode::ParmLimit)
         AllocateNode(C, AllocateNode::alloc_type(),
-                     control(), memory(Compile::AliasIdxRaw), i_o(),
+                     control(), mem, i_o(),
                      size, klass_node,
                      initial_slow_test);
@@ -3056,11 +3068,23 @@ Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable)
   }
 
   // Now generate allocation code
+
+  // With escape analysis, the entire memory state is needed to be able to
+  // eliminate the allocation.  If the allocations cannot be eliminated, this
+  // will be optimized to the raw slice when the allocation is expanded.
+  Node *mem;
+  if (C->do_escape_analysis()) {
+    mem = reset_memory();
+    set_all_memory(mem);
+  } else {
+    mem = memory(Compile::AliasIdxRaw);
+  }
+
   // Create the AllocateArrayNode and its result projections
   AllocateArrayNode* alloc
     = new (C, AllocateArrayNode::ParmLimit)
         AllocateArrayNode(C, AllocateArrayNode::alloc_type(),
-                          control(), memory(Compile::AliasIdxRaw), i_o(),
+                          control(), mem, i_o(),
                           size, klass_node,
                           initial_slow_test,
                           length);
hotspot/src/share/vm/opto/memnode.cpp
@@ -29,6 +29,8 @@
 #include "incls/_precompiled.incl"
 #include "incls/_memnode.cpp.incl"
 
+static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st);
+
 //=============================================================================
 uint MemNode::size_of() const { return sizeof(*this); }
 
@@ -87,6 +89,60 @@ extern void print_alias_types();
 
 #endif
 
+Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase) {
+  const TypeOopPtr *tinst = t_adr->isa_oopptr();
+  if (tinst == NULL || !tinst->is_instance_field())
+    return mchain;  // don't try to optimize non-instance types
+  uint instance_id = tinst->instance_id();
+  Node *prev = NULL;
+  Node *result = mchain;
+  while (prev != result) {
+    prev = result;
+    // skip over a call which does not affect this memory slice
+    if (result->is_Proj() && result->as_Proj()->_con == TypeFunc::Memory) {
+      Node *proj_in = result->in(0);
+      if (proj_in->is_Call()) {
+        CallNode *call = proj_in->as_Call();
+        if (!call->may_modify(t_adr, phase)) {
+          result = call->in(TypeFunc::Memory);
+        }
+      } else if (proj_in->is_Initialize()) {
+        AllocateNode* alloc = proj_in->as_Initialize()->allocation();
+        // Stop if this is the initialization for the object instance which
+        // contains this memory slice, otherwise skip over it.
+        if (alloc != NULL && alloc->_idx != instance_id) {
+          result = proj_in->in(TypeFunc::Memory);
+        }
+      } else if (proj_in->is_MemBar()) {
+        result = proj_in->in(TypeFunc::Memory);
+      }
+    } else if (result->is_MergeMem()) {
+      result = step_through_mergemem(phase, result->as_MergeMem(), t_adr, NULL, tty);
+    }
+  }
+  return result;
+}
+
+Node *MemNode::optimize_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase) {
+  const TypeOopPtr *t_oop = t_adr->isa_oopptr();
+  bool is_instance = (t_oop != NULL) && t_oop->is_instance_field();
+  PhaseIterGVN *igvn = phase->is_IterGVN();
+  Node *result = mchain;
+  result = optimize_simple_memory_chain(result, t_adr, phase);
+  if (is_instance && igvn != NULL && result->is_Phi()) {
+    PhiNode *mphi = result->as_Phi();
+    assert(mphi->bottom_type() == Type::MEMORY, "memory phi required");
+    const TypePtr *t = mphi->adr_type();
+    if (t == TypePtr::BOTTOM || t == TypeRawPtr::BOTTOM) {
+      // clone the Phi with our address type
+      result = mphi->split_out_instance(t_adr, igvn);
+    } else {
+      assert(phase->C->get_alias_index(t) == phase->C->get_alias_index(t_adr), "correct memory chain");
+    }
+  }
+  return result;
+}
+
 static Node *step_through_mergemem(PhaseGVN *phase, MergeMemNode *mmem, const TypePtr *tp, const TypePtr *adr_check, outputStream *st) {
   uint alias_idx = phase->C->get_alias_index(tp);
   Node *mem = mmem;
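The walk in optimize_simple_memory_chain climbs the memory edge to a fixed point, hopping over any producer (a call, the initialization of a different object, a memory barrier) that provably cannot write the single instance field this slice represents. The same skip-until-blocked shape as a standalone model; ToyNode is invented, and the real tests are CallNode::may_modify and the Initialize/MemBar checks above:

    struct ToyNode {
      ToyNode* memory_in;     // upstream memory state; nullptr at chain start
      int      writes_slice;  // alias index this node may write, -1 for none
    };

    // Returns the earliest memory state that might actually affect `slice`.
    // Assumes an acyclic chain, like the projection edges walked above.
    ToyNode* walk_memory_chain(ToyNode* mem, int slice) {
      ToyNode* prev = nullptr;
      while (prev != mem) {          // iterate until no more hops apply
        prev = mem;
        if (mem->memory_in != nullptr && mem->writes_slice != slice) {
          mem = mem->memory_in;      // cannot modify our field: skip over it
        }
      }
      return mem;
    }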
@@ -266,6 +322,8 @@ Node* MemNode::find_previous_store(PhaseTransform* phase) {
   if (offset == Type::OffsetBot)
     return NULL;            // cannot unalias unless there are precise offsets
 
+  const TypeOopPtr *addr_t = adr->bottom_type()->isa_oopptr();
+
   intptr_t size_in_bytes = memory_size();
 
   Node* mem = in(MemNode::Memory);   // start searching here...
@@ -345,6 +403,22 @@ Node* MemNode::find_previous_store(PhaseTransform* phase) {
         return mem;         // let caller handle steps (c), (d)
       }
 
+    } else if (addr_t != NULL && addr_t->is_instance_field()) {
+      // Can't use optimize_simple_memory_chain() since it needs PhaseGVN.
+      if (mem->is_Proj() && mem->in(0)->is_Call()) {
+        CallNode *call = mem->in(0)->as_Call();
+        if (!call->may_modify(addr_t, phase)) {
+          mem = call->in(TypeFunc::Memory);
+          continue;         // (a) advance through independent call memory
+        }
+      } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) {
+        mem = mem->in(0)->in(TypeFunc::Memory);
+        continue;           // (a) advance through independent MemBar memory
+      } else if (mem->is_MergeMem()) {
+        int alias_idx = phase->C->get_alias_index(adr_type());
+        mem = mem->as_MergeMem()->memory_at(alias_idx);
+        continue;           // (a) advance through independent MergeMem memory
+      }
     }
 
     // Unless there is an explicit 'continue', we must bail out here,
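The MergeMem arm above picks out one alias class from a merged memory state. A toy model of that lookup; ToyMergeMem is invented, while HotSpot's MergeMemNode similarly keeps a base memory plus optional per-alias-index slices:

    #include <cstddef>
    #include <vector>

    struct ToyMemState;  // stand-in for a memory-producing node

    struct ToyMergeMem {
      ToyMemState*              base;    // whole-memory fallback
      std::vector<ToyMemState*> slices;  // by alias index; nullptr = not split

      ToyMemState* memory_at(std::size_t alias_idx) const {
        return (alias_idx < slices.size() && slices[alias_idx] != nullptr)
                   ? slices[alias_idx]
                   : base;               // unsplit slices share the base memory
      }
    };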
@@ -1011,6 +1085,122 @@ Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {
     }
   }
 
+  Node* mem = in(MemNode::Memory);
+  const TypePtr *addr_t = phase->type(address)->isa_ptr();
+
+  if (addr_t != NULL) {
+    // try to optimize our memory input
+    Node* opt_mem = MemNode::optimize_memory_chain(mem, addr_t, phase);
+    if (opt_mem != mem) {
+      set_req(MemNode::Memory, opt_mem);
+      return this;
+    }
+    const TypeOopPtr *t_oop = addr_t->isa_oopptr();
+    if (can_reshape && opt_mem->is_Phi() &&
+        (t_oop != NULL) && t_oop->is_instance_field()) {
+      assert(t_oop->offset() != Type::OffsetBot && t_oop->offset() != Type::OffsetTop, "");
+      Node *region = opt_mem->in(0);
+      uint cnt = opt_mem->req();
+      for( uint i = 1; i < cnt; i++ ) {
+        Node *in = opt_mem->in(i);
+        if( in == NULL ) {
+          region = NULL; // Wait stable graph
+          break;
+        }
+      }
+      if (region != NULL) {
+        // Check for loop invariant.
+        if (cnt == 3) {
+          for( uint i = 1; i < cnt; i++ ) {
+            Node *in = opt_mem->in(i);
+            Node* m = MemNode::optimize_memory_chain(in, addr_t, phase);
+            if (m == opt_mem) {
+              set_req(MemNode::Memory, opt_mem->in(cnt - i)); // Skip this phi.
+              return this;
+            }
+          }
+        }
+        // Split through Phi (see original code in loopopts.cpp).
+        assert(phase->C->have_alias_type(addr_t), "instance should have alias type");
+        const Type* this_type = this->bottom_type();
+        int this_index  = phase->C->get_alias_index(addr_t);
+        int this_offset = addr_t->offset();
+        int this_iid    = addr_t->is_oopptr()->instance_id();
+        int wins = 0;
+        PhaseIterGVN *igvn = phase->is_IterGVN();
+        Node *phi = new (igvn->C, region->req()) PhiNode(region, this_type, NULL, this_iid, this_index, this_offset);
+        for( uint i = 1; i < region->req(); i++ ) {
+          Node *x;
+          Node* the_clone = NULL;
+          if( region->in(i) == phase->C->top() ) {
+            x = phase->C->top();      // Dead path?  Use a dead data op
+          } else {
+            x = this->clone();        // Else clone up the data op
+            the_clone = x;            // Remember for possible deletion.
+            // Alter data node to use pre-phi inputs
+            if( this->in(0) == region ) {
+              x->set_req( 0, region->in(i) );
+            } else {
+              x->set_req( 0, NULL );
+            }
+            for( uint j = 1; j < this->req(); j++ ) {
+              Node *in = this->in(j);
+              if( in->is_Phi() && in->in(0) == region )
+                x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
+            }
+          }
+          // Check for a 'win' on some paths
+          const Type *t = x->Value(igvn);
+
+          bool singleton = t->singleton();
+
+          // See comments in PhaseIdealLoop::split_thru_phi().
+          if( singleton && t == Type::TOP ) {
+            singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
+          }
+
+          if( singleton ) {
+            wins++;
+            x = igvn->makecon(t);
+          } else {
+            // We now call Identity to try to simplify the cloned node.
+            // Note that some Identity methods call phase->type(this).
+            // Make sure that the type array is big enough for
+            // our new node, even though we may throw the node away.
+            // (This tweaking with igvn only works because x is a new node.)
+            igvn->set_type(x, t);
+            Node *y = x->Identity(igvn);
+            if( y != x ) {
+              wins++;
+              x = y;
+            } else {
+              y = igvn->hash_find(x);
+              if( y ) {
+                wins++;
+                x = y;
+              } else {
+                // Else x is a new node we are keeping
+                // We do not need register_new_node_with_optimizer
+                // because set_type has already been called.
+                igvn->_worklist.push(x);
+              }
+            }
+          }
+          if (x != the_clone && the_clone != NULL)
+            igvn->remove_dead_node(the_clone);
+          phi->set_req(i, x);
+        }
+        if( wins > 0 ) {
+          // Record Phi
+          igvn->register_new_node_with_optimizer(phi);
+          return phi;
+        } else {
+          igvn->remove_dead_node(phi);
+        }
+      }
+    }
+  }
+
   // Check for prior store with a different base or offset; make Load
   // independent.  Skip through any number of them.  Bail out if the stores
   // are in an endless dead cycle and report no progress.  This is a key
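The block above transplants PhaseIdealLoop::split_thru_phi to instance-field loads: clone the load onto each predecessor path, count a "win" whenever a per-path clone constant-folds or simplifies, and keep the speculative Phi only when wins > 0. The same accounting in miniature; the mini-IR below is invented, with an engaged optional standing for a path that folded to a constant:

    #include <optional>
    #include <vector>

    using Val = std::optional<int>;  // engaged = compile-time constant

    // op(phi(v1..vn)) -> phi(op(v1)..op(vn)), kept only when some path folds.
    bool split_through_phi(const std::vector<Val>& phi_inputs,
                           int (*op)(int),
                           std::vector<Val>& out) {
      int wins = 0;
      out.clear();
      for (const Val& v : phi_inputs) {
        if (v.has_value()) {           // this path folds to a constant: a win
          out.push_back(op(*v));
          ++wins;
        } else {
          out.push_back(std::nullopt); // stays a runtime op on this path
        }
      }
      return wins > 0;                 // otherwise discard the speculative phi
    }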
hotspot/src/share/vm/opto/memnode.hpp
@@ -67,6 +67,8 @@ public:
                                         PhaseTransform* phase);
   static bool adr_phi_is_loop_invariant(Node* adr_phi, Node* cast);
 
+  static Node *optimize_simple_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase);
+  static Node *optimize_memory_chain(Node *mchain, const TypePtr *t_adr, PhaseGVN *phase);
+
   // This one should probably be a phase-specific function:
   static bool detect_dominating_control(Node* dom, Node* sub);