Merge

2009-07-24 16:40:56 -07:00 · 2009-07-24 16:40:56 -07:00 · 6ac5f30765
commit 6ac5f30765
parent 432d405985 1b6412363e
93 changed files with 2228 additions and 1507 deletions
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp
@ -4208,6 +4208,7 @@ void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offs
                  PtrQueue::byte_offset_of_active()),
         tmp);
  }
+
  // Check on whether to annul.
  br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
  delayed() -> nop();
@ -4215,13 +4216,13 @@ void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offs
  // satb_log_barrier_work1(tmp, offset);
  if (index == noreg) {
    if (Assembler::is_simm13(offset)) {
-      ld_ptr(obj, offset, tmp);
+      load_heap_oop(obj, offset, tmp);
    } else {
      set(offset, tmp);
-      ld_ptr(obj, tmp, tmp);
+      load_heap_oop(obj, tmp, tmp);
    }
  } else {
-    ld_ptr(obj, index, tmp);
+    load_heap_oop(obj, index, tmp);
  }

  // satb_log_barrier_work2(obj, tmp, offset);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp
@ -6805,14 +6805,18 @@ void MacroAssembler::g1_write_barrier_pre(Register obj,
  jcc(Assembler::equal, done);

  // if (x.f == NULL) goto done;
-  cmpptr(Address(obj, 0), NULL_WORD);
+#ifdef _LP64
+  load_heap_oop(tmp2, Address(obj, 0));
+#else
+  movptr(tmp2, Address(obj, 0));
+#endif
+  cmpptr(tmp2, (int32_t) NULL_WORD);
  jcc(Assembler::equal, done);

  // Can we store original value in the thread's buffer?

-  LP64_ONLY(movslq(tmp, index);)
-  movptr(tmp2, Address(obj, 0));
 #ifdef _LP64
+  movslq(tmp, index);
  cmpq(tmp, 0);
 #else
  cmpl(index, 0);
@ -6834,8 +6838,7 @@ void MacroAssembler::g1_write_barrier_pre(Register obj,
  if(tosca_live) push(rax);
  push(obj);
 #ifdef _LP64
-  movq(c_rarg0, Address(obj, 0));
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
 #else
  push(thread);
  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
--- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp
@ -269,11 +269,11 @@ void MethodHandles::remove_arg_slots(MacroAssembler* _masm,

 #ifndef PRODUCT
 void trace_method_handle_stub(const char* adaptername,
-                              oop mh,
+                              oopDesc* mh,
                              intptr_t* entry_sp,
                              intptr_t* saved_sp) {
  // called as a leaf from native code: do not block the JVM!
-  printf("MH %s "PTR_FORMAT" "PTR_FORMAT" "INTX_FORMAT"\n", adaptername, mh, entry_sp, entry_sp - saved_sp);
+  printf("MH %s "PTR_FORMAT" "PTR_FORMAT" "INTX_FORMAT"\n", adaptername, (void*)mh, entry_sp, entry_sp - saved_sp);
 }
 #endif //PRODUCT

--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
@ -1302,22 +1302,19 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,

  const Register ic_reg = rax;
  const Register receiver = j_rarg0;
-  const Register tmp = rdx;

  Label ok;
  Label exception_pending;

+  assert_different_registers(ic_reg, receiver, rscratch1);
  __ verify_oop(receiver);
-  __ push(tmp); // spill (any other registers free here???)
-  __ load_klass(tmp, receiver);
-  __ cmpq(ic_reg, tmp);
+  __ load_klass(rscratch1, receiver);
+  __ cmpq(ic_reg, rscratch1);
  __ jcc(Assembler::equal, ok);

-  __ pop(tmp);
  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

  __ bind(ok);
-  __ pop(tmp);

  // Verified entry point must be aligned
  __ align(8);
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@ -709,7 +709,7 @@ class StubGenerator: public StubCodeGenerator {
  //
  //  Input:
  //     start   -  starting address
-  //     end     -  element count
+  //     count   -  element count
  void  gen_write_ref_array_pre_barrier(Register start, Register count) {
    assert_different_registers(start, count);
    BarrierSet* bs = Universe::heap()->barrier_set();
@ -757,7 +757,6 @@ class StubGenerator: public StubCodeGenerator {
          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
          __ addptr(rsp, 2*wordSize);
          __ popa();
-
        }
        break;

--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@ -1207,9 +1207,9 @@ class StubGenerator: public StubCodeGenerator {
          __ pusha();                      // push registers (overkill)
          // must compute element count unless barrier set interface is changed (other platforms supply count)
          assert_different_registers(start, end, scratch);
-          __ lea(scratch, Address(end, wordSize));
-          __ subptr(scratch, start);
-          __ shrptr(scratch, LogBytesPerWord);
+          __ lea(scratch, Address(end, BytesPerHeapOop));
+          __ subptr(scratch, start);               // subtract start to get #bytes
+          __ shrptr(scratch, LogBytesPerHeapOop);  // convert to element count
          __ mov(c_rarg0, start);
          __ mov(c_rarg1, scratch);
          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
@ -1225,6 +1225,7 @@ class StubGenerator: public StubCodeGenerator {
          Label L_loop;

           __ shrptr(start, CardTableModRefBS::card_shift);
+           __ addptr(end, BytesPerHeapOop);
           __ shrptr(end, CardTableModRefBS::card_shift);
           __ subptr(end, start); // number of bytes to copy

@ -2251,6 +2252,7 @@ class StubGenerator: public StubCodeGenerator {
    // and report their number to the caller.
    assert_different_registers(rax, r14_length, count, to, end_to, rcx);
    __ lea(end_to, to_element_addr);
+    __ addptr(end_to, -heapOopSize);      // make an inclusive end pointer
    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
    __ movptr(rax, r14_length);           // original oops
    __ addptr(rax, count);                // K = (original - remaining) oops
@ -2259,7 +2261,7 @@ class StubGenerator: public StubCodeGenerator {

    // Come here on success only.
    __ BIND(L_do_card_marks);
-    __ addptr(end_to, -wordSize);         // make an inclusive end pointer
+    __ addptr(end_to, -heapOopSize);         // make an inclusive end pointer
    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
    __ xorptr(rax, rax);                  // return 0 on success

--- a/hotspot/src/share/vm/adlc/formssel.cpp
+++ b/hotspot/src/share/vm/adlc/formssel.cpp
@ -420,6 +420,13 @@ Form::DataType InstructForm::is_ideal_load() const {
  return  _matrule->is_ideal_load();
 }

+// Return 'true' if this instruction matches an ideal 'LoadKlass' node
+bool InstructForm::skip_antidep_check() const {
+  if( _matrule == NULL ) return false;
+
+  return  _matrule->skip_antidep_check();
+}
+
 // Return 'true' if this instruction matches an ideal 'Load?' node
 Form::DataType InstructForm::is_ideal_store() const {
  if( _matrule == NULL ) return Form::none;
@ -567,6 +574,8 @@ bool InstructForm::rematerialize(FormDict &globals, RegisterForm *registers ) {

 // loads from memory, so must check for anti-dependence
 bool InstructForm::needs_anti_dependence_check(FormDict &globals) const {
+  if ( skip_antidep_check() ) return false;
+
  // Machine independent loads must be checked for anti-dependences
  if( is_ideal_load() != Form::none )  return true;

@ -3957,6 +3966,28 @@ Form::DataType MatchRule::is_ideal_load() const {
 }


+bool MatchRule::skip_antidep_check() const {
+  // Some loads operate on what is effectively immutable memory so we
+  // should skip the anti dep computations.  For some of these nodes
+  // the rewritable field keeps the anti dep logic from triggering but
+  // for certain kinds of LoadKlass it does not since they are
+  // actually reading memory which could be rewritten by the runtime,
+  // though never by generated code.  This disables it uniformly for
+  // the nodes that behave like this: LoadKlass, LoadNKlass and
+  // LoadRange.
+  if ( _opType && (strcmp(_opType,"Set") == 0) && _rChild ) {
+    const char *opType = _rChild->_opType;
+    if (strcmp("LoadKlass", opType) == 0 ||
+        strcmp("LoadNKlass", opType) == 0 ||
+        strcmp("LoadRange", opType) == 0) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+
 Form::DataType MatchRule::is_ideal_store() const {
  Form::DataType ideal_store = Form::none;

--- a/hotspot/src/share/vm/adlc/formssel.hpp
+++ b/hotspot/src/share/vm/adlc/formssel.hpp
@ -158,6 +158,9 @@ public:

  virtual Form::CallType is_ideal_call() const; // matches ideal 'Call'
  virtual Form::DataType is_ideal_load() const; // node matches ideal 'LoadXNode'
+  // Should antidep checks be disabled for this Instruct
+  // See definition of MatchRule::skip_antidep_check
+  bool skip_antidep_check() const;
  virtual Form::DataType is_ideal_store() const;// node matches ideal 'StoreXNode'
          bool        is_ideal_mem() const { return is_ideal_load() != Form::none || is_ideal_store() != Form::none; }
  virtual uint        two_address(FormDict &globals); // output reg must match input reg
@ -1003,6 +1006,9 @@ public:
  bool       is_ideal_loopEnd() const; // node matches ideal 'LoopEnd'
  bool       is_ideal_bool() const;    // node matches ideal 'Bool'
  Form::DataType is_ideal_load() const;// node matches ideal 'LoadXNode'
+  // Should antidep checks be disabled for this rule
+  // See definition of MatchRule::skip_antidep_check
+  bool skip_antidep_check() const;
  Form::DataType is_ideal_store() const;// node matches ideal 'StoreXNode'

  // Check if 'mRule2' is a cisc-spill variant of this MatchRule
--- a/hotspot/src/share/vm/classfile/classFileParser.cpp
+++ b/hotspot/src/share/vm/classfile/classFileParser.cpp
@ -3231,6 +3231,16 @@ instanceKlassHandle ClassFileParser::parseClassFile(symbolHandle name,
    this_klass->set_minor_version(minor_version);
    this_klass->set_major_version(major_version);

+    // Set up methodOop::intrinsic_id as soon as we know the names of methods.
+    // (We used to do this lazily, but now we query it in Rewriter,
+    // which is eagerly done for every method, so we might as well do it now,
+    // when everything is fresh in memory.)
+    if (methodOopDesc::klass_id_for_intrinsics(this_klass->as_klassOop()) != vmSymbols::NO_SID) {
+      for (int j = 0; j < methods->length(); j++) {
+        ((methodOop)methods->obj_at(j))->init_intrinsic_id();
+      }
+    }
+
    if (cached_class_file_bytes != NULL) {
      // JVMTI: we have an instanceKlass now, tell it about the cached bytes
      this_klass->set_cached_class_file(cached_class_file_bytes,
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
@ -513,9 +513,6 @@
 //
 // for Emacs: (let ((c-backslash-column 120) (c-backslash-max-column 120)) (c-backslash-region (point) (point-max) nil t))
 #define VM_INTRINSICS_DO(do_intrinsic, do_class, do_name, do_signature, do_alias)                                       \
-  do_intrinsic(_Object_init,              java_lang_Object, object_initializer_name, void_method_signature,      F_R)   \
-  /*    (symbol object_initializer_name defined above) */                                                               \
-                                                                                                                        \
  do_intrinsic(_hashCode,                 java_lang_Object,       hashCode_name, void_int_signature,             F_R)   \
   do_name(     hashCode_name,                                   "hashCode")                                            \
  do_intrinsic(_getClass,                 java_lang_Object,       getClass_name, void_class_signature,           F_R)   \
@ -635,9 +632,6 @@
  do_intrinsic(_equalsC,                  java_util_Arrays,       equals_name,    equalsC_signature,             F_S)   \
   do_signature(equalsC_signature,                               "([C[C)Z")                                             \
                                                                                                                        \
-  do_intrinsic(_invoke,                   java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \
-  /*   (symbols invoke_name and invoke_signature defined above) */                                                      \
-                                                                                                                        \
  do_intrinsic(_compareTo,                java_lang_String,       compareTo_name, string_int_signature,          F_R)   \
   do_name(     compareTo_name,                                  "compareTo")                                           \
  do_intrinsic(_indexOf,                  java_lang_String,       indexOf_name, string_int_signature,            F_R)   \
@ -656,8 +650,6 @@
   do_name(     attemptUpdate_name,                                 "attemptUpdate")                                    \
   do_signature(attemptUpdate_signature,                            "(JJ)Z")                                            \
                                                                                                                        \
-  do_intrinsic(_fillInStackTrace,         java_lang_Throwable, fillInStackTrace_name, void_throwable_signature,  F_RNY) \
-                                                                                                                        \
  /* support for sun.misc.Unsafe */                                                                                     \
  do_class(sun_misc_Unsafe,               "sun/misc/Unsafe")                                                            \
                                                                                                                        \
@ -819,10 +811,22 @@
   do_name(     prefetchReadStatic_name,                         "prefetchReadStatic")                                  \
  do_intrinsic(_prefetchWriteStatic,      sun_misc_Unsafe,        prefetchWriteStatic_name, prefetch_signature,  F_SN)  \
   do_name(     prefetchWriteStatic_name,                        "prefetchWriteStatic")                                 \
+    /*== LAST_COMPILER_INLINE*/                                                                                         \
+    /*the compiler does have special inlining code for these; bytecode inline is just fine */                           \
+                                                                                                                        \
+  do_intrinsic(_fillInStackTrace,         java_lang_Throwable, fillInStackTrace_name, void_throwable_signature,  F_RNY) \
+                                                                                                                        \
+  do_intrinsic(_Object_init,              java_lang_Object, object_initializer_name, void_method_signature,      F_R)   \
+  /*    (symbol object_initializer_name defined above) */                                                               \
+                                                                                                                        \
+  do_intrinsic(_invoke,                   java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \
+  /*   (symbols invoke_name and invoke_signature defined above) */                                                      \
+                                                                                                                        \
    /*end*/



+
 // Class vmSymbols

 class vmSymbols: AllStatic {
@ -935,6 +939,7 @@ class vmIntrinsics: AllStatic {
    #undef VM_INTRINSIC_ENUM

    ID_LIMIT,
+    LAST_COMPILER_INLINE = _prefetchWriteStatic,
    FIRST_ID = _none + 1
  };

@ -972,4 +977,7 @@ public:
  static Flags              flags_for(ID id);

  static const char* short_name_as_C_string(ID id, char* buf, int size);
+
+  // Access to intrinsic methods:
+  static methodOop method_for(ID id);
 };
--- a/hotspot/src/share/vm/compiler/oopMap.cpp
+++ b/hotspot/src/share/vm/compiler/oopMap.cpp
@ -379,7 +379,15 @@ void OopMapSet::all_do(const frame *fr, const RegisterMap *reg_map,
        if ( loc != NULL ) {
          oop *base_loc    = fr->oopmapreg_to_location(omv.content_reg(), reg_map);
          oop *derived_loc = loc;
-          derived_oop_fn(base_loc, derived_loc);
+          oop val = *base_loc;
+          if (val == (oop)NULL || Universe::is_narrow_oop_base(val)) {
+            // Ignore NULL oops and decoded NULL narrow oops which
+            // equal to Universe::narrow_oop_base when a narrow oop
+            // implicit null check is used in compiled code.
+            // The narrow_oop_base could be NULL or be the address
+            // of the page below heap depending on compressed oops mode.
+          } else
+            derived_oop_fn(base_loc, derived_loc);
        }
        oms.next();
      }  while (!oms.is_done());
@ -394,6 +402,15 @@ void OopMapSet::all_do(const frame *fr, const RegisterMap *reg_map,
      oop* loc = fr->oopmapreg_to_location(omv.reg(),reg_map);
      if ( loc != NULL ) {
        if ( omv.type() == OopMapValue::oop_value ) {
+          oop val = *loc;
+          if (val == (oop)NULL || Universe::is_narrow_oop_base(val)) {
+            // Ignore NULL oops and decoded NULL narrow oops which
+            // equal to Universe::narrow_oop_base when a narrow oop
+            // implicit null check is used in compiled code.
+            // The narrow_oop_base could be NULL or be the address
+            // of the page below heap depending on compressed oops mode.
+            continue;
+          }
 #ifdef ASSERT
          if ((((uintptr_t)loc & (sizeof(*loc)-1)) != 0) ||
             !Universe::heap()->is_in_or_null(*loc)) {
@ -410,6 +427,8 @@ void OopMapSet::all_do(const frame *fr, const RegisterMap *reg_map,
 #endif // ASSERT
          oop_fn->do_oop(loc);
        } else if ( omv.type() == OopMapValue::value_value ) {
+          assert((*loc) == (oop)NULL || !Universe::is_narrow_oop_base(*loc),
+                 "found invalid value pointer");
          value_fn->do_oop(loc);
        } else if ( omv.type() == OopMapValue::narrowoop_value ) {
          narrowOop *nl = (narrowOop*)loc;
--- a/hotspot/src/share/vm/compiler/oopMap.hpp
+++ b/hotspot/src/share/vm/compiler/oopMap.hpp
@ -233,6 +233,10 @@ class OopMapSet : public ResourceObj {
  int heap_size() const;
  void copy_to(address addr);

+  // Methods oops_do() and all_do() filter out NULL oops and
+  // oop == Universe::narrow_oop_base() before passing oops
+  // to closures.
+
  // Iterates through frame for a compiled method
  static void oops_do            (const frame* fr,
                                  const RegisterMap* reg_map, OopClosure* f);
--- a/hotspot/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp
@ -42,35 +42,40 @@ protected:
    BufferLength = 1024
  };

-  oop          *_buffer[BufferLength];
-  oop         **_buffer_top;
-  oop         **_buffer_curr;
+  StarTask  _buffer[BufferLength];
+  StarTask* _buffer_top;
+  StarTask* _buffer_curr;

-  OopClosure  *_oc;
-  double       _closure_app_seconds;
+  OopClosure* _oc;
+  double      _closure_app_seconds;

  void process_buffer () {
-
    double start = os::elapsedTime();
-    for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
-      _oc->do_oop(*curr);
+    for (StarTask* curr = _buffer; curr < _buffer_curr; ++curr) {
+      if (curr->is_narrow()) {
+        assert(UseCompressedOops, "Error");
+        _oc->do_oop((narrowOop*)(*curr));
+      } else {
+        _oc->do_oop((oop*)(*curr));
+      }
    }
    _buffer_curr = _buffer;
    _closure_app_seconds += (os::elapsedTime() - start);
  }

-public:
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop *p) {
+  template <class T> inline void do_oop_work(T* p) {
    if (_buffer_curr == _buffer_top) {
      process_buffer();
    }
-
-    *_buffer_curr = p;
+    StarTask new_ref(p);
+    *_buffer_curr = new_ref;
    ++_buffer_curr;
  }
+
+public:
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(oop* p)       { do_oop_work(p); }
+
  void done () {
    if (_buffer_curr > _buffer) {
      process_buffer();
@ -88,18 +93,17 @@ public:
 class BufferingOopsInGenClosure: public OopsInGenClosure {
  BufferingOopClosure _boc;
  OopsInGenClosure* _oc;
-public:
+ protected:
+  template <class T> inline void do_oop_work(T* p) {
+    assert(generation()->is_in_reserved((void*)p), "Must be in!");
+    _boc.do_oop(p);
+  }
+ public:
  BufferingOopsInGenClosure(OopsInGenClosure *oc) :
    _boc(oc), _oc(oc) {}

-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-
-  virtual void do_oop(oop* p) {
-    assert(generation()->is_in_reserved(p), "Must be in!");
-    _boc.do_oop(p);
-  }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(oop* p)       { do_oop_work(p); }

  void done() {
    _boc.done();
@ -130,14 +134,14 @@ private:
    BufferLength = 1024
  };

-  oop                      *_buffer[BufferLength];
-  oop                     **_buffer_top;
-  oop                     **_buffer_curr;
+  StarTask     _buffer[BufferLength];
+  StarTask*    _buffer_top;
+  StarTask*    _buffer_curr;

-  HeapRegion               *_hr_buffer[BufferLength];
-  HeapRegion              **_hr_curr;
+  HeapRegion*  _hr_buffer[BufferLength];
+  HeapRegion** _hr_curr;

-  OopsInHeapRegionClosure  *_oc;
+  OopsInHeapRegionClosure*  _oc;
  double                    _closure_app_seconds;

  void process_buffer () {
@ -146,15 +150,20 @@ private:
           "the two lengths should be the same");

    double start = os::elapsedTime();
-    HeapRegion **hr_curr = _hr_buffer;
-    HeapRegion *hr_prev = NULL;
-    for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
-      HeapRegion *region = *hr_curr;
+    HeapRegion** hr_curr = _hr_buffer;
+    HeapRegion*  hr_prev = NULL;
+    for (StarTask* curr = _buffer; curr < _buffer_curr; ++curr) {
+      HeapRegion* region = *hr_curr;
      if (region != hr_prev) {
        _oc->set_region(region);
        hr_prev = region;
      }
-      _oc->do_oop(*curr);
+      if (curr->is_narrow()) {
+        assert(UseCompressedOops, "Error");
+        _oc->do_oop((narrowOop*)(*curr));
+      } else {
+        _oc->do_oop((oop*)(*curr));
+      }
      ++hr_curr;
    }
    _buffer_curr = _buffer;
@ -163,17 +172,16 @@ private:
  }

 public:
-  virtual void do_oop(narrowOop *p) {
-    guarantee(false, "NYI");
-  }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }

-  virtual void do_oop(oop *p) {
+  template <class T> void do_oop_work(T* p) {
    if (_buffer_curr == _buffer_top) {
      assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr");
      process_buffer();
    }
-
-    *_buffer_curr = p;
+    StarTask new_ref(p);
+    *_buffer_curr = new_ref;
    ++_buffer_curr;
    *_hr_curr = _from;
    ++_hr_curr;
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@ -452,13 +452,10 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs,
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
-  if (G1ConcMark) {
-    _cmThread = new ConcurrentMarkThread(this);
-    assert(cmThread() != NULL, "CM Thread should have been created");
-    assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
-  } else {
-    _cmThread = NULL;
-  }
+  _cmThread = new ConcurrentMarkThread(this);
+  assert(cmThread() != NULL, "CM Thread should have been created");
+  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
+
  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
@ -783,18 +780,18 @@ public:
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }

-  virtual void do_oop(oop* p) {
-    oop thisOop = *p;
-    if (thisOop != NULL) {
-      assert(thisOop->is_oop() || thisOop->mark() == NULL,
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
-      HeapWord* addr = (HeapWord*)thisOop;
+      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
-        _cm->grayRoot(thisOop);
+        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
@ -850,16 +847,6 @@ void ConcurrentMark::checkpointRootsInitial() {
  double start = os::elapsedTime();
  GCOverheadReporter::recordSTWStart(start);

-  // If there has not been a GC[n-1] since last GC[n] cycle completed,
-  // precede our marking with a collection of all
-  // younger generations to keep floating garbage to a minimum.
-  // YSR: we won't do this for now -- it's an optimization to be
-  // done post-beta.
-
-  // YSR:    ignoring weak refs for now; will do at bug fixing stage
-  // EVM:    assert(discoveredRefsAreClear());
-
-
  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();
@ -1135,6 +1122,13 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
    return;
  }

+  if (VerifyDuringGC) {
+    HandleMark hm;  // handle scope
+    gclog_or_tty->print(" VerifyDuringGC:(before)");
+    Universe::heap()->prepare_for_verify();
+    Universe::verify(true, false, true);
+  }
+
  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

@ -1159,10 +1153,12 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
    JavaThread::satb_mark_queue_set().set_active_all_threads(false);

    if (VerifyDuringGC) {
-      g1h->prepare_for_verify();
-      g1h->verify(/* allow_dirty */      true,
-                  /* silent */           false,
-                  /* use_prev_marking */ false);
+      HandleMark hm;  // handle scope
+      gclog_or_tty->print(" VerifyDuringGC:(after)");
+      Universe::heap()->prepare_for_verify();
+      Universe::heap()->verify(/* allow_dirty */      true,
+                               /* silent */           false,
+                               /* use_prev_marking */ false);
    }
  }

@ -1658,6 +1654,15 @@ void ConcurrentMark::cleanup() {
    return;
  }

+  if (VerifyDuringGC) {
+    HandleMark hm;  // handle scope
+    gclog_or_tty->print(" VerifyDuringGC:(before)");
+    Universe::heap()->prepare_for_verify();
+    Universe::verify(/* allow dirty  */ true,
+                     /* silent       */ false,
+                     /* prev marking */ true);
+  }
+
  _cleanup_co_tracker.disable();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
@ -1790,10 +1795,12 @@ void ConcurrentMark::cleanup() {
  g1h->increment_total_collections();

  if (VerifyDuringGC) {
-    g1h->prepare_for_verify();
-    g1h->verify(/* allow_dirty */      true,
-                /* silent */           false,
-                /* use_prev_marking */ true);
+    HandleMark hm;  // handle scope
+    gclog_or_tty->print(" VerifyDuringGC:(after)");
+    Universe::heap()->prepare_for_verify();
+    Universe::verify(/* allow dirty  */ true,
+                     /* silent       */ false,
+                     /* prev marking */ true);
  }
 }

@ -1852,12 +1859,11 @@ class G1CMKeepAliveClosure: public OopClosure {
    _g1(g1), _cm(cm),
    _bitMap(bitMap) {}

-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }

-  void do_oop(oop* p) {
-    oop thisOop = *p;
+  template <class T> void do_oop_work(T* p) {
+    oop thisOop = oopDesc::load_decode_heap_oop(p);
    HeapWord* addr = (HeapWord*)thisOop;
    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
      _bitMap->mark(addr);
@ -2016,12 +2022,11 @@ public:
  ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
    _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }

-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+  void do_oop(      oop* p) { do_oop_work(p); }

-  void do_oop(oop* p) {
-    oop         obj = *p;
+  template <class T> void do_oop_work(T* p) {
+    oop         obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

@ -2163,6 +2168,7 @@ void ConcurrentMark::deal_with_reference(oop obj) {


  HeapWord* objAddr = (HeapWord*) obj;
+  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
    HeapRegion* hr = _g1h->heap_region_containing(obj);
@ -2380,7 +2386,7 @@ class CSMarkOopClosure: public OopClosure {
    }
  }

-  bool drain() {
+  template <class T> bool drain() {
    while (_ms_ind > 0) {
      oop obj = pop();
      assert(obj != NULL, "Since index was non-zero.");
@ -2394,9 +2400,8 @@ class CSMarkOopClosure: public OopClosure {
        }
        // Now process this portion of this one.
        int lim = MIN2(next_arr_ind, len);
-        assert(!UseCompressedOops, "This needs to be fixed");
        for (int j = arr_ind; j < lim; j++) {
-          do_oop(aobj->obj_at_addr<oop>(j));
+          do_oop(aobj->obj_at_addr<T>(j));
        }

      } else {
@ -2423,13 +2428,13 @@ public:
    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  }

-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }

-  void do_oop(oop* p) {
-    oop obj = *p;
-    if (obj == NULL) return;
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (oopDesc::is_null(heap_oop)) return;
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (obj->is_forwarded()) {
      // If the object has already been forwarded, we have to make sure
      // that it's marked.  So follow the forwarding pointer.  Note that
@ -2478,7 +2483,11 @@ public:
    oop obj = oop(addr);
    if (!obj->is_forwarded()) {
      if (!_oop_cl.push(obj)) return false;
-      if (!_oop_cl.drain()) return false;
+      if (UseCompressedOops) {
+        if (!_oop_cl.drain<narrowOop>()) return false;
+      } else {
+        if (!_oop_cl.drain<oop>()) return false;
+      }
    }
    // Otherwise...
    return true;
@ -2636,9 +2645,6 @@ void ConcurrentMark::disable_co_trackers() {

 // abandon current marking iteration due to a Full GC
 void ConcurrentMark::abort() {
-  // If we're not marking, nothing to do.
-  if (!G1ConcMark) return;
-
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Empty mark stack
@ -2814,14 +2820,14 @@ private:
  CMTask*            _task;

 public:
-  void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }

-  void do_oop(oop* p) {
+  template <class T> void do_oop_work(T* p) {
    tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
+    tmp_guarantee_CM( !_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(), "invariant" );

-    oop obj = *p;
+    oop obj = oopDesc::load_decode_heap_oop(p);
    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
@ -2967,6 +2973,7 @@ void CMTask::deal_with_reference(oop obj) {
  ++_refs_reached;

  HeapWord* objAddr = (HeapWord*) obj;
+  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
    HeapRegion* hr =  _g1h->heap_region_containing(obj);
@ -3030,6 +3037,7 @@ void CMTask::deal_with_reference(oop obj) {
 void CMTask::push(oop obj) {
  HeapWord* objAddr = (HeapWord*) obj;
  tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
+  tmp_guarantee_CM( !_g1h->heap_region_containing(objAddr)->is_on_free_list(), "invariant" );
  tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
  tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );

@ -3275,6 +3283,8 @@ void CMTask::drain_local_queue(bool partially) {

      tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
                        "invariant" );
+      tmp_guarantee_CM( !_g1h->heap_region_containing(obj)->is_on_free_list(),
+                        "invariant" );

      scan_object(obj);

--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@ -763,6 +763,7 @@ private:
  CMBitMap*                   _nextMarkBitMap;
  // the task queue of this task
  CMTaskQueue*                _task_queue;
+private:
  // the task queue set---needed for stealing
  CMTaskQueueSet*             _task_queues;
  // indicates whether the task has been claimed---this is only  for
--- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp
@ -424,7 +424,7 @@ G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q,
      while (n <= next_boundary) {
        q = n;
        oop obj = oop(q);
-        if (obj->klass() == NULL) return q;
+        if (obj->klass_or_null() == NULL) return q;
        n += obj->size();
      }
      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
@ -436,7 +436,7 @@ G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q,
      while (n <= next_boundary) {
        q = n;
        oop obj = oop(q);
-        if (obj->klass() == NULL) return q;
+        if (obj->klass_or_null() == NULL) return q;
        n += _sp->block_size(q);
      }
      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
--- a/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp
@ -96,14 +96,14 @@ forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
    while (n <= addr) {
      q = n;
      oop obj = oop(q);
-      if (obj->klass() == NULL) return q;
+      if (obj->klass_or_null() == NULL) return q;
      n += obj->size();
    }
  } else {
    while (n <= addr) {
      q = n;
      oop obj = oop(q);
-      if (obj->klass() == NULL) return q;
+      if (obj->klass_or_null() == NULL) return q;
      n += _sp->block_size(q);
    }
  }
@ -115,7 +115,7 @@ forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
 inline HeapWord*
 G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
                                                     const void* addr) {
-  if (oop(q)->klass() == NULL) return q;
+  if (oop(q)->klass_or_null() == NULL) return q;
  HeapWord* n = q + _sp->block_size(q);
  // In the normal case, where the query "addr" is a card boundary, and the
  // offset table chunks are the same size as cards, the block starting at
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
@ -56,8 +56,8 @@ class ConcurrentZFThread;
 #  define IF_G1_DETAILED_STATS(code)
 #endif

-typedef GenericTaskQueue<oop*>    RefToScanQueue;
-typedef GenericTaskQueueSet<oop*> RefToScanQueueSet;
+typedef GenericTaskQueue<StarTask>    RefToScanQueue;
+typedef GenericTaskQueueSet<StarTask> RefToScanQueueSet;

 typedef int RegionIdx_t;   // needs to hold [ 0..max_regions() )
 typedef int CardIdx_t;     // needs to hold [ 0..CardsPerRegion )
@ -700,6 +700,9 @@ public:
  size_t g1_reserved_obj_bytes() { return _g1_reserved.byte_size(); }
  virtual size_t capacity() const;
  virtual size_t used() const;
+  // This should be called when we're not holding the heap lock. The
+  // result might be a bit inaccurate.
+  size_t used_unlocked() const;
  size_t recalculate_used() const;
 #ifndef PRODUCT
  size_t recalculate_used_regions() const;
@ -1271,6 +1274,552 @@ public:

 };

-// Local Variables: ***
-// c-indentation-style: gnu ***
-// End: ***
+#define use_local_bitmaps         1
+#define verify_local_bitmaps      0
+#define oop_buffer_length       256
+
+#ifndef PRODUCT
+class GCLabBitMap;
+class GCLabBitMapClosure: public BitMapClosure {
+private:
+  ConcurrentMark* _cm;
+  GCLabBitMap*    _bitmap;
+
+public:
+  GCLabBitMapClosure(ConcurrentMark* cm,
+                     GCLabBitMap* bitmap) {
+    _cm     = cm;
+    _bitmap = bitmap;
+  }
+
+  virtual bool do_bit(size_t offset);
+};
+#endif // !PRODUCT
+
+class GCLabBitMap: public BitMap {
+private:
+  ConcurrentMark* _cm;
+
+  int       _shifter;
+  size_t    _bitmap_word_covers_words;
+
+  // beginning of the heap
+  HeapWord* _heap_start;
+
+  // this is the actual start of the GCLab
+  HeapWord* _real_start_word;
+
+  // this is the actual end of the GCLab
+  HeapWord* _real_end_word;
+
+  // this is the first word, possibly located before the actual start
+  // of the GCLab, that corresponds to the first bit of the bitmap
+  HeapWord* _start_word;
+
+  // size of a GCLab in words
+  size_t _gclab_word_size;
+
+  static int shifter() {
+    return MinObjAlignment - 1;
+  }
+
+  // how many heap words does a single bitmap word corresponds to?
+  static size_t bitmap_word_covers_words() {
+    return BitsPerWord << shifter();
+  }
+
+  static size_t gclab_word_size() {
+    return G1ParallelGCAllocBufferSize / HeapWordSize;
+  }
+
+  static size_t bitmap_size_in_bits() {
+    size_t bits_in_bitmap = gclab_word_size() >> shifter();
+    // We are going to ensure that the beginning of a word in this
+    // bitmap also corresponds to the beginning of a word in the
+    // global marking bitmap. To handle the case where a GCLab
+    // starts from the middle of the bitmap, we need to add enough
+    // space (i.e. up to a bitmap word) to ensure that we have
+    // enough bits in the bitmap.
+    return bits_in_bitmap + BitsPerWord - 1;
+  }
+public:
+  GCLabBitMap(HeapWord* heap_start)
+    : BitMap(bitmap_size_in_bits()),
+      _cm(G1CollectedHeap::heap()->concurrent_mark()),
+      _shifter(shifter()),
+      _bitmap_word_covers_words(bitmap_word_covers_words()),
+      _heap_start(heap_start),
+      _gclab_word_size(gclab_word_size()),
+      _real_start_word(NULL),
+      _real_end_word(NULL),
+      _start_word(NULL)
+  {
+    guarantee( size_in_words() >= bitmap_size_in_words(),
+               "just making sure");
+  }
+
+  inline unsigned heapWordToOffset(HeapWord* addr) {
+    unsigned offset = (unsigned) pointer_delta(addr, _start_word) >> _shifter;
+    assert(offset < size(), "offset should be within bounds");
+    return offset;
+  }
+
+  inline HeapWord* offsetToHeapWord(size_t offset) {
+    HeapWord* addr =  _start_word + (offset << _shifter);
+    assert(_real_start_word <= addr && addr < _real_end_word, "invariant");
+    return addr;
+  }
+
+  bool fields_well_formed() {
+    bool ret1 = (_real_start_word == NULL) &&
+                (_real_end_word == NULL) &&
+                (_start_word == NULL);
+    if (ret1)
+      return true;
+
+    bool ret2 = _real_start_word >= _start_word &&
+      _start_word < _real_end_word &&
+      (_real_start_word + _gclab_word_size) == _real_end_word &&
+      (_start_word + _gclab_word_size + _bitmap_word_covers_words)
+                                                              > _real_end_word;
+    return ret2;
+  }
+
+  inline bool mark(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    if (addr >= _real_start_word && addr < _real_end_word) {
+      assert(!isMarked(addr), "should not have already been marked");
+
+      // first mark it on the bitmap
+      at_put(heapWordToOffset(addr), true);
+
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  inline bool isMarked(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    return at(heapWordToOffset(addr));
+  }
+
+  void set_buffer(HeapWord* start) {
+    guarantee(use_local_bitmaps, "invariant");
+    clear();
+
+    assert(start != NULL, "invariant");
+    _real_start_word = start;
+    _real_end_word   = start + _gclab_word_size;
+
+    size_t diff =
+      pointer_delta(start, _heap_start) % _bitmap_word_covers_words;
+    _start_word = start - diff;
+
+    assert(fields_well_formed(), "invariant");
+  }
+
+#ifndef PRODUCT
+  void verify() {
+    // verify that the marks have been propagated
+    GCLabBitMapClosure cl(_cm, this);
+    iterate(&cl);
+  }
+#endif // PRODUCT
+
+  void retire() {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(fields_well_formed(), "invariant");
+
+    if (_start_word != NULL) {
+      CMBitMap*       mark_bitmap = _cm->nextMarkBitMap();
+
+      // this means that the bitmap was set up for the GCLab
+      assert(_real_start_word != NULL && _real_end_word != NULL, "invariant");
+
+      mark_bitmap->mostly_disjoint_range_union(this,
+                                0, // always start from the start of the bitmap
+                                _start_word,
+                                size_in_words());
+      _cm->grayRegionIfNecessary(MemRegion(_real_start_word, _real_end_word));
+
+#ifndef PRODUCT
+      if (use_local_bitmaps && verify_local_bitmaps)
+        verify();
+#endif // PRODUCT
+    } else {
+      assert(_real_start_word == NULL && _real_end_word == NULL, "invariant");
+    }
+  }
+
+  static size_t bitmap_size_in_words() {
+    return (bitmap_size_in_bits() + BitsPerWord - 1) / BitsPerWord;
+  }
+};
+
+class G1ParGCAllocBuffer: public ParGCAllocBuffer {
+private:
+  bool        _retired;
+  bool        _during_marking;
+  GCLabBitMap _bitmap;
+
+public:
+  G1ParGCAllocBuffer() :
+    ParGCAllocBuffer(G1ParallelGCAllocBufferSize / HeapWordSize),
+    _during_marking(G1CollectedHeap::heap()->mark_in_progress()),
+    _bitmap(G1CollectedHeap::heap()->reserved_region().start()),
+    _retired(false)
+  { }
+
+  inline bool mark(HeapWord* addr) {
+    guarantee(use_local_bitmaps, "invariant");
+    assert(_during_marking, "invariant");
+    return _bitmap.mark(addr);
+  }
+
+  inline void set_buf(HeapWord* buf) {
+    if (use_local_bitmaps && _during_marking)
+      _bitmap.set_buffer(buf);
+    ParGCAllocBuffer::set_buf(buf);
+    _retired = false;
+  }
+
+  inline void retire(bool end_of_gc, bool retain) {
+    if (_retired)
+      return;
+    if (use_local_bitmaps && _during_marking) {
+      _bitmap.retire();
+    }
+    ParGCAllocBuffer::retire(end_of_gc, retain);
+    _retired = true;
+  }
+};
+
+class G1ParScanThreadState : public StackObj {
+protected:
+  G1CollectedHeap* _g1h;
+  RefToScanQueue*  _refs;
+  DirtyCardQueue   _dcq;
+  CardTableModRefBS* _ct_bs;
+  G1RemSet* _g1_rem;
+
+  typedef GrowableArray<StarTask> OverflowQueue;
+  OverflowQueue* _overflowed_refs;
+
+  G1ParGCAllocBuffer _alloc_buffers[GCAllocPurposeCount];
+  ageTable           _age_table;
+
+  size_t           _alloc_buffer_waste;
+  size_t           _undo_waste;
+
+  OopsInHeapRegionClosure*      _evac_failure_cl;
+  G1ParScanHeapEvacClosure*     _evac_cl;
+  G1ParScanPartialArrayClosure* _partial_scan_cl;
+
+  int _hash_seed;
+  int _queue_num;
+
+  int _term_attempts;
+#if G1_DETAILED_STATS
+  int _pushes, _pops, _steals, _steal_attempts;
+  int _overflow_pushes;
+#endif
+
+  double _start;
+  double _start_strong_roots;
+  double _strong_roots_time;
+  double _start_term;
+  double _term_time;
+
+  // Map from young-age-index (0 == not young, 1 is youngest) to
+  // surviving words. base is what we get back from the malloc call
+  size_t* _surviving_young_words_base;
+  // this points into the array, as we use the first few entries for padding
+  size_t* _surviving_young_words;
+
+#define PADDING_ELEM_NUM (64 / sizeof(size_t))
+
+  void   add_to_alloc_buffer_waste(size_t waste) { _alloc_buffer_waste += waste; }
+
+  void   add_to_undo_waste(size_t waste)         { _undo_waste += waste; }
+
+  DirtyCardQueue& dirty_card_queue()             { return _dcq;  }
+  CardTableModRefBS* ctbs()                      { return _ct_bs; }
+
+  template <class T> void immediate_rs_update(HeapRegion* from, T* p, int tid) {
+    if (!from->is_survivor()) {
+      _g1_rem->par_write_ref(from, p, tid);
+    }
+  }
+
+  template <class T> void deferred_rs_update(HeapRegion* from, T* p, int tid) {
+    // If the new value of the field points to the same region or
+    // is the to-space, we don't need to include it in the Rset updates.
+    if (!from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) && !from->is_survivor()) {
+      size_t card_index = ctbs()->index_for(p);
+      // If the card hasn't been added to the buffer, do it.
+      if (ctbs()->mark_card_deferred(card_index)) {
+        dirty_card_queue().enqueue((jbyte*)ctbs()->byte_for_index(card_index));
+      }
+    }
+  }
+
+public:
+  G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num);
+
+  ~G1ParScanThreadState() {
+    FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
+  }
+
+  RefToScanQueue*   refs()            { return _refs;             }
+  OverflowQueue*    overflowed_refs() { return _overflowed_refs;  }
+  ageTable*         age_table()       { return &_age_table;       }
+
+  G1ParGCAllocBuffer* alloc_buffer(GCAllocPurpose purpose) {
+    return &_alloc_buffers[purpose];
+  }
+
+  size_t alloc_buffer_waste()                    { return _alloc_buffer_waste; }
+  size_t undo_waste()                            { return _undo_waste; }
+
+  template <class T> void push_on_queue(T* ref) {
+    assert(ref != NULL, "invariant");
+    assert(has_partial_array_mask(ref) ||
+           _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(ref)), "invariant");
+#ifdef ASSERT
+    if (has_partial_array_mask(ref)) {
+      oop p = clear_partial_array_mask(ref);
+      // Verify that we point into the CS
+      assert(_g1h->obj_in_cs(p), "Should be in CS");
+    }
+#endif
+    if (!refs()->push(ref)) {
+      overflowed_refs()->push(ref);
+      IF_G1_DETAILED_STATS(note_overflow_push());
+    } else {
+      IF_G1_DETAILED_STATS(note_push());
+    }
+  }
+
+  void pop_from_queue(StarTask& ref) {
+    if (refs()->pop_local(ref)) {
+      assert((oop*)ref != NULL, "pop_local() returned true");
+      assert(UseCompressedOops || !ref.is_narrow(), "Error");
+      assert(has_partial_array_mask((oop*)ref) ||
+             _g1h->obj_in_cs(ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)ref)
+                                             : oopDesc::load_decode_heap_oop((oop*)ref)),
+             "invariant");
+      IF_G1_DETAILED_STATS(note_pop());
+    } else {
+      StarTask null_task;
+      ref = null_task;
+    }
+  }
+
+  void pop_from_overflow_queue(StarTask& ref) {
+    StarTask new_ref = overflowed_refs()->pop();
+    assert((oop*)new_ref != NULL, "pop() from a local non-empty stack");
+    assert(UseCompressedOops || !new_ref.is_narrow(), "Error");
+    assert(has_partial_array_mask((oop*)new_ref) ||
+           _g1h->obj_in_cs(new_ref.is_narrow() ? oopDesc::load_decode_heap_oop((narrowOop*)new_ref)
+                                               : oopDesc::load_decode_heap_oop((oop*)new_ref)),
+             "invariant");
+    ref = new_ref;
+  }
+
+  int refs_to_scan()                             { return refs()->size();                 }
+  int overflowed_refs_to_scan()                  { return overflowed_refs()->length();    }
+
+  template <class T> void update_rs(HeapRegion* from, T* p, int tid) {
+    if (G1DeferredRSUpdate) {
+      deferred_rs_update(from, p, tid);
+    } else {
+      immediate_rs_update(from, p, tid);
+    }
+  }
+
+  HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
+
+    HeapWord* obj = NULL;
+    if (word_sz * 100 <
+        (size_t)(G1ParallelGCAllocBufferSize / HeapWordSize) *
+                                                  ParallelGCBufferWastePct) {
+      G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
+      add_to_alloc_buffer_waste(alloc_buf->words_remaining());
+      alloc_buf->retire(false, false);
+
+      HeapWord* buf =
+        _g1h->par_allocate_during_gc(purpose, G1ParallelGCAllocBufferSize / HeapWordSize);
+      if (buf == NULL) return NULL; // Let caller handle allocation failure.
+      // Otherwise.
+      alloc_buf->set_buf(buf);
+
+      obj = alloc_buf->allocate(word_sz);
+      assert(obj != NULL, "buffer was definitely big enough...");
+    } else {
+      obj = _g1h->par_allocate_during_gc(purpose, word_sz);
+    }
+    return obj;
+  }
+
+  HeapWord* allocate(GCAllocPurpose purpose, size_t word_sz) {
+    HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz);
+    if (obj != NULL) return obj;
+    return allocate_slow(purpose, word_sz);
+  }
+
+  void undo_allocation(GCAllocPurpose purpose, HeapWord* obj, size_t word_sz) {
+    if (alloc_buffer(purpose)->contains(obj)) {
+      assert(alloc_buffer(purpose)->contains(obj + word_sz - 1),
+             "should contain whole object");
+      alloc_buffer(purpose)->undo_allocation(obj, word_sz);
+    } else {
+      CollectedHeap::fill_with_object(obj, word_sz);
+      add_to_undo_waste(word_sz);
+    }
+  }
+
+  void set_evac_failure_closure(OopsInHeapRegionClosure* evac_failure_cl) {
+    _evac_failure_cl = evac_failure_cl;
+  }
+  OopsInHeapRegionClosure* evac_failure_closure() {
+    return _evac_failure_cl;
+  }
+
+  void set_evac_closure(G1ParScanHeapEvacClosure* evac_cl) {
+    _evac_cl = evac_cl;
+  }
+
+  void set_partial_scan_closure(G1ParScanPartialArrayClosure* partial_scan_cl) {
+    _partial_scan_cl = partial_scan_cl;
+  }
+
+  int* hash_seed() { return &_hash_seed; }
+  int  queue_num() { return _queue_num; }
+
+  int term_attempts()   { return _term_attempts; }
+  void note_term_attempt()  { _term_attempts++; }
+
+#if G1_DETAILED_STATS
+  int pushes()          { return _pushes; }
+  int pops()            { return _pops; }
+  int steals()          { return _steals; }
+  int steal_attempts()  { return _steal_attempts; }
+  int overflow_pushes() { return _overflow_pushes; }
+
+  void note_push()          { _pushes++; }
+  void note_pop()           { _pops++; }
+  void note_steal()         { _steals++; }
+  void note_steal_attempt() { _steal_attempts++; }
+  void note_overflow_push() { _overflow_pushes++; }
+#endif
+
+  void start_strong_roots() {
+    _start_strong_roots = os::elapsedTime();
+  }
+  void end_strong_roots() {
+    _strong_roots_time += (os::elapsedTime() - _start_strong_roots);
+  }
+  double strong_roots_time() { return _strong_roots_time; }
+
+  void start_term_time() {
+    note_term_attempt();
+    _start_term = os::elapsedTime();
+  }
+  void end_term_time() {
+    _term_time += (os::elapsedTime() - _start_term);
+  }
+  double term_time() { return _term_time; }
+
+  double elapsed() {
+    return os::elapsedTime() - _start;
+  }
+
+  size_t* surviving_young_words() {
+    // We add on to hide entry 0 which accumulates surviving words for
+    // age -1 regions (i.e. non-young ones)
+    return _surviving_young_words;
+  }
+
+  void retire_alloc_buffers() {
+    for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
+      size_t waste = _alloc_buffers[ap].words_remaining();
+      add_to_alloc_buffer_waste(waste);
+      _alloc_buffers[ap].retire(true, false);
+    }
+  }
+
+private:
+  template <class T> void deal_with_reference(T* ref_to_scan) {
+    if (has_partial_array_mask(ref_to_scan)) {
+      _partial_scan_cl->do_oop_nv(ref_to_scan);
+    } else {
+      // Note: we can use "raw" versions of "region_containing" because
+      // "obj_to_scan" is definitely in the heap, and is not in a
+      // humongous region.
+      HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
+      _evac_cl->set_region(r);
+      _evac_cl->do_oop_nv(ref_to_scan);
+    }
+  }
+
+public:
+  void trim_queue() {
+    // I've replicated the loop twice, first to drain the overflow
+    // queue, second to drain the task queue. This is better than
+    // having a single loop, which checks both conditions and, inside
+    // it, either pops the overflow queue or the task queue, as each
+    // loop is tighter. Also, the decision to drain the overflow queue
+    // first is not arbitrary, as the overflow queue is not visible
+    // to the other workers, whereas the task queue is. So, we want to
+    // drain the "invisible" entries first, while allowing the other
+    // workers to potentially steal the "visible" entries.
+
+    while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) {
+      while (overflowed_refs_to_scan() > 0) {
+        StarTask ref_to_scan;
+        assert((oop*)ref_to_scan == NULL, "Constructed above");
+        pop_from_overflow_queue(ref_to_scan);
+        // We shouldn't have pushed it on the queue if it was not
+        // pointing into the CSet.
+        assert((oop*)ref_to_scan != NULL, "Follows from inner loop invariant");
+        if (ref_to_scan.is_narrow()) {
+          assert(UseCompressedOops, "Error");
+          narrowOop* p = (narrowOop*)ref_to_scan;
+          assert(!has_partial_array_mask(p) &&
+                 _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+          deal_with_reference(p);
+        } else {
+          oop* p = (oop*)ref_to_scan;
+          assert((has_partial_array_mask(p) && _g1h->obj_in_cs(clear_partial_array_mask(p))) ||
+                 _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+          deal_with_reference(p);
+        }
+      }
+
+      while (refs_to_scan() > 0) {
+        StarTask ref_to_scan;
+        assert((oop*)ref_to_scan == NULL, "Constructed above");
+        pop_from_queue(ref_to_scan);
+        if ((oop*)ref_to_scan != NULL) {
+          if (ref_to_scan.is_narrow()) {
+            assert(UseCompressedOops, "Error");
+            narrowOop* p = (narrowOop*)ref_to_scan;
+            assert(!has_partial_array_mask(p) &&
+                   _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+            deal_with_reference(p);
+          } else {
+            oop* p = (oop*)ref_to_scan;
+            assert((has_partial_array_mask(p) && _g1h->obj_in_cs(clear_partial_array_mask(p))) ||
+                  _g1h->obj_in_cs(oopDesc::load_decode_heap_oop(p)), "sanity");
+            deal_with_reference(p);
+          }
+        }
+      }
+    }
+  }
+};
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
@ -293,10 +293,6 @@ void G1CollectorPolicy::init() {
  if (G1SteadyStateUsed < 50) {
    vm_exit_during_initialization("G1SteadyStateUsed must be at least 50%.");
  }
-  if (UseConcMarkSweepGC) {
-    vm_exit_during_initialization("-XX:+UseG1GC is incompatible with "
-                                  "-XX:+UseConcMarkSweepGC.");
-  }

  initialize_gc_policy_counters();

--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
@ -42,18 +42,6 @@ public:
  virtual void set_region(HeapRegion* from) { _from = from; }
 };

-
-class G1ScanAndBalanceClosure : public OopClosure {
-  G1CollectedHeap* _g1;
-  static int _nq;
-public:
-  G1ScanAndBalanceClosure(G1CollectedHeap* g1) : _g1(g1) { }
-  inline  void do_oop_nv(oop* p);
-  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
-};
-
 class G1ParClosureSuper : public OopsInHeapRegionClosure {
 protected:
  G1CollectedHeap* _g1;
@ -69,34 +57,32 @@ class G1ParScanClosure : public G1ParClosureSuper {
 public:
  G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
    G1ParClosureSuper(g1, par_scan_state) { }
-  void do_oop_nv(oop* p);   // should be made inline
-  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
  virtual void do_oop(oop* p)          { do_oop_nv(p); }
  virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
 };

-#define G1_PARTIAL_ARRAY_MASK 1
+#define G1_PARTIAL_ARRAY_MASK 0x2

-inline bool has_partial_array_mask(oop* ref) {
-  return (intptr_t) ref & G1_PARTIAL_ARRAY_MASK;
+template <class T> inline bool has_partial_array_mask(T* ref) {
+  return ((uintptr_t)ref & G1_PARTIAL_ARRAY_MASK) == G1_PARTIAL_ARRAY_MASK;
 }

-inline oop* set_partial_array_mask(oop obj) {
-  return (oop*) ((intptr_t) obj | G1_PARTIAL_ARRAY_MASK);
+template <class T> inline T* set_partial_array_mask(T obj) {
+  assert(((uintptr_t)obj & G1_PARTIAL_ARRAY_MASK) == 0, "Information loss!");
+  return (T*) ((uintptr_t)obj | G1_PARTIAL_ARRAY_MASK);
 }

-inline oop clear_partial_array_mask(oop* ref) {
-  return oop((intptr_t) ref & ~G1_PARTIAL_ARRAY_MASK);
+template <class T> inline oop clear_partial_array_mask(T* ref) {
+  return oop((intptr_t)ref & ~G1_PARTIAL_ARRAY_MASK);
 }

 class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
  G1ParScanClosure _scanner;
-  template <class T> void process_array_chunk(oop obj, int start, int end);
 public:
  G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
    G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state) { }
-  void do_oop_nv(oop* p);
-  void do_oop_nv(narrowOop* p)      { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
  virtual void do_oop(oop* p)       { do_oop_nv(p); }
  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
@ -105,7 +91,7 @@ public:
 class G1ParCopyHelper : public G1ParClosureSuper {
  G1ParScanClosure *_scanner;
 protected:
-  void mark_forwardee(oop* p);
+  template <class T> void mark_forwardee(T* p);
  oop copy_to_survivor_space(oop obj);
 public:
  G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
@ -117,36 +103,35 @@ template<bool do_gen_barrier, G1Barrier barrier,
         bool do_mark_forwardee, bool skip_cset_test>
 class G1ParCopyClosure : public G1ParCopyHelper {
  G1ParScanClosure _scanner;
-  void do_oop_work(oop* p);
-  void do_oop_work(narrowOop* p) { guarantee(false, "NYI"); }
+  template <class T> void do_oop_work(T* p);
 public:
  G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
    _scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { }
-  inline void do_oop_nv(oop* p) {
+  template <class T> void do_oop_nv(T* p) {
    do_oop_work(p);
    if (do_mark_forwardee)
      mark_forwardee(p);
  }
-  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
  virtual void do_oop(oop* p)       { do_oop_nv(p); }
  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };

 typedef G1ParCopyClosure<false, G1BarrierNone, false, false> G1ParScanExtRootClosure;
 typedef G1ParCopyClosure<true,  G1BarrierNone, false, false> G1ParScanPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS,   false, false> G1ParScanHeapRSClosure;
 typedef G1ParCopyClosure<false, G1BarrierNone, true,  false> G1ParScanAndMarkExtRootClosure;
 typedef G1ParCopyClosure<true,  G1BarrierNone, true,  false> G1ParScanAndMarkPermClosure;
-typedef G1ParCopyClosure<false, G1BarrierRS,   false, false> G1ParScanHeapRSClosure;
 typedef G1ParCopyClosure<false, G1BarrierRS,   true,  false> G1ParScanAndMarkHeapRSClosure;
 // This is the only case when we set skip_cset_test. Basically, this
 // closure is (should?) only be called directly while we're draining
 // the overflow and task queues. In that case we know that the
 // reference in question points into the collection set, otherwise we
-// would not have pushed it on the queue.
-typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;
+// would not have pushed it on the queue. The following is defined in
+// g1_specialized_oop_closures.hpp.
+// typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;
 // We need a separate closure to handle references during evacuation
-// failure processing, as it cannot asume that the reference already
- // points to the collection set (like G1ParScanHeapEvacClosure does).
+// failure processing, as we cannot asume that the reference already
+// points into the collection set (like G1ParScanHeapEvacClosure does).
 typedef G1ParCopyClosure<false, G1BarrierEvac, false, false> G1ParScanHeapEvacFailureClosure;

 class FilterIntoCSClosure: public OopClosure {
@ -158,10 +143,9 @@ public:
                        G1CollectedHeap* g1, OopClosure* oc) :
    _dcto_cl(dcto_cl), _g1(g1), _oc(oc)
  {}
-  inline void do_oop_nv(oop* p);
-  inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p)        { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
  bool apply_to_weak_ref_discovered_field() { return true; }
  bool do_header() { return false; }
 };
@ -174,10 +158,9 @@ public:
                                     OopsInHeapRegionClosure* oc) :
    _g1(g1), _oc(oc)
  {}
-  inline  void do_oop_nv(oop* p);
-  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p) { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
  bool apply_to_weak_ref_discovered_field() { return true; }
  bool do_header() { return false; }
  void set_region(HeapRegion* from) {
@ -195,10 +178,9 @@ public:
                                            ConcurrentMark* cm)
  : _g1(g1), _oc(oc), _cm(cm) { }

-  inline  void do_oop_nv(oop* p);
-  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p)    { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p) { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
  bool apply_to_weak_ref_discovered_field() { return true; }
  bool do_header() { return false; }
  void set_region(HeapRegion* from) {
@ -213,10 +195,9 @@ class FilterOutOfRegionClosure: public OopClosure {
  int _out_of_region;
 public:
  FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc);
-  inline  void do_oop_nv(oop* p);
-  inline  void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
-  virtual void do_oop(oop* p);
-  virtual void do_oop(narrowOop* p)   { guarantee(false, "NYI"); }
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(oop* p) { do_oop_nv(p); }
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
  bool apply_to_weak_ref_discovered_field() { return true; }
  bool do_header() { return false; }
  int out_of_region() { return _out_of_region; }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
@ -31,9 +31,10 @@
 // perf-critical inner loop.
 #define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0

-inline void FilterIntoCSClosure::do_oop_nv(oop* p) {
-  oop obj = *p;
-  if (obj != NULL && _g1->obj_in_cs(obj)) {
+template <class T> inline void FilterIntoCSClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop) &&
+      _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
    _oc->do_oop(p);
 #if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT
    _dcto_cl->incr_count();
@ -41,44 +42,32 @@ inline void FilterIntoCSClosure::do_oop_nv(oop* p) {
  }
 }

-inline void FilterIntoCSClosure::do_oop(oop* p)
-{
-  do_oop_nv(p);
-}
-
 #define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0

-inline void FilterOutOfRegionClosure::do_oop_nv(oop* p) {
-  oop obj = *p;
-  HeapWord* obj_hw = (HeapWord*)obj;
-  if (obj_hw != NULL && (obj_hw < _r_bottom || obj_hw >= _r_end)) {
-    _oc->do_oop(p);
+template <class T> inline void FilterOutOfRegionClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop)) {
+    HeapWord* obj_hw = (HeapWord*)oopDesc::decode_heap_oop_not_null(heap_oop);
+    if (obj_hw < _r_bottom || obj_hw >= _r_end) {
+      _oc->do_oop(p);
 #if FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT
-    _out_of_region++;
+      _out_of_region++;
 #endif
+    }
  }
 }

-inline void FilterOutOfRegionClosure::do_oop(oop* p)
-{
-  do_oop_nv(p);
-}
-
-inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
-  oop obj = *p;
-  if (obj != NULL && _g1->obj_in_cs(obj))
+template <class T> inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop) &&
+      _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop)))
    _oc->do_oop(p);
 }

-inline void FilterInHeapRegionAndIntoCSClosure::do_oop(oop* p)
-{
-  do_oop_nv(p);
-}
-
-
-inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
-  oop obj = *p;
-  if (obj != NULL) {
+template <class T> inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    HeapRegion* hr = _g1->heap_region_containing((HeapWord*) obj);
    if (hr != NULL) {
      if (hr->in_collection_set())
@ -89,24 +78,29 @@ inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
  }
 }

-inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop(oop* p)
-{
-  do_oop_nv(p);
-}
+// This closure is applied to the fields of the objects that have just been copied.
+template <class T> inline void G1ParScanClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);

-inline void G1ScanAndBalanceClosure::do_oop_nv(oop* p) {
-  RefToScanQueue* q;
-  if (ParallelGCThreads > 0) {
-    // Deal the work out equally.
-    _nq = (_nq + 1) % ParallelGCThreads;
-    q = _g1->task_queue(_nq);
-  } else {
-    q = _g1->task_queue(0);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+    if (_g1->in_cset_fast_test(obj)) {
+      // We're not going to even bother checking whether the object is
+      // already forwarded or not, as this usually causes an immediate
+      // stall. We'll try to prefetch the object (for write, given that
+      // we might need to install the forwarding reference) and we'll
+      // get back to it when pop it from the queue
+      Prefetch::write(obj->mark_addr(), 0);
+      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+
+      // slightly paranoid test; I'm trying to catch potential
+      // problems before we go into push_on_queue to know where the
+      // problem is coming from
+      assert(obj == oopDesc::load_decode_heap_oop(p),
+             "p should still be pointing to obj");
+      _par_scan_state->push_on_queue(p);
+    } else {
+      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
+    }
  }
-  bool nooverflow = q->push(p);
-  guarantee(nooverflow, "Overflow during poplularity region processing");
-}
-
-inline void G1ScanAndBalanceClosure::do_oop(oop* p) {
-  do_oop_nv(p);
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@ -65,11 +65,10 @@ public:
  void set_region(HeapRegion* from) {
    _blk->set_region(from);
  }
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop* p) {
-    oop obj = *p;
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
    if (_g1->obj_in_cs(obj)) _blk->do_oop(p);
  }
  bool apply_to_weak_ref_discovered_field() { return true; }
@ -110,11 +109,10 @@ class VerifyRSCleanCardOopClosure: public OopClosure {
 public:
  VerifyRSCleanCardOopClosure(G1CollectedHeap* g1) : _g1(g1) {}

-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop* p) {
-    oop obj = *p;
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
    HeapRegion* to = _g1->heap_region_containing(obj);
    guarantee(to == NULL || !to->in_collection_set(),
              "Missed a rem set member.");
@ -129,9 +127,9 @@ HRInto_G1RemSet::HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs)
 {
  _seq_task = new SubTasksDone(NumSeqTasks);
  guarantee(n_workers() > 0, "There should be some workers");
-  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<oop*>*, n_workers());
+  _new_refs = NEW_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, n_workers());
  for (uint i = 0; i < n_workers(); i++) {
-    _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<oop*>(8192,true);
+    _new_refs[i] = new (ResourceObj::C_HEAP) GrowableArray<OopOrNarrowOopStar>(8192,true);
  }
 }

@ -140,7 +138,7 @@ HRInto_G1RemSet::~HRInto_G1RemSet() {
  for (uint i = 0; i < n_workers(); i++) {
    delete _new_refs[i];
  }
-  FREE_C_HEAP_ARRAY(GrowableArray<oop*>*, _new_refs);
+  FREE_C_HEAP_ARRAY(GrowableArray<OopOrNarrowOopStar>*, _new_refs);
 }

 void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
@ -428,15 +426,15 @@ public:
  }
 };

-void
-HRInto_G1RemSet::scanNewRefsRS(OopsInHeapRegionClosure* oc,
-                                             int worker_i) {
+template <class T> void
+HRInto_G1RemSet::scanNewRefsRS_work(OopsInHeapRegionClosure* oc,
+                                    int worker_i) {
  double scan_new_refs_start_sec = os::elapsedTime();
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  CardTableModRefBS* ct_bs = (CardTableModRefBS*) (g1h->barrier_set());
  for (int i = 0; i < _new_refs[worker_i]->length(); i++) {
-    oop* p = _new_refs[worker_i]->at(i);
-    oop obj = *p;
+    T* p = (T*) _new_refs[worker_i]->at(i);
+    oop obj = oopDesc::load_decode_heap_oop(p);
    // *p was in the collection set when p was pushed on "_new_refs", but
    // another thread may have processed this location from an RS, so it
    // might not point into the CS any longer.  If so, it's obviously been
@ -549,11 +547,10 @@ class UpdateRSetOopsIntoCSImmediate : public OopClosure {
  G1CollectedHeap* _g1;
 public:
  UpdateRSetOopsIntoCSImmediate(G1CollectedHeap* g1) : _g1(g1) { }
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop* p) {
-    HeapRegion* to = _g1->heap_region_containing(*p);
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    HeapRegion* to = _g1->heap_region_containing(oopDesc::load_decode_heap_oop(p));
    if (to->in_collection_set()) {
      to->rem_set()->add_reference(p, 0);
    }
@ -567,11 +564,10 @@ class UpdateRSetOopsIntoCSDeferred : public OopClosure {
 public:
  UpdateRSetOopsIntoCSDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) { }
-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
-  virtual void do_oop(oop* p) {
-    oop obj = *p;
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
    if (_g1->obj_in_cs(obj)) {
      size_t card_index = _ct_bs->index_for(p);
      if (_ct_bs->mark_card_deferred(card_index)) {
@ -581,10 +577,10 @@ public:
  }
 };

-void HRInto_G1RemSet::new_refs_iterate(OopClosure* cl) {
+template <class T> void HRInto_G1RemSet::new_refs_iterate_work(OopClosure* cl) {
  for (size_t i = 0; i < n_workers(); i++) {
    for (int j = 0; j < _new_refs[i]->length(); j++) {
-      oop* p = _new_refs[i]->at(j);
+      T* p = (T*) _new_refs[i]->at(j);
      cl->do_oop(p);
    }
  }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp
@ -62,10 +62,12 @@ public:
  // If "this" is of the given subtype, return "this", else "NULL".
  virtual HRInto_G1RemSet* as_HRInto_G1RemSet() { return NULL; }

-  // Record, if necessary, the fact that *p (where "p" is in region "from")
-  // has changed to its new value.
+  // Record, if necessary, the fact that *p (where "p" is in region "from",
+  // and is, a fortiori, required to be non-NULL) has changed to its new value.
  virtual void write_ref(HeapRegion* from, oop* p) = 0;
+  virtual void write_ref(HeapRegion* from, narrowOop* p) = 0;
  virtual void par_write_ref(HeapRegion* from, oop* p, int tid) = 0;
+  virtual void par_write_ref(HeapRegion* from, narrowOop* p, int tid) = 0;

  // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
  // or card, respectively, such that a region or card with a corresponding
@ -105,7 +107,9 @@ public:

  // Nothing is necessary in the version below.
  void write_ref(HeapRegion* from, oop* p) {}
+  void write_ref(HeapRegion* from, narrowOop* p) {}
  void par_write_ref(HeapRegion* from, oop* p, int tid) {}
+  void par_write_ref(HeapRegion* from, narrowOop* p, int tid) {}

  void scrub(BitMap* region_bm, BitMap* card_bm) {}
  void scrub_par(BitMap* region_bm, BitMap* card_bm,
@ -143,8 +147,19 @@ protected:
  // their references into the collection summarized in "_new_refs".
  bool _par_traversal_in_progress;
  void set_par_traversal(bool b) { _par_traversal_in_progress = b; }
-  GrowableArray<oop*>** _new_refs;
-  void new_refs_iterate(OopClosure* cl);
+  GrowableArray<OopOrNarrowOopStar>** _new_refs;
+  template <class T> void new_refs_iterate_work(OopClosure* cl);
+  void new_refs_iterate(OopClosure* cl) {
+    if (UseCompressedOops) {
+      new_refs_iterate_work<narrowOop>(cl);
+    } else {
+      new_refs_iterate_work<oop>(cl);
+    }
+  }
+
+protected:
+  template <class T> void write_ref_nv(HeapRegion* from, T* p);
+  template <class T> void par_write_ref_nv(HeapRegion* from, T* p, int tid);

 public:
  // This is called to reset dual hash tables after the gc pause
@ -161,7 +176,14 @@ public:
  void prepare_for_oops_into_collection_set_do();
  void cleanup_after_oops_into_collection_set_do();
  void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
-  void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i);
+  template <class T> void scanNewRefsRS_work(OopsInHeapRegionClosure* oc, int worker_i);
+  void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i) {
+    if (UseCompressedOops) {
+      scanNewRefsRS_work<narrowOop>(oc, worker_i);
+    } else {
+      scanNewRefsRS_work<oop>(oc, worker_i);
+    }
+  }
  void updateRS(int worker_i);
  HeapRegion* calculateStartRegion(int i);

@ -172,12 +194,22 @@ public:

  // Record, if necessary, the fact that *p (where "p" is in region "from",
  // which is required to be non-NULL) has changed to a new non-NULL value.
-  inline void write_ref(HeapRegion* from, oop* p);
-  // The "_nv" version is the same; it exists just so that it is not virtual.
-  inline void write_ref_nv(HeapRegion* from, oop* p);
+  // [Below the virtual version calls a non-virtual protected
+  // workhorse that is templatified for narrow vs wide oop.]
+  inline void write_ref(HeapRegion* from, oop* p) {
+    write_ref_nv(from, p);
+  }
+  inline void write_ref(HeapRegion* from, narrowOop* p) {
+    write_ref_nv(from, p);
+  }
+  inline void par_write_ref(HeapRegion* from, oop* p, int tid) {
+    par_write_ref_nv(from, p, tid);
+  }
+  inline void par_write_ref(HeapRegion* from, narrowOop* p, int tid) {
+    par_write_ref_nv(from, p, tid);
+  }

-  inline bool self_forwarded(oop obj);
-  inline void par_write_ref(HeapRegion* from, oop* p, int tid);
+  bool self_forwarded(oop obj);

  void scrub(BitMap* region_bm, BitMap* card_bm);
  void scrub_par(BitMap* region_bm, BitMap* card_bm,
@ -208,6 +240,9 @@ class UpdateRSOopClosure: public OopClosure {
  HeapRegion* _from;
  HRInto_G1RemSet* _rs;
  int _worker_i;
+
+  template <class T> void do_oop_work(T* p);
+
 public:
  UpdateRSOopClosure(HRInto_G1RemSet* rs, int worker_i = 0) :
    _from(NULL), _rs(rs), _worker_i(worker_i) {
@ -219,11 +254,10 @@ public:
    _from = from;
  }

-  virtual void do_oop(narrowOop* p);
-  virtual void do_oop(oop* p);
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(oop* p)       { do_oop_work(p); }

  // Override: this closure is idempotent.
  //  bool idempotent() { return true; }
  bool apply_to_weak_ref_discovered_field() { return true; }
 };
-
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp
@ -30,12 +30,8 @@ inline size_t G1RemSet::n_workers() {
  }
 }

-inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) {
-  par_write_ref(from, p, 0);
-}
-
-inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) {
-  write_ref_nv(from, p);
+template <class T> inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, T* p) {
+  par_write_ref_nv(from, p, 0);
 }

 inline bool HRInto_G1RemSet::self_forwarded(oop obj) {
@ -43,8 +39,8 @@ inline bool HRInto_G1RemSet::self_forwarded(oop obj) {
  return result;
 }

-inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) {
-  oop obj = *p;
+template <class T> inline void HRInto_G1RemSet::par_write_ref_nv(HeapRegion* from, T* p, int tid) {
+  oop obj = oopDesc::load_decode_heap_oop(p);
 #ifdef ASSERT
  // can't do because of races
  // assert(obj == NULL || obj->is_oop(), "expected an oop");
@ -71,7 +67,7 @@ inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) {
    // false during the evacuation failure handing.
    if (_par_traversal_in_progress &&
        to->in_collection_set() && !self_forwarded(obj)) {
-      _new_refs[tid]->push(p);
+      _new_refs[tid]->push((void*)p);
      // Deferred updates to the Cset are either discarded (in the normal case),
      // or processed (if an evacuation failure occurs) at the end
      // of the collection.
@ -89,11 +85,7 @@ inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) {
  }
 }

-inline void UpdateRSOopClosure::do_oop(narrowOop* p) {
-  guarantee(false, "NYI");
-}
-
-inline void UpdateRSOopClosure::do_oop(oop* p) {
+template <class T> inline void UpdateRSOopClosure::do_oop_work(T* p) {
  assert(_from != NULL, "from region must be non-NULL");
  _rs->par_write_ref(_from, p, _worker_i);
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp
@ -34,6 +34,7 @@ G1SATBCardTableModRefBS::G1SATBCardTableModRefBS(MemRegion whole_heap,


 void G1SATBCardTableModRefBS::enqueue(oop pre_val) {
+  assert(pre_val->is_oop_or_null(true), "Error");
  if (!JavaThread::satb_mark_queue_set().active()) return;
  Thread* thr = Thread::current();
  if (thr->is_Java_thread()) {
@ -46,32 +47,31 @@ void G1SATBCardTableModRefBS::enqueue(oop pre_val) {
 }

 // When we know the current java thread:
-void
-G1SATBCardTableModRefBS::write_ref_field_pre_static(void* field,
-                                                    oop newVal,
+template <class T> void
+G1SATBCardTableModRefBS::write_ref_field_pre_static(T* field,
+                                                    oop new_val,
                                                    JavaThread* jt) {
  if (!JavaThread::satb_mark_queue_set().active()) return;
-  assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
-  oop preVal = *(oop*)field;
-  if (preVal != NULL) {
-    jt->satb_mark_queue().enqueue(preVal);
+  T heap_oop = oopDesc::load_heap_oop(field);
+  if (!oopDesc::is_null(heap_oop)) {
+    oop pre_val = oopDesc::decode_heap_oop_not_null(heap_oop);
+    assert(pre_val->is_oop(true /* ignore mark word */), "Error");
+    jt->satb_mark_queue().enqueue(pre_val);
  }
 }

-void
-G1SATBCardTableModRefBS::write_ref_array_pre(MemRegion mr) {
+template <class T> void
+G1SATBCardTableModRefBS::write_ref_array_pre_work(T* dst, int count) {
  if (!JavaThread::satb_mark_queue_set().active()) return;
-  assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
-  oop* elem_ptr = (oop*)mr.start();
-  while ((HeapWord*)elem_ptr < mr.end()) {
-    oop elem = *elem_ptr;
-    if (elem != NULL) enqueue(elem);
-    elem_ptr++;
+  T* elem_ptr = dst;
+  for (int i = 0; i < count; i++, elem_ptr++) {
+    T heap_oop = oopDesc::load_heap_oop(elem_ptr);
+    if (!oopDesc::is_null(heap_oop)) {
+      enqueue(oopDesc::decode_heap_oop_not_null(heap_oop));
+    }
  }
 }

-
-
 G1SATBCardTableLoggingModRefBS::
 G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
                               int max_covered_regions) :
--- a/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp
@ -47,31 +47,41 @@ public:

  // This notes that we don't need to access any BarrierSet data
  // structures, so this can be called from a static context.
-  static void write_ref_field_pre_static(void* field, oop newVal) {
-    assert(!UseCompressedOops, "Else needs to be templatized");
-    oop preVal = *((oop*)field);
-    if (preVal != NULL) {
-      enqueue(preVal);
+  template <class T> static void write_ref_field_pre_static(T* field, oop newVal) {
+    T heap_oop = oopDesc::load_heap_oop(field);
+    if (!oopDesc::is_null(heap_oop)) {
+      enqueue(oopDesc::decode_heap_oop(heap_oop));
    }
  }

  // When we know the current java thread:
-  static void write_ref_field_pre_static(void* field, oop newVal,
-                                         JavaThread* jt);
+  template <class T> static void write_ref_field_pre_static(T* field, oop newVal,
+                                                            JavaThread* jt);

  // We export this to make it available in cases where the static
  // type of the barrier set is known.  Note that it is non-virtual.
-  inline void inline_write_ref_field_pre(void* field, oop newVal) {
+  template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {
    write_ref_field_pre_static(field, newVal);
  }

-  // This is the more general virtual version.
-  void write_ref_field_pre_work(void* field, oop new_val) {
+  // These are the more general virtual versions.
+  virtual void write_ref_field_pre_work(oop* field, oop new_val) {
    inline_write_ref_field_pre(field, new_val);
  }
+  virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {
+    inline_write_ref_field_pre(field, new_val);
+  }
+  virtual void write_ref_field_pre_work(void* field, oop new_val) {
+    guarantee(false, "Not needed");
+  }

-  virtual void write_ref_array_pre(MemRegion mr);
-
+  template <class T> void write_ref_array_pre_work(T* dst, int count);
+  virtual void write_ref_array_pre(oop* dst, int count) {
+    write_ref_array_pre_work(dst, count);
+  }
+  virtual void write_ref_array_pre(narrowOop* dst, int count) {
+    write_ref_array_pre_work(dst, count);
+  }
 };

 // Adds card-table logging to the post-barrier.
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
@ -80,9 +80,6 @@
  develop(bool, G1TraceConcurrentRefinement, false,                         \
          "Trace G1 concurrent refinement")                                 \
                                                                            \
-  develop(bool, G1ConcMark, true,                                           \
-          "If true, run concurrent marking for G1")                         \
-                                                                            \
  product(intx, G1MarkStackSize, 2 * 1024 * 1024,                           \
          "Size of the mark stack for concurrent marking.")                 \
                                                                            \
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp
@ -37,14 +37,12 @@ template<bool do_gen_barrier, G1Barrier barrier,
 class G1ParCopyClosure;
 class G1ParScanClosure;

-typedef G1ParCopyClosure<false, G1BarrierEvac, false, true>
-                                                      G1ParScanHeapEvacClosure;
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;

 class FilterIntoCSClosure;
 class FilterOutOfRegionClosure;
 class FilterInHeapRegionAndIntoCSClosure;
 class FilterAndMarkInHeapRegionAndIntoCSClosure;
-class G1ScanAndBalanceClosure;

 #ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES
 #error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined."
@ -56,8 +54,7 @@ class G1ScanAndBalanceClosure;
      f(FilterIntoCSClosure,_nv)                        \
      f(FilterOutOfRegionClosure,_nv)                   \
      f(FilterInHeapRegionAndIntoCSClosure,_nv)         \
-      f(FilterAndMarkInHeapRegionAndIntoCSClosure,_nv)  \
-      f(G1ScanAndBalanceClosure,_nv)
+      f(FilterAndMarkInHeapRegionAndIntoCSClosure,_nv)

 #ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES
 #error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined."
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
@ -66,16 +66,16 @@ public:
  bool failures() { return _failures; }
  int n_failures() { return _n_failures; }

-  virtual void do_oop(narrowOop* p) {
-    guarantee(false, "NYI");
-  }
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }

-  void do_oop(oop* p) {
+  template <class T> void do_oop_work(T* p) {
    assert(_containing_obj != NULL, "Precondition");
    assert(!_g1h->is_obj_dead_cond(_containing_obj, _use_prev_marking),
           "Precondition");
-    oop obj = *p;
-    if (obj != NULL) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      bool failed = false;
      if (!_g1h->is_in_closed_subset(obj) ||
          _g1h->is_obj_dead_cond(obj, _use_prev_marking)) {
@ -106,8 +106,8 @@ public:
      }

      if (!_g1h->full_collection()) {
-        HeapRegion* from = _g1h->heap_region_containing(p);
-        HeapRegion* to   = _g1h->heap_region_containing(*p);
+        HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
+        HeapRegion* to   = _g1h->heap_region_containing(obj);
        if (from != NULL && to != NULL &&
            from != to &&
            !to->isHumongous()) {
@ -534,13 +534,13 @@ HeapRegion::object_iterate_mem_careful(MemRegion mr,
  // Otherwise, find the obj that extends onto mr.start().

  assert(cur <= mr.start()
-         && (oop(cur)->klass() == NULL ||
+         && (oop(cur)->klass_or_null() == NULL ||
             cur + oop(cur)->size() > mr.start()),
         "postcondition of block_start");
  oop obj;
  while (cur < mr.end()) {
    obj = oop(cur);
-    if (obj->klass() == NULL) {
+    if (obj->klass_or_null() == NULL) {
      // Ran into an unparseable point.
      return cur;
    } else if (!g1h->is_obj_dead(obj)) {
@ -577,7 +577,7 @@ oops_on_card_seq_iterate_careful(MemRegion mr,
  assert(cur <= mr.start(), "Postcondition");

  while (cur <= mr.start()) {
-    if (oop(cur)->klass() == NULL) {
+    if (oop(cur)->klass_or_null() == NULL) {
      // Ran into an unparseable point.
      return cur;
    }
@ -591,7 +591,7 @@ oops_on_card_seq_iterate_careful(MemRegion mr,
  obj = oop(cur);
  // If we finish this loop...
  assert(cur <= mr.start()
-         && obj->klass() != NULL
+         && obj->klass_or_null() != NULL
         && cur + obj->size() > mr.start(),
         "Loop postcondition");
  if (!g1h->is_obj_dead(obj)) {
@ -601,7 +601,7 @@ oops_on_card_seq_iterate_careful(MemRegion mr,
  HeapWord* next;
  while (cur < mr.end()) {
    obj = oop(cur);
-    if (obj->klass() == NULL) {
+    if (obj->klass_or_null() == NULL) {
      // Ran into an unparseable point.
      return cur;
    };
@ -781,8 +781,13 @@ void G1OffsetTableContigSpace::set_saved_mark() {
    // will pick up the right saved_mark_word() as the high water mark
    // of the region. Either way, the behaviour will be correct.
    ContiguousSpace::set_saved_mark();
+    OrderAccess::storestore();
    _gc_time_stamp = curr_gc_time_stamp;
-    OrderAccess::fence();
+    // The following fence is to force a flush of the writes above, but
+    // is strictly not needed because when an allocating worker thread
+    // calls set_saved_mark() it does so under the ParGCRareEvent_lock;
+    // when the lock is released, the write will be flushed.
+    // OrderAccess::fence();
  }
 }

--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
@ -126,7 +126,7 @@ protected:
    }
  }

-  void add_reference_work(oop* from, bool par) {
+  void add_reference_work(OopOrNarrowOopStar from, bool par) {
    // Must make this robust in case "from" is not in "_hr", because of
    // concurrency.

@ -173,11 +173,11 @@ public:
    _bm.clear();
  }

-  void add_reference(oop* from) {
+  void add_reference(OopOrNarrowOopStar from) {
    add_reference_work(from, /*parallel*/ true);
  }

-  void seq_add_reference(oop* from) {
+  void seq_add_reference(OopOrNarrowOopStar from) {
    add_reference_work(from, /*parallel*/ false);
  }

@ -220,7 +220,7 @@ public:
  }

  // Requires "from" to be in "hr()".
-  bool contains_reference(oop* from) const {
+  bool contains_reference(OopOrNarrowOopStar from) const {
    assert(hr()->is_in_reserved(from), "Precondition.");
    size_t card_ind = pointer_delta(from, hr()->bottom(),
                                    CardTableModRefBS::card_size);
@ -394,7 +394,7 @@ public:
  void set_next(PosParPRT* nxt) { _next = nxt; }
  PosParPRT** next_addr() { return &_next; }

-  void add_reference(oop* from, int tid) {
+  void add_reference(OopOrNarrowOopStar from, int tid) {
    // Expand if necessary.
    PerRegionTable** pt = par_tables();
    if (par_tables() == NULL && tid > 0 && hr()->is_gc_alloc_region()) {
@ -447,7 +447,7 @@ public:
    return res;
  }

-  bool contains_reference(oop* from) const {
+  bool contains_reference(OopOrNarrowOopStar from) const {
    if (PerRegionTable::contains_reference(from)) return true;
    if (_par_tables != NULL) {
      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
@ -564,12 +564,15 @@ void OtherRegionsTable::print_from_card_cache() {
 }
 #endif

-void OtherRegionsTable::add_reference(oop* from, int tid) {
+void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) {
  size_t cur_hrs_ind = hr()->hrs_index();

 #if HRRS_VERBOSE
  gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
-                                                  from, *from);
+                                                  from,
+                                                  UseCompressedOops
+                                                  ? oopDesc::load_decode_heap_oop((narrowOop*)from)
+                                                  : oopDesc::load_decode_heap_oop((oop*)from));
 #endif

  int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift);
@ -1021,13 +1024,13 @@ bool OtherRegionsTable::del_single_region_table(size_t ind,
  }
 }

-bool OtherRegionsTable::contains_reference(oop* from) const {
+bool OtherRegionsTable::contains_reference(OopOrNarrowOopStar from) const {
  // Cast away const in this case.
  MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
  return contains_reference_locked(from);
 }

-bool OtherRegionsTable::contains_reference_locked(oop* from) const {
+bool OtherRegionsTable::contains_reference_locked(OopOrNarrowOopStar from) const {
  HeapRegion* hr = _g1h->heap_region_containing_raw(from);
  if (hr == NULL) return false;
  RegionIdx_t hr_ind = (RegionIdx_t) hr->hrs_index();
@ -1288,24 +1291,24 @@ bool HeapRegionRemSetIterator::has_next(size_t& card_index) {



-oop**        HeapRegionRemSet::_recorded_oops = NULL;
-HeapWord**   HeapRegionRemSet::_recorded_cards = NULL;
-HeapRegion** HeapRegionRemSet::_recorded_regions = NULL;
-int          HeapRegionRemSet::_n_recorded = 0;
+OopOrNarrowOopStar* HeapRegionRemSet::_recorded_oops = NULL;
+HeapWord**          HeapRegionRemSet::_recorded_cards = NULL;
+HeapRegion**        HeapRegionRemSet::_recorded_regions = NULL;
+int                 HeapRegionRemSet::_n_recorded = 0;

 HeapRegionRemSet::Event* HeapRegionRemSet::_recorded_events = NULL;
 int*         HeapRegionRemSet::_recorded_event_index = NULL;
 int          HeapRegionRemSet::_n_recorded_events = 0;

-void HeapRegionRemSet::record(HeapRegion* hr, oop* f) {
+void HeapRegionRemSet::record(HeapRegion* hr, OopOrNarrowOopStar f) {
  if (_recorded_oops == NULL) {
    assert(_n_recorded == 0
           && _recorded_cards == NULL
           && _recorded_regions == NULL,
           "Inv");
-    _recorded_oops = NEW_C_HEAP_ARRAY(oop*, MaxRecorded);
-    _recorded_cards = NEW_C_HEAP_ARRAY(HeapWord*, MaxRecorded);
-    _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*, MaxRecorded);
+    _recorded_oops    = NEW_C_HEAP_ARRAY(OopOrNarrowOopStar, MaxRecorded);
+    _recorded_cards   = NEW_C_HEAP_ARRAY(HeapWord*,          MaxRecorded);
+    _recorded_regions = NEW_C_HEAP_ARRAY(HeapRegion*,        MaxRecorded);
  }
  if (_n_recorded == MaxRecorded) {
    gclog_or_tty->print_cr("Filled up 'recorded' (%d).", MaxRecorded);
@ -1408,21 +1411,21 @@ void HeapRegionRemSet::test() {
  HeapRegionRemSet* hrrs = hr0->rem_set();

  // Make three references from region 0x101...
-  hrrs->add_reference((oop*)hr1_start);
-  hrrs->add_reference((oop*)hr1_mid);
-  hrrs->add_reference((oop*)hr1_last);
+  hrrs->add_reference((OopOrNarrowOopStar)hr1_start);
+  hrrs->add_reference((OopOrNarrowOopStar)hr1_mid);
+  hrrs->add_reference((OopOrNarrowOopStar)hr1_last);

-  hrrs->add_reference((oop*)hr2_start);
-  hrrs->add_reference((oop*)hr2_mid);
-  hrrs->add_reference((oop*)hr2_last);
+  hrrs->add_reference((OopOrNarrowOopStar)hr2_start);
+  hrrs->add_reference((OopOrNarrowOopStar)hr2_mid);
+  hrrs->add_reference((OopOrNarrowOopStar)hr2_last);

-  hrrs->add_reference((oop*)hr3_start);
-  hrrs->add_reference((oop*)hr3_mid);
-  hrrs->add_reference((oop*)hr3_last);
+  hrrs->add_reference((OopOrNarrowOopStar)hr3_start);
+  hrrs->add_reference((OopOrNarrowOopStar)hr3_mid);
+  hrrs->add_reference((OopOrNarrowOopStar)hr3_last);

  // Now cause a coarsening.
-  hrrs->add_reference((oop*)hr4->bottom());
-  hrrs->add_reference((oop*)hr5->bottom());
+  hrrs->add_reference((OopOrNarrowOopStar)hr4->bottom());
+  hrrs->add_reference((OopOrNarrowOopStar)hr5->bottom());

  // Now, does iteration yield these three?
  HeapRegionRemSetIterator iter;
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp
@ -116,9 +116,9 @@ public:

  // For now.  Could "expand" some tables in the future, so that this made
  // sense.
-  void add_reference(oop* from, int tid);
+  void add_reference(OopOrNarrowOopStar from, int tid);

-  void add_reference(oop* from) {
+  void add_reference(OopOrNarrowOopStar from) {
    return add_reference(from, 0);
  }

@ -140,8 +140,8 @@ public:
  static size_t static_mem_size();
  static size_t fl_mem_size();

-  bool contains_reference(oop* from) const;
-  bool contains_reference_locked(oop* from) const;
+  bool contains_reference(OopOrNarrowOopStar from) const;
+  bool contains_reference_locked(OopOrNarrowOopStar from) const;

  void clear();

@ -192,10 +192,10 @@ private:
  // Unused unless G1RecordHRRSOops is true.

  static const int MaxRecorded = 1000000;
-  static oop**        _recorded_oops;
-  static HeapWord**   _recorded_cards;
-  static HeapRegion** _recorded_regions;
-  static int          _n_recorded;
+  static OopOrNarrowOopStar* _recorded_oops;
+  static HeapWord**          _recorded_cards;
+  static HeapRegion**        _recorded_regions;
+  static int                 _n_recorded;

  static const int MaxRecordedEvents = 1000;
  static Event*       _recorded_events;
@ -231,13 +231,13 @@ public:

  /* Used in the sequential case.  Returns "true" iff this addition causes
     the size limit to be reached. */
-  void add_reference(oop* from) {
+  void add_reference(OopOrNarrowOopStar from) {
    _other_regions.add_reference(from);
  }

  /* Used in the parallel case.  Returns "true" iff this addition causes
     the size limit to be reached. */
-  void add_reference(oop* from, int tid) {
+  void add_reference(OopOrNarrowOopStar from, int tid) {
    _other_regions.add_reference(from, tid);
  }

@ -301,7 +301,7 @@ public:
    return OtherRegionsTable::fl_mem_size();
  }

-  bool contains_reference(oop* from) const {
+  bool contains_reference(OopOrNarrowOopStar from) const {
    return _other_regions.contains_reference(from);
  }
  void print() const;
@ -329,7 +329,7 @@ public:
  }
 #endif

-  static void record(HeapRegion* hr, oop* f);
+  static void record(HeapRegion* hr, OopOrNarrowOopStar f);
  static void print_recorded();
  static void record_event(Event evnt);

--- a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.cpp
@ -43,6 +43,18 @@ void ObjPtrQueue::apply_closure_to_buffer(ObjectClosure* cl,
    }
  }
 }
+
+#ifdef ASSERT
+void ObjPtrQueue::verify_oops_in_buffer() {
+  if (_buf == NULL) return;
+  for (size_t i = _index; i < _sz; i += oopSize) {
+    oop obj = (oop)_buf[byte_index_to_index((int)i)];
+    assert(obj != NULL && obj->is_oop(true /* ignore mark word */),
+           "Not an oop");
+  }
+}
+#endif
+
 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
@ -66,6 +78,7 @@ void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,


 void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
+  DEBUG_ONLY(t->satb_mark_queue().verify_oops_in_buffer();)
  t->satb_mark_queue().handle_zero_index();
 }

@ -143,7 +156,7 @@ void SATBMarkQueueSet::abandon_partial_marking() {
    }
    _completed_buffers_tail = NULL;
    _n_completed_buffers = 0;
-    debug_only(assert_completed_buffer_list_len_correct_locked());
+    DEBUG_ONLY(assert_completed_buffer_list_len_correct_locked());
  }
  while (buffers_to_delete != NULL) {
    CompletedBufferNode* nd = buffers_to_delete;
--- a/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/satbQueue.hpp
@ -39,6 +39,7 @@ public:
  static void apply_closure_to_buffer(ObjectClosure* cl,
                                      void** buf, size_t index, size_t sz);

+  void verify_oops_in_buffer() NOT_DEBUG_RETURN;
 };


--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_g1
@ -27,6 +27,7 @@
 bufferingOopClosure.hpp			genOopClosures.hpp
 bufferingOopClosure.hpp			generation.hpp
 bufferingOopClosure.hpp			os.hpp
+bufferingOopClosure.hpp			taskqueue.hpp

 cardTableRS.cpp				concurrentMark.hpp
 cardTableRS.cpp				g1SATBCardTableModRefBS.hpp
@ -139,7 +140,7 @@ g1CollectedHeap.cpp                     concurrentZFThread.hpp
 g1CollectedHeap.cpp                     g1CollectedHeap.inline.hpp
 g1CollectedHeap.cpp                     g1CollectorPolicy.hpp
 g1CollectedHeap.cpp                     g1MarkSweep.hpp
-g1CollectedHeap.cpp                     g1RemSet.hpp
+g1CollectedHeap.cpp                     g1RemSet.inline.hpp
 g1CollectedHeap.cpp                     g1OopClosures.inline.hpp
 g1CollectedHeap.cpp                     genOopClosures.inline.hpp
 g1CollectedHeap.cpp                     gcLocker.inline.hpp
@ -151,13 +152,14 @@ g1CollectedHeap.cpp                     icBuffer.hpp
 g1CollectedHeap.cpp                     isGCActiveMark.hpp
 g1CollectedHeap.cpp			oop.inline.hpp
 g1CollectedHeap.cpp			oop.pcgc.inline.hpp
-g1CollectedHeap.cpp			parGCAllocBuffer.hpp
 g1CollectedHeap.cpp                     vm_operations_g1.hpp
 g1CollectedHeap.cpp                     vmThread.hpp

 g1CollectedHeap.hpp                     barrierSet.hpp
+g1CollectedHeap.hpp                     g1RemSet.hpp
 g1CollectedHeap.hpp                     heapRegion.hpp
 g1CollectedHeap.hpp                     memRegion.hpp
+g1CollectedHeap.hpp			parGCAllocBuffer.hpp
 g1CollectedHeap.hpp                     sharedHeap.hpp

 g1CollectedHeap.inline.hpp              concurrentMark.hpp
@ -245,6 +247,7 @@ g1RemSet.cpp				intHisto.hpp
 g1RemSet.cpp				iterator.hpp
 g1RemSet.cpp				oop.inline.hpp

+g1RemSet.inline.hpp			oop.inline.hpp
 g1RemSet.inline.hpp			g1RemSet.hpp
 g1RemSet.inline.hpp			heapRegionRemSet.hpp

@ -255,6 +258,7 @@ g1SATBCardTableModRefBS.cpp		thread.hpp
 g1SATBCardTableModRefBS.cpp		thread_<os_family>.inline.hpp
 g1SATBCardTableModRefBS.cpp		satbQueue.hpp

+g1SATBCardTableModRefBS.hpp		oop.inline.hpp
 g1SATBCardTableModRefBS.hpp		cardTableModRefBS.hpp
 g1SATBCardTableModRefBS.hpp		memRegion.hpp

--- a/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp
@ -31,9 +31,10 @@ void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
                                                        bool clear,
                                                        int n_threads) {
  if (n_threads > 0) {
-    assert(n_threads == (int)ParallelGCThreads, "# worker threads != # requested!");
-
-      // Make sure the LNC array is valid for the space.
+    assert((n_threads == 1 && ParallelGCThreads == 0) ||
+           n_threads <= (int)ParallelGCThreads,
+           "# worker threads != # requested!");
+    // Make sure the LNC array is valid for the space.
    jbyte**   lowest_non_clean;
    uintptr_t lowest_non_clean_base_chunk_index;
    size_t    lowest_non_clean_chunk_size;
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
@ -885,7 +885,7 @@ void ParallelScavengeHeap::print_tracing_info() const {
 }


-void ParallelScavengeHeap::verify(bool allow_dirty, bool silent) {
+void ParallelScavengeHeap::verify(bool allow_dirty, bool silent, bool option /* ignored */) {
  // Why do we need the total_collections()-filter below?
  if (total_collections() > 0) {
    if (!silent) {
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
@ -217,7 +217,7 @@ class ParallelScavengeHeap : public CollectedHeap {
  virtual void gc_threads_do(ThreadClosure* tc) const;
  virtual void print_tracing_info() const;

-  void verify(bool allow_dirty, bool silent);
+  void verify(bool allow_dirty, bool silent, bool /* option */);

  void print_heap_change(size_t prev_used);

--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
@ -117,6 +117,7 @@ inline void PSPromotionManager::process_popped_location_depth(StarTask p) {
    process_array_chunk(old);
  } else {
    if (p.is_narrow()) {
+      assert(UseCompressedOops, "Error");
      PSScavenge::copy_and_push_safe_barrier(this, (narrowOop*)p);
    } else {
      PSScavenge::copy_and_push_safe_barrier(this, (oop*)p);
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp
@ -533,7 +533,7 @@ class CollectedHeap : public CHeapObj {
  virtual void print_tracing_info() const = 0;

  // Heap verification
-  virtual void verify(bool allow_dirty, bool silent) = 0;
+  virtual void verify(bool allow_dirty, bool silent, bool option) = 0;

  // Non product verification and debugging.
 #ifndef PRODUCT
--- a/hotspot/src/share/vm/includeDB_core
+++ b/hotspot/src/share/vm/includeDB_core
@ -554,7 +554,6 @@ ciEnv.cpp                               jvmtiExport.hpp
 ciEnv.cpp                               linkResolver.hpp
 ciEnv.cpp                               methodDataOop.hpp
 ciEnv.cpp                               objArrayKlass.hpp
-ciEnv.cpp                               oop.hpp
 ciEnv.cpp                               oop.inline.hpp
 ciEnv.cpp                               oop.inline2.hpp
 ciEnv.cpp                               oopFactory.hpp
@ -785,7 +784,6 @@ ciObjectFactory.hpp                     growableArray.hpp
 ciSignature.cpp                         allocation.inline.hpp
 ciSignature.cpp                         ciSignature.hpp
 ciSignature.cpp                         ciUtilities.hpp
-ciSignature.cpp                         oop.hpp
 ciSignature.cpp                         oop.inline.hpp
 ciSignature.cpp                         signature.hpp

@ -950,7 +948,6 @@ classLoadingService.hpp                 perfData.hpp
 classify.cpp                            classify.hpp
 classify.cpp                            systemDictionary.hpp

-classify.hpp                            oop.hpp
 classify.hpp                            oop.inline.hpp

 codeBlob.cpp                            allocation.inline.hpp
@ -1185,7 +1182,6 @@ compilerOracle.cpp                      handles.inline.hpp
 compilerOracle.cpp                      jniHandles.hpp
 compilerOracle.cpp                      klass.hpp
 compilerOracle.cpp                      methodOop.hpp
-compilerOracle.cpp                      oop.hpp
 compilerOracle.cpp                      oop.inline.hpp
 compilerOracle.cpp                      oopFactory.hpp
 compilerOracle.cpp                      resourceArea.hpp
@ -1629,7 +1625,6 @@ frame.cpp                               methodDataOop.hpp
 frame.cpp                               methodOop.hpp
 frame.cpp                               monitorChunk.hpp
 frame.cpp                               nativeInst_<arch>.hpp
-frame.cpp                               oop.hpp
 frame.cpp                               oop.inline.hpp
 frame.cpp                               oop.inline2.hpp
 frame.cpp                               oopMapCache.hpp
@ -1797,7 +1792,6 @@ generation.cpp                          genOopClosures.inline.hpp
 generation.cpp                          generation.hpp
 generation.cpp                          generation.inline.hpp
 generation.cpp                          java.hpp
-generation.cpp                          oop.hpp
 generation.cpp                          oop.inline.hpp
 generation.cpp                          spaceDecorator.hpp
 generation.cpp                          space.inline.hpp
@ -2270,7 +2264,6 @@ java.cpp                                jvmtiExport.hpp
 java.cpp                                memprofiler.hpp
 java.cpp                                methodOop.hpp
 java.cpp                                objArrayOop.hpp
-java.cpp                                oop.hpp
 java.cpp                                oop.inline.hpp
 java.cpp                                oopFactory.hpp
 java.cpp                                sharedRuntime.hpp
@ -2947,7 +2940,7 @@ mutex_<os_family>.inline.hpp            thread_<os_family>.inline.hpp
 nativeInst_<arch>.cpp                   assembler_<arch>.inline.hpp
 nativeInst_<arch>.cpp                   handles.hpp
 nativeInst_<arch>.cpp                   nativeInst_<arch>.hpp
-nativeInst_<arch>.cpp                   oop.hpp
+nativeInst_<arch>.cpp                   oop.inline.hpp
 nativeInst_<arch>.cpp                   ostream.hpp
 nativeInst_<arch>.cpp                   resourceArea.hpp
 nativeInst_<arch>.cpp                   sharedRuntime.hpp
@ -3842,7 +3835,7 @@ stackMapTable.hpp                       stackMapFrame.hpp
 stackValue.cpp                          debugInfo.hpp
 stackValue.cpp                          frame.inline.hpp
 stackValue.cpp                          handles.inline.hpp
-stackValue.cpp                          oop.hpp
+stackValue.cpp                          oop.inline.hpp
 stackValue.cpp                          stackValue.hpp

 stackValue.hpp                          handles.hpp
@ -4329,7 +4322,6 @@ typeArrayOop.hpp                        typeArrayKlass.hpp
 unhandledOops.cpp                       collectedHeap.hpp
 unhandledOops.cpp                       gcLocker.inline.hpp
 unhandledOops.cpp                       globalDefinitions.hpp
-unhandledOops.cpp                       oop.hpp
 unhandledOops.cpp                       oop.inline.hpp
 unhandledOops.cpp                       thread.hpp
 unhandledOops.cpp                       unhandledOops.hpp
@ -4465,7 +4457,6 @@ vframe.cpp                              javaClasses.hpp
 vframe.cpp                              nmethod.hpp
 vframe.cpp                              objectMonitor.hpp
 vframe.cpp                              objectMonitor.inline.hpp
-vframe.cpp                              oop.hpp
 vframe.cpp                              oop.inline.hpp
 vframe.cpp                              oopMapCache.hpp
 vframe.cpp                              pcDesc.hpp
@ -4577,7 +4568,6 @@ vmThread.cpp                            events.hpp
 vmThread.cpp                            interfaceSupport.hpp
 vmThread.cpp                            methodOop.hpp
 vmThread.cpp                            mutexLocker.hpp
-vmThread.cpp                            oop.hpp
 vmThread.cpp                            oop.inline.hpp
 vmThread.cpp                            os.hpp
 vmThread.cpp                            resourceArea.hpp
--- a/hotspot/src/share/vm/includeDB_features
+++ b/hotspot/src/share/vm/includeDB_features
@ -47,7 +47,7 @@ dump.cpp                                javaCalls.hpp
 dump.cpp                                javaClasses.hpp
 dump.cpp                                loaderConstraints.hpp
 dump.cpp                                methodDataOop.hpp
-dump.cpp                                oop.hpp
+dump.cpp                                oop.inline.hpp
 dump.cpp                                oopFactory.hpp
 dump.cpp                                resourceArea.hpp
 dump.cpp                                signature.hpp
@ -237,7 +237,7 @@ serialize.cpp                           compactingPermGenGen.hpp
 serialize.cpp                           compiledICHolderOop.hpp
 serialize.cpp                           methodDataOop.hpp
 serialize.cpp                           objArrayOop.hpp
-serialize.cpp                           oop.hpp
+serialize.cpp                           oop.inline.hpp
 serialize.cpp                           symbolTable.hpp
 serialize.cpp                           systemDictionary.hpp

@ -295,7 +295,7 @@ vmStructs.cpp                           nmethod.hpp
 vmStructs.cpp                           objArrayKlass.hpp
 vmStructs.cpp                           objArrayKlassKlass.hpp
 vmStructs.cpp                           objArrayOop.hpp
-vmStructs.cpp                           oop.hpp
+vmStructs.cpp                           oop.inline.hpp
 vmStructs.cpp                           oopMap.hpp
 vmStructs.cpp                           pcDesc.hpp
 vmStructs.cpp                           perfMemory.hpp
--- a/hotspot/src/share/vm/interpreter/rewriter.cpp
+++ b/hotspot/src/share/vm/interpreter/rewriter.cpp
@ -273,6 +273,7 @@ Rewriter::Rewriter(instanceKlassHandle klass, TRAPS)
  compute_index_maps();

  if (RegisterFinalizersAtInit && _klass->name() == vmSymbols::java_lang_Object()) {
+    bool did_rewrite = false;
    int i = _methods->length();
    while (i-- > 0) {
      methodOop method = (methodOop)_methods->obj_at(i);
@ -281,9 +282,11 @@ Rewriter::Rewriter(instanceKlassHandle klass, TRAPS)
        // object for finalization if needed.
        methodHandle m(THREAD, method);
        rewrite_Object_init(m, CHECK);
+        did_rewrite = true;
        break;
      }
    }
+    assert(did_rewrite, "must find Object::<init> to rewrite it");
  }

  // rewrite methods, in two passes
--- a/hotspot/src/share/vm/memory/barrierSet.cpp
+++ b/hotspot/src/share/vm/memory/barrierSet.cpp
@ -25,12 +25,27 @@
 # include "incls/_precompiled.incl"
 # include "incls/_barrierSet.cpp.incl"

-// count is in HeapWord's
+// count is number of array elements being written
 void BarrierSet::static_write_ref_array_pre(HeapWord* start, size_t count) {
-   Universe::heap()->barrier_set()->write_ref_array_pre(MemRegion(start, start + count));
+  assert(count <= (size_t)max_intx, "count too large");
+#if 0
+  warning("Pre: \t" INTPTR_FORMAT "[" SIZE_FORMAT "]\t",
+                   start,            count);
+#endif
+  if (UseCompressedOops) {
+    Universe::heap()->barrier_set()->write_ref_array_pre((narrowOop*)start, (int)count);
+  } else {
+    Universe::heap()->barrier_set()->write_ref_array_pre(      (oop*)start, (int)count);
+  }
 }

-// count is in HeapWord's
+// count is number of array elements being written
 void BarrierSet::static_write_ref_array_post(HeapWord* start, size_t count) {
-   Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, start + count));
+  assert(count <= (size_t)max_intx, "count too large");
+  HeapWord* end = start + objArrayOopDesc::array_size((int)count);
+#if 0
+  warning("Post:\t" INTPTR_FORMAT "[" SIZE_FORMAT "] : [" INTPTR_FORMAT","INTPTR_FORMAT")\t",
+                   start,            count,              start,          end);
+#endif
+  Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, end));
 }
--- a/hotspot/src/share/vm/memory/barrierSet.hpp
+++ b/hotspot/src/share/vm/memory/barrierSet.hpp
@ -81,9 +81,13 @@ public:
  // barrier types.  Semantically, it should be thought of as a call to the
  // virtual "_work" function below, which must implement the barrier.)
  // First the pre-write versions...
-  inline void write_ref_field_pre(void* field, oop new_val);
+  template <class T> inline void write_ref_field_pre(T* field, oop new_val);
+private:
+  // Keep this private so as to catch violations at build time.
+  virtual void write_ref_field_pre_work(     void* field, oop new_val) { guarantee(false, "Not needed"); };
 protected:
-  virtual void write_ref_field_pre_work(void* field, oop new_val) {};
+  virtual void write_ref_field_pre_work(      oop* field, oop new_val) {};
+  virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {};
 public:

  // ...then the post-write version.
@ -117,12 +121,17 @@ public:
  virtual void read_ref_array(MemRegion mr) = 0;
  virtual void read_prim_array(MemRegion mr) = 0;

-  virtual void write_ref_array_pre(MemRegion mr) {}
+  virtual void write_ref_array_pre(      oop* dst, int length) {}
+  virtual void write_ref_array_pre(narrowOop* dst, int length) {}
  inline void write_ref_array(MemRegion mr);

  // Static versions, suitable for calling from generated code.
  static void static_write_ref_array_pre(HeapWord* start, size_t count);
  static void static_write_ref_array_post(HeapWord* start, size_t count);
+  // Narrow oop versions of the above; count is # of array elements being written,
+  // starting with "start", which is HeapWord-aligned.
+  static void static_write_ref_array_pre_narrow(HeapWord* start, size_t count);
+  static void static_write_ref_array_post_narrow(HeapWord* start, size_t count);

 protected:
  virtual void write_ref_array_work(MemRegion mr) = 0;
--- a/hotspot/src/share/vm/memory/barrierSet.inline.hpp
+++ b/hotspot/src/share/vm/memory/barrierSet.inline.hpp
@ -23,10 +23,10 @@
 */

 // Inline functions of BarrierSet, which de-virtualize certain
-// performance-critical calls when when the barrier is the most common
+// performance-critical calls when the barrier is the most common
 // card-table kind.

-void BarrierSet::write_ref_field_pre(void* field, oop new_val) {
+template <class T> void BarrierSet::write_ref_field_pre(T* field, oop new_val) {
  if (kind() == CardTableModRef) {
    ((CardTableModRefBS*)this)->inline_write_ref_field_pre(field, new_val);
  } else {
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp
@ -287,7 +287,7 @@ public:
  // these functions here for performance.
 protected:
  void write_ref_field_work(oop obj, size_t offset, oop newVal);
-  void write_ref_field_work(void* field, oop newVal);
+  virtual void write_ref_field_work(void* field, oop newVal);
 public:

  bool has_write_ref_array_opt() { return true; }
@ -317,10 +317,10 @@ public:

  // *** Card-table-barrier-specific things.

-  inline void inline_write_ref_field_pre(void* field, oop newVal) {}
+  template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {}

-  inline void inline_write_ref_field(void* field, oop newVal) {
-    jbyte* byte = byte_for(field);
+  template <class T> inline void inline_write_ref_field(T* field, oop newVal) {
+    jbyte* byte = byte_for((void*)field);
    *byte = dirty_card;
  }

--- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp
@ -1194,7 +1194,7 @@ GCStats* GenCollectedHeap::gc_stats(int level) const {
  return _gens[level]->gc_stats();
 }

-void GenCollectedHeap::verify(bool allow_dirty, bool silent) {
+void GenCollectedHeap::verify(bool allow_dirty, bool silent, bool option /* ignored */) {
  if (!silent) {
    gclog_or_tty->print("permgen ");
  }
--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp
@ -325,7 +325,7 @@ public:
  void prepare_for_verify();

  // Override.
-  void verify(bool allow_dirty, bool silent);
+  void verify(bool allow_dirty, bool silent, bool /* option */);

  // Override.
  void print() const;
--- a/hotspot/src/share/vm/memory/genOopClosures.hpp
+++ b/hotspot/src/share/vm/memory/genOopClosures.hpp
@ -57,7 +57,7 @@ class OopsInGenClosure : public OopClosure {
  template <class T> void do_barrier(T* p);

  // Version for use by closures that may be called in parallel code.
-  void par_do_barrier(oop* p);
+  template <class T> void par_do_barrier(T* p);

 public:
  OopsInGenClosure() : OopClosure(NULL),
--- a/hotspot/src/share/vm/memory/genOopClosures.inline.hpp
+++ b/hotspot/src/share/vm/memory/genOopClosures.inline.hpp
@ -40,18 +40,20 @@ inline void OopsInGenClosure::set_generation(Generation* gen) {

 template <class T> inline void OopsInGenClosure::do_barrier(T* p) {
  assert(generation()->is_in_reserved(p), "expected ref in generation");
-  assert(!oopDesc::is_null(*p), "expected non-null object");
-  oop obj = oopDesc::load_decode_heap_oop_not_null(p);
+  T heap_oop = oopDesc::load_heap_oop(p);
+  assert(!oopDesc::is_null(heap_oop), "expected non-null oop");
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  // If p points to a younger generation, mark the card.
  if ((HeapWord*)obj < _gen_boundary) {
    _rs->inline_write_ref_field_gc(p, obj);
  }
 }

-inline void OopsInGenClosure::par_do_barrier(oop* p) {
+template <class T> inline void OopsInGenClosure::par_do_barrier(T* p) {
  assert(generation()->is_in_reserved(p), "expected ref in generation");
-  oop obj = *p;
-  assert(obj != NULL, "expected non-null object");
+  T heap_oop = oopDesc::load_heap_oop(p);
+  assert(!oopDesc::is_null(heap_oop), "expected non-null oop");
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
  // If p points to a younger generation, mark the card.
  if ((HeapWord*)obj < gen_boundary()) {
    rs()->write_ref_field_gc_par(p, obj);
--- a/hotspot/src/share/vm/memory/referenceProcessor.cpp
+++ b/hotspot/src/share/vm/memory/referenceProcessor.cpp
@ -1013,12 +1013,19 @@ ReferenceProcessor::add_to_discovered_list_mt(DiscoveredList& refs_list,
  // discovered_addr.
  oop current_head = refs_list.head();

-  // Note: In the case of G1, this pre-barrier is strictly
+  // Note: In the case of G1, this specific pre-barrier is strictly
  // not necessary because the only case we are interested in
-  // here is when *discovered_addr is NULL, so this will expand to
-  // nothing. As a result, I am just manually eliding this out for G1.
+  // here is when *discovered_addr is NULL (see the CAS further below),
+  // so this will expand to nothing. As a result, we have manually
+  // elided this out for G1, but left in the test for some future
+  // collector that might have need for a pre-barrier here.
  if (_discovered_list_needs_barrier && !UseG1GC) {
-    _bs->write_ref_field_pre((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR");
+    if (UseCompressedOops) {
+      _bs->write_ref_field_pre((narrowOop*)discovered_addr, current_head);
+    } else {
+      _bs->write_ref_field_pre((oop*)discovered_addr, current_head);
+    }
+    guarantee(false, "Need to check non-G1 collector");
  }
  oop retest = oopDesc::atomic_compare_exchange_oop(current_head, discovered_addr,
                                                    NULL);
@ -1029,9 +1036,8 @@ ReferenceProcessor::add_to_discovered_list_mt(DiscoveredList& refs_list,
    refs_list.set_head(obj);
    refs_list.inc_length(1);
    if (_discovered_list_needs_barrier) {
-      _bs->write_ref_field((void*)discovered_addr, current_head); guarantee(false, "Needs to be fixed: YSR");
+      _bs->write_ref_field((void*)discovered_addr, current_head);
    }
-
  } else {
    // If retest was non NULL, another thread beat us to it:
    // The reference has already been discovered...
@ -1177,11 +1183,16 @@ bool ReferenceProcessor::discover_reference(oop obj, ReferenceType rt) {
    // pre-value, we can safely elide the pre-barrier here for the case of G1.
    assert(discovered == NULL, "control point invariant");
    if (_discovered_list_needs_barrier && !UseG1GC) { // safe to elide for G1
-      _bs->write_ref_field_pre((oop*)discovered_addr, current_head);
+      if (UseCompressedOops) {
+        _bs->write_ref_field_pre((narrowOop*)discovered_addr, current_head);
+      } else {
+        _bs->write_ref_field_pre((oop*)discovered_addr, current_head);
+      }
+      guarantee(false, "Need to check non-G1 collector");
    }
    oop_store_raw(discovered_addr, current_head);
    if (_discovered_list_needs_barrier) {
-      _bs->write_ref_field((oop*)discovered_addr, current_head);
+      _bs->write_ref_field((void*)discovered_addr, current_head);
    }
    list->set_head(obj);
    list->inc_length(1);
--- a/hotspot/src/share/vm/memory/space.hpp
+++ b/hotspot/src/share/vm/memory/space.hpp
@ -106,6 +106,7 @@ class Space: public CHeapObj {
  virtual void set_end(HeapWord* value)    { _end = value; }

  virtual HeapWord* saved_mark_word() const  { return _saved_mark_word; }
+
  void set_saved_mark_word(HeapWord* p) { _saved_mark_word = p; }

  MemRegionClosure* preconsumptionDirtyCardClosure() const {
--- a/hotspot/src/share/vm/memory/universe.cpp
+++ b/hotspot/src/share/vm/memory/universe.cpp
@ -1170,7 +1170,7 @@ void Universe::print_heap_after_gc(outputStream* st) {
  st->print_cr("}");
 }

-void Universe::verify(bool allow_dirty, bool silent) {
+void Universe::verify(bool allow_dirty, bool silent, bool option) {
  if (SharedSkipVerify) {
    return;
  }
@ -1194,7 +1194,7 @@ void Universe::verify(bool allow_dirty, bool silent) {
  if (!silent) gclog_or_tty->print("[Verifying ");
  if (!silent) gclog_or_tty->print("threads ");
  Threads::verify();
-  heap()->verify(allow_dirty, silent);
+  heap()->verify(allow_dirty, silent, option);

  if (!silent) gclog_or_tty->print("syms ");
  SymbolTable::verify();
--- a/hotspot/src/share/vm/memory/universe.hpp
+++ b/hotspot/src/share/vm/memory/universe.hpp
@ -343,6 +343,7 @@ class Universe: AllStatic {
  // For UseCompressedOops
  static address* narrow_oop_base_addr()              { return &_narrow_oop._base; }
  static address  narrow_oop_base()                   { return  _narrow_oop._base; }
+  static bool  is_narrow_oop_base(void* addr)         { return (narrow_oop_base() == (address)addr); }
  static int      narrow_oop_shift()                  { return  _narrow_oop._shift; }
  static void     set_narrow_oop_base(address base)   { _narrow_oop._base  = base; }
  static void     set_narrow_oop_shift(int shift)     { _narrow_oop._shift = shift; }
@ -398,7 +399,7 @@ class Universe: AllStatic {

  // Debugging
  static bool verify_in_progress() { return _verify_in_progress; }
-  static void verify(bool allow_dirty = true, bool silent = false);
+  static void verify(bool allow_dirty = true, bool silent = false, bool option = true);
  static int  verify_count()                  { return _verify_count; }
  static void print();
  static void print_on(outputStream* st);
--- a/hotspot/src/share/vm/oops/instanceRefKlass.cpp
+++ b/hotspot/src/share/vm/oops/instanceRefKlass.cpp
@ -28,13 +28,14 @@
 template <class T>
 static void specialized_oop_follow_contents(instanceRefKlass* ref, oop obj) {
  T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);
-  oop referent = oopDesc::load_decode_heap_oop(referent_addr);
+  T heap_oop = oopDesc::load_heap_oop(referent_addr);
  debug_only(
    if(TraceReferenceGC && PrintGCDetails) {
      gclog_or_tty->print_cr("instanceRefKlass::oop_follow_contents " INTPTR_FORMAT, obj);
    }
  )
-  if (referent != NULL) {
+  if (!oopDesc::is_null(heap_oop)) {
+    oop referent = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (!referent->is_gc_marked() &&
        MarkSweep::ref_processor()->
          discover_reference(obj, ref->reference_type())) {
@ -81,13 +82,14 @@ static void specialized_oop_follow_contents(instanceRefKlass* ref,
                                            ParCompactionManager* cm,
                                            oop obj) {
  T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);
-  oop referent = oopDesc::load_decode_heap_oop(referent_addr);
+  T heap_oop = oopDesc::load_heap_oop(referent_addr);
  debug_only(
    if(TraceReferenceGC && PrintGCDetails) {
      gclog_or_tty->print_cr("instanceRefKlass::oop_follow_contents " INTPTR_FORMAT, obj);
    }
  )
-  if (referent != NULL) {
+  if (!oopDesc::is_null(heap_oop)) {
+    oop referent = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (PSParallelCompact::mark_bitmap()->is_unmarked(referent) &&
        PSParallelCompact::ref_processor()->
          discover_reference(obj, ref->reference_type())) {
@ -182,9 +184,10 @@ int instanceRefKlass::oop_adjust_pointers(oop obj) {
  }                                                                             \
                                                                                \
  T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);           \
-  oop referent = oopDesc::load_decode_heap_oop(referent_addr);                  \
-  if (referent != NULL && contains(referent_addr)) {                            \
+  T heap_oop = oopDesc::load_heap_oop(referent_addr);                           \
+  if (!oopDesc::is_null(heap_oop) && contains(referent_addr)) {                 \
    ReferenceProcessor* rp = closure->_ref_processor;                           \
+    oop referent = oopDesc::decode_heap_oop_not_null(heap_oop);                 \
    if (!referent->is_gc_marked() && (rp != NULL) &&                            \
        rp->discover_reference(obj, reference_type())) {                        \
      return size;                                                              \
--- a/hotspot/src/share/vm/oops/methodKlass.cpp
+++ b/hotspot/src/share/vm/oops/methodKlass.cpp
@ -68,7 +68,7 @@ methodOop methodKlass::allocate(constMethodHandle xconst,
  m->set_constants(NULL);
  m->set_max_stack(0);
  m->set_max_locals(0);
-  m->clear_intrinsic_id_cache();
+  m->set_intrinsic_id(vmIntrinsics::_none);
  m->set_method_data(NULL);
  m->set_interpreter_throwout_count(0);
  m->set_vtable_index(methodOopDesc::garbage_vtable_index);
--- a/hotspot/src/share/vm/oops/methodOop.cpp
+++ b/hotspot/src/share/vm/oops/methodOop.cpp
@ -962,26 +962,39 @@ methodHandle methodOopDesc:: clone_with_new_data(methodHandle m, u_char* new_cod
  return newm;
 }

-vmIntrinsics::ID methodOopDesc::compute_intrinsic_id() const {
-  assert(vmIntrinsics::_none == 0, "correct coding of default case");
-  const uintptr_t max_cache_uint = right_n_bits((int)(sizeof(_intrinsic_id_cache) * BitsPerByte));
-  assert((uintptr_t)vmIntrinsics::ID_LIMIT <= max_cache_uint, "else fix cache size");
+vmSymbols::SID methodOopDesc::klass_id_for_intrinsics(klassOop holder) {
  // if loader is not the default loader (i.e., != NULL), we can't know the intrinsics
  // because we are not loading from core libraries
-  if (instanceKlass::cast(method_holder())->class_loader() != NULL) return vmIntrinsics::_none;
+  if (instanceKlass::cast(holder)->class_loader() != NULL)
+    return vmSymbols::NO_SID;   // regardless of name, no intrinsics here

  // see if the klass name is well-known:
-  symbolOop klass_name    = instanceKlass::cast(method_holder())->name();
-  vmSymbols::SID klass_id = vmSymbols::find_sid(klass_name);
-  if (klass_id == vmSymbols::NO_SID)  return vmIntrinsics::_none;
+  symbolOop klass_name = instanceKlass::cast(holder)->name();
+  return vmSymbols::find_sid(klass_name);
+}
+
+void methodOopDesc::init_intrinsic_id() {
+  assert(_intrinsic_id == vmIntrinsics::_none, "do this just once");
+  const uintptr_t max_id_uint = right_n_bits((int)(sizeof(_intrinsic_id) * BitsPerByte));
+  assert((uintptr_t)vmIntrinsics::ID_LIMIT <= max_id_uint, "else fix size");
+
+  // the klass name is well-known:
+  vmSymbols::SID klass_id = klass_id_for_intrinsics(method_holder());
+  assert(klass_id != vmSymbols::NO_SID, "caller responsibility");

  // ditto for method and signature:
  vmSymbols::SID  name_id = vmSymbols::find_sid(name());
-  if (name_id  == vmSymbols::NO_SID)  return vmIntrinsics::_none;
+  if (name_id  == vmSymbols::NO_SID)  return;
  vmSymbols::SID   sig_id = vmSymbols::find_sid(signature());
-  if (sig_id   == vmSymbols::NO_SID)  return vmIntrinsics::_none;
+  if (sig_id   == vmSymbols::NO_SID)  return;
  jshort flags = access_flags().as_short();

+  vmIntrinsics::ID id = vmIntrinsics::find_id(klass_id, name_id, sig_id, flags);
+  if (id != vmIntrinsics::_none) {
+    set_intrinsic_id(id);
+    return;
+  }
+
  // A few slightly irregular cases:
  switch (klass_id) {
  case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_StrictMath):
@ -992,15 +1005,18 @@ vmIntrinsics::ID methodOopDesc::compute_intrinsic_id() const {
    case vmSymbols::VM_SYMBOL_ENUM_NAME(sqrt_name):
      // pretend it is the corresponding method in the non-strict class:
      klass_id = vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_Math);
+      id = vmIntrinsics::find_id(klass_id, name_id, sig_id, flags);
      break;
    }
  }

-  // return intrinsic id if any
-  return vmIntrinsics::find_id(klass_id, name_id, sig_id, flags);
+  if (id != vmIntrinsics::_none) {
+    // Set up its iid.  It is an alias method.
+    set_intrinsic_id(id);
+    return;
+  }
 }

-
 // These two methods are static since a GC may move the methodOopDesc
 bool methodOopDesc::load_signature_classes(methodHandle m, TRAPS) {
  bool sig_is_loaded = true;
--- a/hotspot/src/share/vm/oops/methodOop.hpp
+++ b/hotspot/src/share/vm/oops/methodOop.hpp
@ -104,7 +104,7 @@ class methodOopDesc : public oopDesc {
  u2                _max_stack;                  // Maximum number of entries on the expression stack
  u2                _max_locals;                 // Number of local variables used by this method
  u2                _size_of_parameters;         // size of the parameter block (receiver + arguments) in words
-  u1                _intrinsic_id_cache;         // Cache for intrinsic_id; 0 or 1+vmInt::ID
+  u1                _intrinsic_id;               // vmSymbols::intrinsic_id (0 == _none)
  u1                _highest_tier_compile;       // Highest compile level this method has ever seen.
  u2                _interpreter_throwout_count; // Count of times method was exited via exception while interpreting
  u2                _number_of_breakpoints;      // fullspeed debugging support
@ -224,8 +224,6 @@ class methodOopDesc : public oopDesc {
  int highest_tier_compile()                     { return _highest_tier_compile;}
  void set_highest_tier_compile(int level)      { _highest_tier_compile = level;}

-  void clear_intrinsic_id_cache() { _intrinsic_id_cache = 0; }
-
  // Count of times method was exited via exception while interpreting
  void interpreter_throwout_increment() {
    if (_interpreter_throwout_count < 65534) {
@ -571,18 +569,12 @@ class methodOopDesc : public oopDesc {
  void set_cached_itable_index(int index)           { instanceKlass::cast(method_holder())->set_cached_itable_index(method_idnum(), index); }

  // Support for inlining of intrinsic methods
-  vmIntrinsics::ID intrinsic_id() const { // returns zero if not an intrinsic
-    const u1& cache = _intrinsic_id_cache;
-    if (cache != 0) {
-      return (vmIntrinsics::ID)(cache - 1);
-    } else {
-      vmIntrinsics::ID id = compute_intrinsic_id();
-      *(u1*)&cache = ((u1) id) + 1;   // force the cache to be non-const
-      vmIntrinsics::verify_method(id, (methodOop) this);
-      assert((vmIntrinsics::ID)(cache - 1) == id, "proper conversion");
-      return id;
-    }
-  }
+  vmIntrinsics::ID intrinsic_id() const          { return (vmIntrinsics::ID) _intrinsic_id;           }
+  void     set_intrinsic_id(vmIntrinsics::ID id) {                           _intrinsic_id = (u1) id; }
+
+  // Helper routines for intrinsic_id() and vmIntrinsics::method().
+  void init_intrinsic_id();     // updates from _none if a match
+  static vmSymbols::SID klass_id_for_intrinsics(klassOop holder);

  // On-stack replacement support
  bool has_osr_nmethod()                         { return instanceKlass::cast(method_holder())->lookup_osr_nmethod(this, InvocationEntryBci) != NULL; }
@ -635,9 +627,6 @@ class methodOopDesc : public oopDesc {
  void set_size_of_parameters(int size)          { _size_of_parameters = size; }
 private:

-  // Helper routine for intrinsic_id().
-  vmIntrinsics::ID compute_intrinsic_id() const;
-
  // Inlined elements
  address* native_function_addr() const          { assert(is_native(), "must be native"); return (address*) (this+1); }
  address* signature_handler_addr() const        { return native_function_addr() + 1; }
--- a/hotspot/src/share/vm/oops/objArrayKlass.cpp
+++ b/hotspot/src/share/vm/oops/objArrayKlass.cpp
@ -84,8 +84,6 @@ oop objArrayKlass::multi_allocate(int rank, jint* sizes, TRAPS) {
 template <class T> void objArrayKlass::do_copy(arrayOop s, T* src,
                               arrayOop d, T* dst, int length, TRAPS) {

-  const size_t word_len = objArrayOopDesc::array_size(length);
-
  BarrierSet* bs = Universe::heap()->barrier_set();
  // For performance reasons, we assume we are that the write barrier we
  // are using has optimized modes for arrays of references.  At least one
@ -93,11 +91,10 @@ template <class T> void objArrayKlass::do_copy(arrayOop s, T* src,
  assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt");
  assert(bs->has_write_ref_array_pre_opt(), "For pre-barrier as well.");

-  MemRegion dst_mr = MemRegion((HeapWord*)dst, word_len);
  if (s == d) {
    // since source and destination are equal we do not need conversion checks.
    assert(length > 0, "sanity check");
-    bs->write_ref_array_pre(dst_mr);
+    bs->write_ref_array_pre(dst, length);
    Copy::conjoint_oops_atomic(src, dst, length);
  } else {
    // We have to make sure all elements conform to the destination array
@ -105,7 +102,7 @@ template <class T> void objArrayKlass::do_copy(arrayOop s, T* src,
    klassOop stype = objArrayKlass::cast(s->klass())->element_klass();
    if (stype == bound || Klass::cast(stype)->is_subtype_of(bound)) {
      // elements are guaranteed to be subtypes, so no check necessary
-      bs->write_ref_array_pre(dst_mr);
+      bs->write_ref_array_pre(dst, length);
      Copy::conjoint_oops_atomic(src, dst, length);
    } else {
      // slow case: need individual subtype checks
@ -137,6 +134,7 @@ template <class T> void objArrayKlass::do_copy(arrayOop s, T* src,
      }
    }
  }
+  const size_t word_len = objArrayOopDesc::array_size(length);
  bs->write_ref_array(MemRegion((HeapWord*)dst, word_len));
 }

--- a/hotspot/src/share/vm/oops/oop.inline.hpp
+++ b/hotspot/src/share/vm/oops/oop.inline.hpp
@ -148,12 +148,14 @@ inline bool oopDesc::is_null(narrowOop obj) { return obj == 0; }

 inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
  assert(!is_null(v), "oop value can never be zero");
+  assert(Universe::heap()->is_in_reserved(v), "Address not in heap");
  address base = Universe::narrow_oop_base();
  int    shift = Universe::narrow_oop_shift();
  uint64_t  pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1));
  assert(OopEncodingHeapMax > pd, "change encoding max if new encoding");
  uint64_t result = pd >> shift;
  assert((result & CONST64(0xffffffff00000000)) == 0, "narrow oop overflow");
+  assert(decode_heap_oop(result) == v, "reversibility");
  return (narrowOop)result;
 }

@ -449,7 +451,7 @@ inline void update_barrier_set(void* p, oop v) {
  oopDesc::bs()->write_ref_field(p, v);
 }

-inline void update_barrier_set_pre(void* p, oop v) {
+template <class T> inline void update_barrier_set_pre(T* p, oop v) {
  oopDesc::bs()->write_ref_field_pre(p, v);
 }

@ -459,15 +461,15 @@ template <class T> inline void oop_store(T* p, oop v) {
  } else {
    update_barrier_set_pre(p, v);
    oopDesc::encode_store_heap_oop(p, v);
-    update_barrier_set(p, v);
+    update_barrier_set((void*)p, v);  // cast away type
  }
 }

 template <class T> inline void oop_store(volatile T* p, oop v) {
-  update_barrier_set_pre((void*)p, v);
+  update_barrier_set_pre((T*)p, v);   // cast away volatile
  // Used by release_obj_field_put, so use release_store_ptr.
  oopDesc::release_encode_store_heap_oop(p, v);
-  update_barrier_set((void*)p, v);
+  update_barrier_set((void*)p, v);    // cast away type
 }

 template <class T> inline void oop_store_without_check(T* p, oop v) {
--- a/hotspot/src/share/vm/oops/oopsHierarchy.hpp
+++ b/hotspot/src/share/vm/oops/oopsHierarchy.hpp
@ -29,6 +29,7 @@
 typedef juint narrowOop; // Offset instead of address for an oop within a java object
 typedef class klassOopDesc* wideKlassOop; // to keep SA happy and unhandled oop
                                          // detector happy.
+typedef void* OopOrNarrowOopStar;

 #ifndef CHECK_UNHANDLED_OOPS

--- a/hotspot/src/share/vm/opto/cfgnode.cpp
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp
@ -1796,8 +1796,12 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
    for (uint i=1; i<req(); ++i) {// For all paths in
      Node *ii = in(i);
      if (ii->is_DecodeN() && ii->bottom_type() == bottom_type()) {
-        has_decodeN = true;
-        in_decodeN = ii->in(1);
+        // Note: in_decodeN is used only to define the type of new phi.
+        // Find a non dead path otherwise phi type will be wrong.
+        if (ii->in(1)->bottom_type() != Type::TOP) {
+          has_decodeN = true;
+          in_decodeN = ii->in(1);
+        }
      } else if (!ii->is_Phi()) {
        may_push = false;
      }
@ -1805,7 +1809,6 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {

    if (has_decodeN && may_push) {
      PhaseIterGVN *igvn = phase->is_IterGVN();
-      // Note: in_decodeN is used only to define the type of new phi here.
      PhiNode *new_phi = PhiNode::make_blank(in(0), in_decodeN);
      uint orig_cnt = req();
      for (uint i=1; i<req(); ++i) {// For all paths in
--- a/hotspot/src/share/vm/opto/compile.cpp
+++ b/hotspot/src/share/vm/opto/compile.cpp
@ -101,7 +101,8 @@ CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
    }
  }
  // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
-  if (m->intrinsic_id() != vmIntrinsics::_none) {
+  if (m->intrinsic_id() != vmIntrinsics::_none &&
+      m->intrinsic_id() <= vmIntrinsics::LAST_COMPILER_INLINE) {
    CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
    if (cg != NULL) {
      // Save it for next time:
@ -440,6 +441,8 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
                  _orig_pc_slot_offset_in_bytes(0),
                  _node_bundling_limit(0),
                  _node_bundling_base(NULL),
+                  _java_calls(0),
+                  _inner_loops(0),
 #ifndef PRODUCT
                  _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
                  _printer(IdealGraphPrinter::printer()),
@ -710,6 +713,8 @@ Compile::Compile( ciEnv* ci_env,
    _code_buffer("Compile::Fill_buffer"),
    _node_bundling_limit(0),
    _node_bundling_base(NULL),
+    _java_calls(0),
+    _inner_loops(0),
 #ifndef PRODUCT
    _trace_opto_output(TraceOptoOutput),
    _printer(NULL),
@ -1850,22 +1855,26 @@ struct Final_Reshape_Counts : public StackObj {
  int  _float_count;            // count float ops requiring 24-bit precision
  int  _double_count;           // count double ops requiring more precision
  int  _java_call_count;        // count non-inlined 'java' calls
+  int  _inner_loop_count;       // count loops which need alignment
  VectorSet _visited;           // Visitation flags
  Node_List _tests;             // Set of IfNodes & PCTableNodes

  Final_Reshape_Counts() :
-    _call_count(0), _float_count(0), _double_count(0), _java_call_count(0),
+    _call_count(0), _float_count(0), _double_count(0),
+    _java_call_count(0), _inner_loop_count(0),
    _visited( Thread::current()->resource_area() ) { }

  void inc_call_count  () { _call_count  ++; }
  void inc_float_count () { _float_count ++; }
  void inc_double_count() { _double_count++; }
  void inc_java_call_count() { _java_call_count++; }
+  void inc_inner_loop_count() { _inner_loop_count++; }

  int  get_call_count  () const { return _call_count  ; }
  int  get_float_count () const { return _float_count ; }
  int  get_double_count() const { return _double_count; }
  int  get_java_call_count() const { return _java_call_count; }
+  int  get_inner_loop_count() const { return _inner_loop_count; }
 };

 static bool oop_offset_is_sane(const TypeInstPtr* tp) {
@ -1877,7 +1886,7 @@ static bool oop_offset_is_sane(const TypeInstPtr* tp) {

 //------------------------------final_graph_reshaping_impl----------------------
 // Implement items 1-5 from final_graph_reshaping below.
-static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
+static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {

  if ( n->outcnt() == 0 ) return; // dead node
  uint nop = n->Opcode();
@ -1919,13 +1928,13 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
  case Op_CmpF:
  case Op_CmpF3:
  // case Op_ConvL2F: // longs are split into 32-bit halves
-    fpu.inc_float_count();
+    frc.inc_float_count();
    break;

  case Op_ConvF2D:
  case Op_ConvD2F:
-    fpu.inc_float_count();
-    fpu.inc_double_count();
+    frc.inc_float_count();
+    frc.inc_double_count();
    break;

  // Count all double operations that may use FPU
@ -1942,7 +1951,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
  case Op_ConD:
  case Op_CmpD:
  case Op_CmpD3:
-    fpu.inc_double_count();
+    frc.inc_double_count();
    break;
  case Op_Opaque1:              // Remove Opaque Nodes before matching
  case Op_Opaque2:              // Remove Opaque Nodes before matching
@ -1951,7 +1960,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
  case Op_CallStaticJava:
  case Op_CallJava:
  case Op_CallDynamicJava:
-    fpu.inc_java_call_count(); // Count java call site;
+    frc.inc_java_call_count(); // Count java call site;
  case Op_CallRuntime:
  case Op_CallLeaf:
  case Op_CallLeafNoFP: {
@ -1962,7 +1971,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
    // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
    // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
    if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
-      fpu.inc_call_count();   // Count the call site
+      frc.inc_call_count();   // Count the call site
    } else {                  // See if uncommon argument is shared
      Node *n = call->in(TypeFunc::Parms);
      int nop = n->Opcode();
@ -1983,11 +1992,11 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
  case Op_StoreD:
  case Op_LoadD:
  case Op_LoadD_unaligned:
-    fpu.inc_double_count();
+    frc.inc_double_count();
    goto handle_mem;
  case Op_StoreF:
  case Op_LoadF:
-    fpu.inc_float_count();
+    frc.inc_float_count();
    goto handle_mem;

  case Op_StoreB:
@ -2324,6 +2333,12 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
      n->subsume_by(btp);
    }
    break;
+  case Op_Loop:
+  case Op_CountedLoop:
+    if (n->as_Loop()->is_inner_loop()) {
+      frc.inc_inner_loop_count();
+    }
+    break;
  default:
    assert( !n->is_Call(), "" );
    assert( !n->is_Mem(), "" );
@ -2332,17 +2347,17 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {

  // Collect CFG split points
  if (n->is_MultiBranch())
-    fpu._tests.push(n);
+    frc._tests.push(n);
 }

 //------------------------------final_graph_reshaping_walk---------------------
 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
 // requires that the walk visits a node's inputs before visiting the node.
-static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) {
+static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {
  ResourceArea *area = Thread::current()->resource_area();
  Unique_Node_List sfpt(area);

-  fpu._visited.set(root->_idx); // first, mark node as visited
+  frc._visited.set(root->_idx); // first, mark node as visited
  uint cnt = root->req();
  Node *n = root;
  uint  i = 0;
@ -2351,7 +2366,7 @@ static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Re
      // Place all non-visited non-null inputs onto stack
      Node* m = n->in(i);
      ++i;
-      if (m != NULL && !fpu._visited.test_set(m->_idx)) {
+      if (m != NULL && !frc._visited.test_set(m->_idx)) {
        if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL)
          sfpt.push(m);
        cnt = m->req();
@ -2361,7 +2376,7 @@ static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Re
      }
    } else {
      // Now do post-visit work
-      final_graph_reshaping_impl( n, fpu );
+      final_graph_reshaping_impl( n, frc );
      if (nstack.is_empty())
        break;             // finished
      n = nstack.node();   // Get node from stack
@ -2442,16 +2457,16 @@ bool Compile::final_graph_reshaping() {
    return true;
  }

-  Final_Reshape_Counts fpu;
+  Final_Reshape_Counts frc;

  // Visit everybody reachable!
  // Allocate stack of size C->unique()/2 to avoid frequent realloc
  Node_Stack nstack(unique() >> 1);
-  final_graph_reshaping_walk(nstack, root(), fpu);
+  final_graph_reshaping_walk(nstack, root(), frc);

  // Check for unreachable (from below) code (i.e., infinite loops).
-  for( uint i = 0; i < fpu._tests.size(); i++ ) {
-    MultiBranchNode *n = fpu._tests[i]->as_MultiBranch();
+  for( uint i = 0; i < frc._tests.size(); i++ ) {
+    MultiBranchNode *n = frc._tests[i]->as_MultiBranch();
    // Get number of CFG targets.
    // Note that PCTables include exception targets after calls.
    uint required_outcnt = n->required_outcnt();
@ -2497,7 +2512,7 @@ bool Compile::final_graph_reshaping() {
    // Check that I actually visited all kids.  Unreached kids
    // must be infinite loops.
    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++)
-      if (!fpu._visited.test(n->fast_out(j)->_idx)) {
+      if (!frc._visited.test(n->fast_out(j)->_idx)) {
        record_method_not_compilable("infinite loop");
        return true;            // Found unvisited kid; must be unreach
      }
@ -2506,13 +2521,14 @@ bool Compile::final_graph_reshaping() {
  // If original bytecodes contained a mixture of floats and doubles
  // check if the optimizer has made it homogenous, item (3).
  if( Use24BitFPMode && Use24BitFP &&
-      fpu.get_float_count() > 32 &&
-      fpu.get_double_count() == 0 &&
-      (10 * fpu.get_call_count() < fpu.get_float_count()) ) {
+      frc.get_float_count() > 32 &&
+      frc.get_double_count() == 0 &&
+      (10 * frc.get_call_count() < frc.get_float_count()) ) {
    set_24_bit_selection_and_mode( false,  true );
  }

-  set_has_java_calls(fpu.get_java_call_count() > 0);
+  set_java_calls(frc.get_java_call_count());
+  set_inner_loops(frc.get_inner_loop_count());

  // No infinite loops, no reason to bail out.
  return false;
--- a/hotspot/src/share/vm/opto/compile.hpp
+++ b/hotspot/src/share/vm/opto/compile.hpp
@ -223,7 +223,8 @@ class Compile : public Phase {
  PhaseCFG*             _cfg;                   // Results of CFG finding
  bool                  _select_24_bit_instr;   // We selected an instruction with a 24-bit result
  bool                  _in_24_bit_fp_mode;     // We are emitting instructions with 24-bit results
-  bool                  _has_java_calls;        // True if the method has java calls
+  int                   _java_calls;            // Number of java calls in the method
+  int                   _inner_loops;           // Number of inner loops in the method
  Matcher*              _matcher;               // Engine to map ideal to machine instructions
  PhaseRegAlloc*        _regalloc;              // Results of register allocation.
  int                   _frame_slots;           // Size of total frame in stack slots
@ -505,7 +506,9 @@ class Compile : public Phase {
  PhaseCFG*         cfg()                       { return _cfg; }
  bool              select_24_bit_instr() const { return _select_24_bit_instr; }
  bool              in_24_bit_fp_mode() const   { return _in_24_bit_fp_mode; }
-  bool              has_java_calls() const      { return _has_java_calls; }
+  bool              has_java_calls() const      { return _java_calls > 0; }
+  int               java_calls() const          { return _java_calls; }
+  int               inner_loops() const         { return _inner_loops; }
  Matcher*          matcher()                   { return _matcher; }
  PhaseRegAlloc*    regalloc()                  { return _regalloc; }
  int               frame_slots() const         { return _frame_slots; }
@ -532,7 +535,8 @@ class Compile : public Phase {
    _in_24_bit_fp_mode   = mode;
  }

-  void set_has_java_calls(bool z) { _has_java_calls = z; }
+  void  set_java_calls(int z) { _java_calls  = z; }
+  void set_inner_loops(int z) { _inner_loops = z; }

  // Instruction bits passed off to the VM
  int               code_size()                 { return _method_size; }
--- a/hotspot/src/share/vm/opto/escape.cpp
+++ b/hotspot/src/share/vm/opto/escape.cpp
@ -578,11 +578,24 @@ PhiNode *ConnectionGraph::create_split_phi(PhiNode *orig_phi, int alias_idx, Gro
  if (phi_alias_idx == alias_idx) {
    return orig_phi;
  }
-  // have we already created a Phi for this alias index?
+  // Have we recently created a Phi for this alias index?
  PhiNode *result = get_map_phi(orig_phi->_idx);
  if (result != NULL && C->get_alias_index(result->adr_type()) == alias_idx) {
    return result;
  }
+  // Previous check may fail when the same wide memory Phi was split into Phis
+  // for different memory slices. Search all Phis for this region.
+  if (result != NULL) {
+    Node* region = orig_phi->in(0);
+    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+      Node* phi = region->fast_out(i);
+      if (phi->is_Phi() &&
+          C->get_alias_index(phi->as_Phi()->adr_type()) == alias_idx) {
+        assert(phi->_idx >= nodes_size(), "only new Phi per instance memory slice");
+        return phi->as_Phi();
+      }
+    }
+  }
  if ((int)C->unique() + 2*NodeLimitFudgeFactor > MaxNodeLimit) {
    if (C->do_escape_analysis() == true && !C->failing()) {
      // Retry compilation without escape analysis.
@ -595,6 +608,7 @@ PhiNode *ConnectionGraph::create_split_phi(PhiNode *orig_phi, int alias_idx, Gro
  orig_phi_worklist.append_if_missing(orig_phi);
  const TypePtr *atype = C->get_adr_type(alias_idx);
  result = PhiNode::make(orig_phi->in(0), NULL, Type::MEMORY, atype);
+  C->copy_node_notes_to(result, orig_phi);
  set_map_phi(orig_phi->_idx, result);
  igvn->set_type(result, result->bottom_type());
  record_for_optimizer(result);
--- a/hotspot/src/share/vm/opto/graphKit.cpp
+++ b/hotspot/src/share/vm/opto/graphKit.cpp
@ -1373,11 +1373,12 @@ Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
  return st;
 }

+
 void GraphKit::pre_barrier(Node* ctl,
                           Node* obj,
                           Node* adr,
-                           uint adr_idx,
-                           Node *val,
+                           uint  adr_idx,
+                           Node* val,
                           const TypeOopPtr* val_type,
                           BasicType bt) {
  BarrierSet* bs = Universe::heap()->barrier_set();
@ -1385,7 +1386,7 @@ void GraphKit::pre_barrier(Node* ctl,
  switch (bs->kind()) {
    case BarrierSet::G1SATBCT:
    case BarrierSet::G1SATBCTLogging:
-        g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt);
+      g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt);
      break;

    case BarrierSet::CardTableModRef:
@ -1404,8 +1405,8 @@ void GraphKit::post_barrier(Node* ctl,
                            Node* store,
                            Node* obj,
                            Node* adr,
-                            uint adr_idx,
-                            Node *val,
+                            uint  adr_idx,
+                            Node* val,
                            BasicType bt,
                            bool use_precise) {
  BarrierSet* bs = Universe::heap()->barrier_set();
@ -1413,7 +1414,7 @@ void GraphKit::post_barrier(Node* ctl,
  switch (bs->kind()) {
    case BarrierSet::G1SATBCT:
    case BarrierSet::G1SATBCTLogging:
-        g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise);
+      g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise);
      break;

    case BarrierSet::CardTableModRef:
@ -1431,42 +1432,36 @@ void GraphKit::post_barrier(Node* ctl,
  }
 }

-Node* GraphKit::store_oop_to_object(Node* ctl,
-                                    Node* obj,
-                                    Node* adr,
-                                    const TypePtr* adr_type,
-                                    Node *val,
-                                    const TypeOopPtr* val_type,
-                                    BasicType bt) {
+Node* GraphKit::store_oop(Node* ctl,
+                          Node* obj,
+                          Node* adr,
+                          const TypePtr* adr_type,
+                          Node* val,
+                          const TypeOopPtr* val_type,
+                          BasicType bt,
+                          bool use_precise) {
+
+  set_control(ctl);
+  if (stopped()) return top(); // Dead path ?
+
+  assert(bt == T_OBJECT, "sanity");
+  assert(val != NULL, "not dead path");
  uint adr_idx = C->get_alias_index(adr_type);
-  Node* store;
-  pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
-  store = store_to_memory(control(), adr, val, bt, adr_idx);
-  post_barrier(control(), store, obj, adr, adr_idx, val, bt, false);
-  return store;
-}
-
-Node* GraphKit::store_oop_to_array(Node* ctl,
-                                   Node* obj,
-                                   Node* adr,
-                                   const TypePtr* adr_type,
-                                   Node *val,
-                                   const TypeOopPtr* val_type,
-                                   BasicType bt) {
-  uint adr_idx = C->get_alias_index(adr_type);
-  Node* store;
-  pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
-  store = store_to_memory(control(), adr, val, bt, adr_idx);
-  post_barrier(control(), store, obj, adr, adr_idx, val, bt, true);
+  assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+
+  pre_barrier(control(), obj, adr, adr_idx, val, val_type, bt);
+  Node* store = store_to_memory(control(), adr, val, bt, adr_idx);
+  post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise);
  return store;
 }

+// Could be an array or object we don't know at compile time (unsafe ref.)
 Node* GraphKit::store_oop_to_unknown(Node* ctl,
-                                     Node* obj,
-                                     Node* adr,
-                                     const TypePtr* adr_type,
-                                     Node *val,
-                                     BasicType bt) {
+                             Node* obj,   // containing obj
+                             Node* adr,  // actual adress to store val at
+                             const TypePtr* adr_type,
+                             Node* val,
+                             BasicType bt) {
  Compile::AliasType* at = C->alias_type(adr_type);
  const TypeOopPtr* val_type = NULL;
  if (adr_type->isa_instptr()) {
@ -1485,12 +1480,7 @@ Node* GraphKit::store_oop_to_unknown(Node* ctl,
  if (val_type == NULL) {
    val_type = TypeInstPtr::BOTTOM;
  }
-
-  uint adr_idx = at->index();
-  pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
-  Node* store = store_to_memory(control(), adr, val, bt, adr_idx);
-  post_barrier(control(), store, obj, adr, adr_idx, val, bt, true);
-  return store;
+  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true);
 }


@ -1804,93 +1794,6 @@ Node* GraphKit::just_allocated_object(Node* current_control) {
 }


-//------------------------------store_barrier----------------------------------
-// Insert a write-barrier store.  This is to let generational GC work; we have
-// to flag all oop-stores before the next GC point.
-void GraphKit::write_barrier_post(Node* oop_store, Node* obj, Node* adr,
-                                  Node* val, bool use_precise) {
-  // No store check needed if we're storing a NULL or an old object
-  // (latter case is probably a string constant). The concurrent
-  // mark sweep garbage collector, however, needs to have all nonNull
-  // oop updates flagged via card-marks.
-  if (val != NULL && val->is_Con()) {
-    // must be either an oop or NULL
-    const Type* t = val->bottom_type();
-    if (t == TypePtr::NULL_PTR || t == Type::TOP)
-      // stores of null never (?) need barriers
-      return;
-    ciObject* con = t->is_oopptr()->const_oop();
-    if (con != NULL
-        && con->is_perm()
-        && Universe::heap()->can_elide_permanent_oop_store_barriers())
-      // no store barrier needed, because no old-to-new ref created
-      return;
-  }
-
-  if (use_ReduceInitialCardMarks()
-      && obj == just_allocated_object(control())) {
-    // We can skip marks on a freshly-allocated object.
-    // Keep this code in sync with do_eager_card_mark in runtime.cpp.
-    // That routine eagerly marks the occasional object which is produced
-    // by the slow path, so that we don't have to do it here.
-    return;
-  }
-
-  if (!use_precise) {
-    // All card marks for a (non-array) instance are in one place:
-    adr = obj;
-  }
-  // (Else it's an array (or unknown), and we want more precise card marks.)
-  assert(adr != NULL, "");
-
-  // Get the alias_index for raw card-mark memory
-  int adr_type = Compile::AliasIdxRaw;
-  // Convert the pointer to an int prior to doing math on it
-  Node* cast = _gvn.transform(new (C, 2) CastP2XNode(control(), adr));
-  // Divide by card size
-  assert(Universe::heap()->barrier_set()->kind() == BarrierSet::CardTableModRef,
-         "Only one we handle so far.");
-  CardTableModRefBS* ct =
-    (CardTableModRefBS*)(Universe::heap()->barrier_set());
-  Node *b = _gvn.transform(new (C, 3) URShiftXNode( cast, _gvn.intcon(CardTableModRefBS::card_shift) ));
-  // We store into a byte array, so do not bother to left-shift by zero
-  Node *c = byte_map_base_node();
-  // Combine
-  Node *sb_ctl = control();
-  Node *sb_adr = _gvn.transform(new (C, 4) AddPNode( top()/*no base ptr*/, c, b ));
-  Node *sb_val = _gvn.intcon(0);
-  // Smash zero into card
-  if( !UseConcMarkSweepGC ) {
-    BasicType bt = T_BYTE;
-    store_to_memory(sb_ctl, sb_adr, sb_val, bt, adr_type);
-  } else {
-    // Specialized path for CM store barrier
-    cms_card_mark( sb_ctl, sb_adr, sb_val, oop_store);
-  }
-}
-
-// Specialized path for CMS store barrier
-void GraphKit::cms_card_mark(Node* ctl, Node* adr, Node* val, Node *oop_store) {
-  BasicType bt = T_BYTE;
-  int adr_idx = Compile::AliasIdxRaw;
-  Node* mem = memory(adr_idx);
-
-  // The type input is NULL in PRODUCT builds
-  const TypePtr* type = NULL;
-  debug_only(type = C->get_adr_type(adr_idx));
-
-  // Add required edge to oop_store, optimizer does not support precedence edges.
-  // Convert required edge to precedence edge before allocation.
-  Node *store = _gvn.transform( new (C, 5) StoreCMNode(ctl, mem, adr, type, val, oop_store) );
-  set_memory(store, adr_idx);
-
-  // For CMS, back-to-back card-marks can only remove the first one
-  // and this requires DU info.  Push on worklist for optimizer.
-  if (mem->req() > MemNode::Address && adr == mem->in(MemNode::Address))
-    record_for_igvn(store);
-}
-
-
 void GraphKit::round_double_arguments(ciMethod* dest_method) {
  // (Note:  TypeFunc::make has a cache that makes this fast.)
  const TypeFunc* tf    = TypeFunc::make(dest_method);
@ -3215,6 +3118,79 @@ InitializeNode* AllocateNode::initialization() {
  return NULL;
 }

+//----------------------------- store barriers ----------------------------
+#define __ ideal.
+
+void GraphKit::sync_kit(IdealKit& ideal) {
+  // Final sync IdealKit and graphKit.
+  __ drain_delay_transform();
+  set_all_memory(__ merged_memory());
+  set_control(__ ctrl());
+}
+
+// vanilla/CMS post barrier
+// Insert a write-barrier store.  This is to let generational GC work; we have
+// to flag all oop-stores before the next GC point.
+void GraphKit::write_barrier_post(Node* oop_store,
+                                  Node* obj,
+                                  Node* adr,
+                                  Node* val,
+                                  bool use_precise) {
+  // No store check needed if we're storing a NULL or an old object
+  // (latter case is probably a string constant). The concurrent
+  // mark sweep garbage collector, however, needs to have all nonNull
+  // oop updates flagged via card-marks.
+  if (val != NULL && val->is_Con()) {
+    // must be either an oop or NULL
+    const Type* t = val->bottom_type();
+    if (t == TypePtr::NULL_PTR || t == Type::TOP)
+      // stores of null never (?) need barriers
+      return;
+    ciObject* con = t->is_oopptr()->const_oop();
+    if (con != NULL
+        && con->is_perm()
+        && Universe::heap()->can_elide_permanent_oop_store_barriers())
+      // no store barrier needed, because no old-to-new ref created
+      return;
+  }
+
+  if (!use_precise) {
+    // All card marks for a (non-array) instance are in one place:
+    adr = obj;
+  }
+  // (Else it's an array (or unknown), and we want more precise card marks.)
+  assert(adr != NULL, "");
+
+  IdealKit ideal(gvn(), control(), merged_memory(), true);
+
+  // Convert the pointer to an int prior to doing math on it
+  Node* cast = __ CastPX(__ ctrl(), adr);
+
+  // Divide by card size
+  assert(Universe::heap()->barrier_set()->kind() == BarrierSet::CardTableModRef,
+         "Only one we handle so far.");
+  Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
+
+  // Combine card table base and card offset
+  Node* card_adr = __ AddP(__ top(), byte_map_base_node(), card_offset );
+
+  // Get the alias_index for raw card-mark memory
+  int adr_type = Compile::AliasIdxRaw;
+  // Smash zero into card
+  Node*   zero = __ ConI(0);
+  BasicType bt = T_BYTE;
+  if( !UseConcMarkSweepGC ) {
+    __ store(__ ctrl(), card_adr, zero, bt, adr_type);
+  } else {
+    // Specialized path for CM store barrier
+    __ storeCM(__ ctrl(), card_adr, zero, oop_store, bt, adr_type);
+  }
+
+  // Final sync IdealKit and GraphKit.
+  sync_kit(ideal);
+}
+
+// G1 pre/post barriers
 void GraphKit::g1_write_barrier_pre(Node* obj,
                                    Node* adr,
                                    uint alias_idx,
@ -3222,10 +3198,8 @@ void GraphKit::g1_write_barrier_pre(Node* obj,
                                    const TypeOopPtr* val_type,
                                    BasicType bt) {
  IdealKit ideal(gvn(), control(), merged_memory(), true);
-#define __ ideal.
-  __ declares_done();

-  Node* thread = __ thread();
+  Node* tls = __ thread(); // ThreadLocalStorage

  Node* no_ctrl = NULL;
  Node* no_base = __ top();
@ -3248,9 +3222,9 @@ void GraphKit::g1_write_barrier_pre(Node* obj,

  // set_control( ctl);

-  Node* marking_adr = __ AddP(no_base, thread, __ ConX(marking_offset));
-  Node* buffer_adr  = __ AddP(no_base, thread, __ ConX(buffer_offset));
-  Node* index_adr   = __ AddP(no_base, thread, __ ConX(index_offset));
+  Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
+  Node* buffer_adr  = __ AddP(no_base, tls, __ ConX(buffer_offset));
+  Node* index_adr   = __ AddP(no_base, tls, __ ConX(index_offset));

  // Now some of the values

@ -3278,55 +3252,52 @@ void GraphKit::g1_write_barrier_pre(Node* obj,
        Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
        Node* next_indexX = next_index;
 #ifdef _LP64
-          // We could refine the type for what it's worth
-          // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
-          next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
-#endif // _LP64
+        // We could refine the type for what it's worth
+        // const TypeLong* lidxtype = TypeLong::make(CONST64(0), get_size_from_queue);
+        next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
+#endif

        // Now get the buffer location we will log the original value into and store it
-
        Node *log_addr = __ AddP(no_base, buffer, next_indexX);
-        // __ store(__ ctrl(), log_addr, orig, T_OBJECT, C->get_alias_index(TypeOopPtr::BOTTOM));
        __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw);

-
        // update the index
-        // __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
-        // This is a hack to force this store to occur before the oop store that is coming up
-        __ store(__ ctrl(), index_adr, next_index, T_INT, C->get_alias_index(TypeOopPtr::BOTTOM));
+        __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);

      } __ else_(); {

        // logging buffer is full, call the runtime
        const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
-        // __ make_leaf_call(tf, OptoRuntime::g1_wb_pre_Java(), "g1_wb_pre", orig, thread);
-        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, thread);
-      } __ end_if();
-    } __ end_if();
-  } __ end_if();
+        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, tls);
+      } __ end_if();  // (!index)
+    } __ end_if();  // (orig != NULL)
+  } __ end_if();  // (!marking)

-  __ drain_delay_transform();
-  set_control( __ ctrl());
-  set_all_memory( __ merged_memory());
-
-#undef __
+  // Final sync IdealKit and GraphKit.
+  sync_kit(ideal);
 }

 //
 // Update the card table and add card address to the queue
 //
-void GraphKit::g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr, Node* buffer, const TypeFunc* tf) {
-#define __ ideal->
+void GraphKit::g1_mark_card(IdealKit& ideal,
+                            Node* card_adr,
+                            Node* oop_store,
+                            Node* index,
+                            Node* index_adr,
+                            Node* buffer,
+                            const TypeFunc* tf) {
+
  Node* zero = __ ConI(0);
  Node* no_base = __ top();
  BasicType card_bt = T_BYTE;
  // Smash zero into card. MUST BE ORDERED WRT TO STORE
-  __ storeCM(__ ctrl(), card_adr, zero, store, card_bt, Compile::AliasIdxRaw);
+  __ storeCM(__ ctrl(), card_adr, zero, oop_store, card_bt, Compile::AliasIdxRaw);

  //  Now do the queue work
  __ if_then(index, BoolTest::ne, zero); {

-    Node* next_index = __ SubI(index,  __ ConI(sizeof(intptr_t)));
+    Node* next_index = __ SubI(index, __ ConI(sizeof(intptr_t)));
    Node* next_indexX = next_index;
 #ifdef _LP64
    // We could refine the type for what it's worth
@ -3341,10 +3312,10 @@ void GraphKit::g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node*
  } __ else_(); {
    __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
  } __ end_if();
-#undef __
+
 }

-void GraphKit::g1_write_barrier_post(Node* store,
+void GraphKit::g1_write_barrier_post(Node* oop_store,
                                     Node* obj,
                                     Node* adr,
                                     uint alias_idx,
@ -3369,10 +3340,8 @@ void GraphKit::g1_write_barrier_post(Node* store,
  assert(adr != NULL, "");

  IdealKit ideal(gvn(), control(), merged_memory(), true);
-#define __ ideal.
-  __ declares_done();

-  Node* thread = __ thread();
+  Node* tls = __ thread(); // ThreadLocalStorage

  Node* no_ctrl = NULL;
  Node* no_base = __ top();
@ -3394,8 +3363,8 @@ void GraphKit::g1_write_barrier_post(Node* store,

  // Pointers into the thread

-  Node* buffer_adr = __ AddP(no_base, thread, __ ConX(buffer_offset));
-  Node* index_adr =  __ AddP(no_base, thread, __ ConX(index_offset));
+  Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
+  Node* index_adr =  __ AddP(no_base, tls, __ ConX(index_offset));

  // Now some values

@ -3404,18 +3373,14 @@ void GraphKit::g1_write_barrier_post(Node* store,


  // Convert the store obj pointer to an int prior to doing math on it
-  // Use addr not obj gets accurate card marks
-
-  // Node* cast = __ CastPX(no_ctrl, adr /* obj */);
-
  // Must use ctrl to prevent "integerized oop" existing across safepoint
-  Node* cast =  __ CastPX(__ ctrl(), ( use_precise ? adr : obj ));
+  Node* cast =  __ CastPX(__ ctrl(), adr);

  // Divide pointer by card size
  Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );

  // Combine card table base and card offset
-  Node *card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
+  Node* card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );

  // If we know the value being stored does it cross regions?

@ -3439,18 +3404,17 @@ void GraphKit::g1_write_barrier_post(Node* store,
        Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);

        __ if_then(card_val, BoolTest::ne, zero); {
-          g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf);
+          g1_mark_card(ideal, card_adr, oop_store, index, index_adr, buffer, tf);
        } __ end_if();
      } __ end_if();
    } __ end_if();
  } else {
-    g1_mark_card(&ideal, card_adr, store, index, index_adr, buffer, tf);
+    // Object.clone() instrinsic uses this path.
+    g1_mark_card(ideal, card_adr, oop_store, index, index_adr, buffer, tf);
  }

-
-  __ drain_delay_transform();
-  set_control( __ ctrl());
-  set_all_memory( __ merged_memory());
+  // Final sync IdealKit and GraphKit.
+  sync_kit(ideal);
+}
 #undef __

-}
--- a/hotspot/src/share/vm/opto/graphKit.hpp
+++ b/hotspot/src/share/vm/opto/graphKit.hpp
@ -449,13 +449,24 @@ class GraphKit : public Phase {
  //
  // If val==NULL, it is taken to be a completely unknown value. QQQ

+  Node* store_oop(Node* ctl,
+                  Node* obj,   // containing obj
+                  Node* adr,  // actual adress to store val at
+                  const TypePtr* adr_type,
+                  Node* val,
+                  const TypeOopPtr* val_type,
+                  BasicType bt,
+                  bool use_precise);
+
  Node* store_oop_to_object(Node* ctl,
                            Node* obj,   // containing obj
                            Node* adr,  // actual adress to store val at
                            const TypePtr* adr_type,
                            Node* val,
                            const TypeOopPtr* val_type,
-                            BasicType bt);
+                            BasicType bt) {
+    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false);
+  }

  Node* store_oop_to_array(Node* ctl,
                           Node* obj,   // containing obj
@ -463,7 +474,9 @@ class GraphKit : public Phase {
                           const TypePtr* adr_type,
                           Node* val,
                           const TypeOopPtr* val_type,
-                           BasicType bt);
+                           BasicType bt) {
+    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true);
+  }

  // Could be an array or object we don't know at compile time (unsafe ref.)
  Node* store_oop_to_unknown(Node* ctl,
@ -488,9 +501,6 @@ class GraphKit : public Phase {
  // Return a load of array element at idx.
  Node* load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype);

-  // CMS card-marks have an input from the corresponding oop_store
-  void  cms_card_mark(Node* ctl, Node* adr, Node* val, Node* oop_store);
-
  //---------------- Dtrace support --------------------
  void make_dtrace_method_entry_exit(ciMethod* method, bool is_entry);
  void make_dtrace_method_entry(ciMethod* method) {
@ -582,9 +592,6 @@ class GraphKit : public Phase {
    return C->too_many_recompiles(method(), bci(), reason);
  }

-  // vanilla/CMS post barrier
-  void write_barrier_post(Node *store, Node* obj, Node* adr, Node* val, bool use_precise);
-
  // Returns the object (if any) which was created the moment before.
  Node* just_allocated_object(Node* current_control);

@ -593,6 +600,11 @@ class GraphKit : public Phase {
            && Universe::heap()->can_elide_tlab_store_barriers());
  }

+  void sync_kit(IdealKit& ideal);
+
+  // vanilla/CMS post barrier
+  void write_barrier_post(Node *store, Node* obj, Node* adr, Node* val, bool use_precise);
+
  // G1 pre/post barriers
  void g1_write_barrier_pre(Node* obj,
                            Node* adr,
@ -610,7 +622,7 @@ class GraphKit : public Phase {
                             bool use_precise);
  // Helper function for g1
  private:
-  void g1_mark_card(IdealKit* ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr,
+  void g1_mark_card(IdealKit& ideal, Node* card_adr, Node* store,  Node* index, Node* index_adr,
                    Node* buffer, const TypeFunc* tf);

  public:
--- a/hotspot/src/share/vm/opto/idealKit.cpp
+++ b/hotspot/src/share/vm/opto/idealKit.cpp
@ -34,7 +34,7 @@
 const uint IdealKit::first_var = TypeFunc::Parms + 1;

 //----------------------------IdealKit-----------------------------------------
-IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_transforms) :
+IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_transforms, bool has_declarations) :
  _gvn(gvn), C(gvn.C) {
  _initial_ctrl = control;
  _initial_memory = mem;
@ -47,6 +47,9 @@ IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_trans
  _pending_cvstates = new (C->node_arena()) GrowableArray<Node*>(C->node_arena(), init_size, 0, 0);
  _delay_transform  = new (C->node_arena()) GrowableArray<Node*>(C->node_arena(), init_size, 0, 0);
  DEBUG_ONLY(_state = new (C->node_arena()) GrowableArray<int>(C->node_arena(), init_size, 0, 0));
+  if (!has_declarations) {
+     declarations_done();
+  }
 }

 //-------------------------------if_then-------------------------------------
@ -97,7 +100,7 @@ void IdealKit::else_() {
 //-------------------------------end_if-------------------------------------
 // Merge the "then" and "else" cvstates.
 //
-// The if_then() pushed the current state for later use
+// The if_then() pushed a copy of the current state for later use
 // as the initial state for a future "else" clause.  The
 // current state then became the initial state for the
 // then clause.  If an "else" clause was encountered, it will
@ -258,8 +261,8 @@ Node* IdealKit::promote_to_phi(Node* n, Node* reg) {
  return delay_transform(PhiNode::make(reg, n, ct));
 }

-//-----------------------------declares_done-----------------------------------
-void IdealKit::declares_done() {
+//-----------------------------declarations_done-------------------------------
+void IdealKit::declarations_done() {
  _cvstate = new_cvstate();   // initialize current cvstate
  set_ctrl(_initial_ctrl);    // initialize control in current cvstate
  set_all_memory(_initial_memory);// initialize memory in current cvstate
@ -277,7 +280,9 @@ Node* IdealKit::transform(Node* n) {

 //-----------------------------delay_transform-----------------------------------
 Node* IdealKit::delay_transform(Node* n) {
-  gvn().set_type(n, n->bottom_type());
+  if (!gvn().is_IterGVN() || !gvn().is_IterGVN()->delay_transform()) {
+    gvn().set_type(n, n->bottom_type());
+  }
  _delay_transform->push(n);
  return n;
 }
@ -321,7 +326,9 @@ IdealVariable::IdealVariable(IdealKit &k) {
 Node* IdealKit::memory(uint alias_idx) {
  MergeMemNode* mem = merged_memory();
  Node* p = mem->memory_at(alias_idx);
-  _gvn.set_type(p, Type::MEMORY);  // must be mapped
+  if (!gvn().is_IterGVN() || !gvn().is_IterGVN()->delay_transform()) {
+    _gvn.set_type(p, Type::MEMORY);  // must be mapped
+  }
  return p;
 }

@ -462,9 +469,6 @@ void IdealKit::make_leaf_call(const TypeFunc *slow_call_type,
  const TypePtr* adr_type = TypeRawPtr::BOTTOM;
  uint adr_idx = C->get_alias_index(adr_type);

-  // Clone initial memory
-  MergeMemNode* cloned_mem =  MergeMemNode::make(C, merged_memory());
-
  // Slow-path leaf call
  int size = slow_call_type->domain()->cnt();
  CallNode *call =  (CallNode*)new (C, size) CallLeafNode( slow_call_type, slow_call, leaf_name, adr_type);
@ -489,9 +493,6 @@ void IdealKit::make_leaf_call(const TypeFunc *slow_call_type,

  set_ctrl(transform( new (C, 1) ProjNode(call,TypeFunc::Control) ));

-  // Set the incoming clone of memory as current memory
-  set_all_memory(cloned_mem);
-
  // Make memory for the call
  Node* mem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) );

--- a/hotspot/src/share/vm/opto/idealKit.hpp
+++ b/hotspot/src/share/vm/opto/idealKit.hpp
@ -49,7 +49,7 @@
 // Example:
 //    Node* limit = ??
 //    IdealVariable i(kit), j(kit);
-//    declares_done();
+//    declarations_done();
 //    Node* exit = make_label(1); // 1 goto
 //    set(j, ConI(0));
 //    loop(i, ConI(0), BoolTest::lt, limit); {
@ -101,10 +101,7 @@ class IdealKit: public StackObj {
  Node* new_cvstate();                     // Create a new cvstate
  Node* cvstate() { return _cvstate; }     // current cvstate
  Node* copy_cvstate();                    // copy current cvstate
-  void set_ctrl(Node* ctrl) { _cvstate->set_req(TypeFunc::Control, ctrl); }

-  // Should this assert this is a MergeMem???
-  void set_all_memory(Node* mem){ _cvstate->set_req(TypeFunc::Memory, mem); }
  void set_memory(Node* mem, uint alias_idx );
  void do_memory_merge(Node* merging, Node* join);
  void clear(Node* m);                     // clear a cvstate
@ -132,15 +129,17 @@ class IdealKit: public StackObj {
  Node* memory(uint alias_idx);

 public:
-  IdealKit(PhaseGVN &gvn, Node* control, Node* memory, bool delay_all_transforms = false);
+  IdealKit(PhaseGVN &gvn, Node* control, Node* memory, bool delay_all_transforms = false, bool has_declarations = false);
  ~IdealKit() {
    stop();
    drain_delay_transform();
  }
  // Control
  Node* ctrl()                          { return _cvstate->in(TypeFunc::Control); }
+  void set_ctrl(Node* ctrl)             { _cvstate->set_req(TypeFunc::Control, ctrl); }
  Node* top()                           { return C->top(); }
  MergeMemNode* merged_memory()         { return _cvstate->in(TypeFunc::Memory)->as_MergeMem(); }
+  void set_all_memory(Node* mem)        { _cvstate->set_req(TypeFunc::Memory, mem); }
  void set(IdealVariable& v, Node* rhs) { _cvstate->set_req(first_var + v.id(), rhs); }
  Node* value(IdealVariable& v)         { return _cvstate->in(first_var + v.id()); }
  void dead(IdealVariable& v)           { set(v, (Node*)NULL); }
@ -155,7 +154,7 @@ class IdealKit: public StackObj {
  Node* make_label(int goto_ct);
  void bind(Node* lab);
  void goto_(Node* lab, bool bind = false);
-  void declares_done();
+  void declarations_done();
  void drain_delay_transform();

  Node* IfTrue(IfNode* iff)  { return transform(new (C,1) IfTrueNode(iff)); }
--- a/hotspot/src/share/vm/opto/ifnode.cpp
+++ b/hotspot/src/share/vm/opto/ifnode.cpp
@ -378,7 +378,18 @@ static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {

  // Force the original merge dead
  igvn->hash_delete(r);
-  r->set_req_X(0,NULL,igvn);
+  // First, remove region's dead users.
+  for (DUIterator_Last lmin, l = r->last_outs(lmin); l >= lmin;) {
+    Node* u = r->last_out(l);
+    if( u == r ) {
+      r->set_req(0, NULL);
+    } else {
+      assert(u->outcnt() == 0, "only dead users");
+      igvn->remove_dead_node(u);
+    }
+    l -= 1;
+  }
+  igvn->remove_dead_node(r);

  // Now remove the bogus extra edges used to keep things alive
  igvn->remove_dead_node( hook );
--- a/hotspot/src/share/vm/opto/library_call.cpp
+++ b/hotspot/src/share/vm/opto/library_call.cpp
@ -310,11 +310,6 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
    if (!InlineAtomicLong)  return NULL;
    break;

-  case vmIntrinsics::_Object_init:
-  case vmIntrinsics::_invoke:
-    // We do not intrinsify these; they are marked for other purposes.
-    return NULL;
-
  case vmIntrinsics::_getCallerClass:
    if (!UseNewReflection)  return NULL;
    if (!InlineReflectionGetCallerClass)  return NULL;
@ -327,6 +322,8 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
    break;

 default:
+    assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
+    assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
    break;
  }

@ -394,18 +391,11 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
  }

  if (PrintIntrinsics) {
-    switch (intrinsic_id()) {
-    case vmIntrinsics::_invoke:
-    case vmIntrinsics::_Object_init:
-      // We do not expect to inline these, so do not produce any noise about them.
-      break;
-    default:
-      tty->print("Did not inline intrinsic %s%s at bci:%d in",
-                 vmIntrinsics::name_at(intrinsic_id()),
-                 (is_virtual() ? " (virtual)" : ""), kit.bci());
-      kit.caller()->print_short_name(tty);
-      tty->print_cr(" (%d bytes)", kit.caller()->code_size());
-    }
+    tty->print("Did not inline intrinsic %s%s at bci:%d in",
+               vmIntrinsics::name_at(intrinsic_id()),
+               (is_virtual() ? " (virtual)" : ""), kit.bci());
+    kit.caller()->print_short_name(tty);
+    tty->print_cr(" (%d bytes)", kit.caller()->code_size());
  }
  C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
  return NULL;
@ -1030,7 +1020,7 @@ Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_ar
  const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
  const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);

-  IdealKit kit(gvn(), control(), merged_memory());
+  IdealKit kit(gvn(), control(), merged_memory(), false, true);
 #define __ kit.
  Node* zero             = __ ConI(0);
  Node* one              = __ ConI(1);
@ -1042,7 +1032,7 @@ Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_ar
  Node* targetOffset     = __ ConI(targetOffset_i);
  Node* sourceEnd        = __ SubI(__ AddI(sourceOffset, sourceCount), targetCountLess1);

-  IdealVariable rtn(kit), i(kit), j(kit); __ declares_done();
+  IdealVariable rtn(kit), i(kit), j(kit); __ declarations_done();
  Node* outer_loop = __ make_label(2 /* goto */);
  Node* return_    = __ make_label(1);

@ -1079,9 +1069,9 @@ Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_ar
       __ bind(outer_loop);
  }__ end_loop(); __ dead(i);
  __ bind(return_);
-  __ drain_delay_transform();

-  set_control(__ ctrl());
+  // Final sync IdealKit and GraphKit.
+  sync_kit(kit);
  Node* result = __ value(rtn);
 #undef __
  C->set_has_loops(true);
@ -2183,14 +2173,23 @@ bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, Bas
        // of it. So we need to emit code to conditionally do the proper type of
        // store.

-        IdealKit kit(gvn(), control(),  merged_memory());
-        kit.declares_done();
+        IdealKit ideal(gvn(), control(),  merged_memory());
+#define __ ideal.
        // QQQ who knows what probability is here??
-        kit.if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
-          (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type);
-        } kit.else_(); {
-          (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
-        } kit.end_if();
+        __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
+          // Sync IdealKit and graphKit.
+          set_all_memory( __ merged_memory());
+          set_control(__ ctrl());
+          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type);
+          // Update IdealKit memory.
+          __ set_all_memory(merged_memory());
+          __ set_ctrl(control());
+        } __ else_(); {
+          __ store(__ ctrl(), adr, val, type, alias_type->index(), is_volatile);
+        } __ end_if();
+        // Final sync IdealKit and GraphKit.
+        sync_kit(ideal);
+#undef __
      }
    }
  }
--- a/hotspot/src/share/vm/opto/loopopts.cpp
+++ b/hotspot/src/share/vm/opto/loopopts.cpp
@ -346,7 +346,10 @@ Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {

    // Yes!  Reshape address expression!
    Node *inv_scale = new (C, 3) LShiftINode( add_invar, scale );
-    register_new_node( inv_scale, add_invar_ctrl );
+    Node *inv_scale_ctrl =
+      dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
+      add_invar_ctrl : scale_ctrl;
+    register_new_node( inv_scale, inv_scale_ctrl );
    Node *var_scale = new (C, 3) LShiftINode( add_var, scale );
    register_new_node( var_scale, n_ctrl );
    Node *var_add = new (C, 3) AddINode( var_scale, inv_scale );
--- a/hotspot/src/share/vm/opto/machnode.cpp
+++ b/hotspot/src/share/vm/opto/machnode.cpp
@ -300,6 +300,12 @@ const Node* MachNode::get_base_and_disp(intptr_t &offset, const TypePtr* &adr_ty
          }
        }
        adr_type = t_disp->add_offset(offset);
+      } else if( base == NULL && offset != 0 && offset != Type::OffsetBot ) {
+        // Use ideal type if it is oop ptr.
+        const TypePtr *tp = oper->type()->isa_ptr();
+        if( tp != NULL) {
+          adr_type = tp;
+        }
      }
    }

--- a/hotspot/src/share/vm/opto/macro.cpp
+++ b/hotspot/src/share/vm/opto/macro.cpp
@ -198,14 +198,79 @@ void PhaseMacroExpand::extract_call_projections(CallNode *call) {
 }

 // Eliminate a card mark sequence.  p2x is a ConvP2XNode
-void PhaseMacroExpand::eliminate_card_mark(Node *p2x) {
+void PhaseMacroExpand::eliminate_card_mark(Node* p2x) {
  assert(p2x->Opcode() == Op_CastP2X, "ConvP2XNode required");
-  Node *shift = p2x->unique_out();
-  Node *addp = shift->unique_out();
-  for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
-    Node *st = addp->last_out(j);
-    assert(st->is_Store(), "store required");
-    _igvn.replace_node(st, st->in(MemNode::Memory));
+  if (!UseG1GC) {
+    // vanilla/CMS post barrier
+    Node *shift = p2x->unique_out();
+    Node *addp = shift->unique_out();
+    for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
+      Node *st = addp->last_out(j);
+      assert(st->is_Store(), "store required");
+      _igvn.replace_node(st, st->in(MemNode::Memory));
+    }
+  } else {
+    // G1 pre/post barriers
+    assert(p2x->outcnt() == 2, "expects 2 users: Xor and URShift nodes");
+    // It could be only one user, URShift node, in Object.clone() instrinsic
+    // but the new allocation is passed to arraycopy stub and it could not
+    // be scalar replaced. So we don't check the case.
+
+    // Remove G1 post barrier.
+
+    // Search for CastP2X->Xor->URShift->Cmp path which
+    // checks if the store done to a different from the value's region.
+    // And replace Cmp with #0 (false) to collapse G1 post barrier.
+    Node* xorx = NULL;
+    for (DUIterator_Fast imax, i = p2x->fast_outs(imax); i < imax; i++) {
+      Node* u = p2x->fast_out(i);
+      if (u->Opcode() == Op_XorX) {
+        xorx = u;
+        break;
+      }
+    }
+    assert(xorx != NULL, "missing G1 post barrier");
+    Node* shift = xorx->unique_out();
+    Node* cmpx = shift->unique_out();
+    assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
+    cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
+    "missing region check in G1 post barrier");
+    _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
+
+    // Remove G1 pre barrier.
+
+    // Search "if (marking != 0)" check and set it to "false".
+    Node* this_region = p2x->in(0);
+    assert(this_region != NULL, "");
+    // There is no G1 pre barrier if previous stored value is NULL
+    // (for example, after initialization).
+    if (this_region->is_Region() && this_region->req() == 3) {
+      int ind = 1;
+      if (!this_region->in(ind)->is_IfFalse()) {
+        ind = 2;
+      }
+      if (this_region->in(ind)->is_IfFalse()) {
+        Node* bol = this_region->in(ind)->in(0)->in(1);
+        assert(bol->is_Bool(), "");
+        cmpx = bol->in(1);
+        if (bol->as_Bool()->_test._test == BoolTest::ne &&
+            cmpx->is_Cmp() && cmpx->in(2) == intcon(0) &&
+            cmpx->in(1)->is_Load()) {
+          Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
+          const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() +
+                                              PtrQueue::byte_offset_of_active());
+          if (adr->is_AddP() && adr->in(AddPNode::Base) == top() &&
+              adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
+              adr->in(AddPNode::Offset) == MakeConX(marking_offset)) {
+            _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
+          }
+        }
+      }
+    }
+    // Now CastP2X can be removed since it is used only on dead path
+    // which currently still alive until igvn optimize it.
+    assert(p2x->unique_out()->Opcode() == Op_URShiftX, "");
+    _igvn.replace_node(p2x, top());
  }
 }

@ -760,14 +825,11 @@ void PhaseMacroExpand::process_users_of_allocation(AllocateNode *alloc) {
          if (n->is_Store()) {
            _igvn.replace_node(n, n->in(MemNode::Memory));
          } else {
-            assert( n->Opcode() == Op_CastP2X, "CastP2X required");
            eliminate_card_mark(n);
          }
          k -= (oc2 - use->outcnt());
        }
      } else {
-        assert( !use->is_SafePoint(), "safepoint uses must have been already elimiated");
-        assert( use->Opcode() == Op_CastP2X, "CastP2X required");
        eliminate_card_mark(use);
      }
      j -= (oc1 - res->outcnt());
--- a/hotspot/src/share/vm/opto/matcher.cpp
+++ b/hotspot/src/share/vm/opto/matcher.cpp
@ -1489,8 +1489,7 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
 #ifdef ASSERT
    // Verify adr type after matching memory operation
    const MachOper* oper = mach->memory_operand();
-    if (oper != NULL && oper != (MachOper*)-1 &&
-        mach->adr_type() != TypeRawPtr::BOTTOM) { // non-direct addressing mode
+    if (oper != NULL && oper != (MachOper*)-1) {
      // It has a unique memory operand.  Find corresponding ideal mem node.
      Node* m = NULL;
      if (leaf->is_Mem()) {
--- a/hotspot/src/share/vm/opto/output.cpp
+++ b/hotspot/src/share/vm/opto/output.cpp
@ -50,6 +50,13 @@ void Compile::Output() {
  init_scratch_buffer_blob();
  if (failing())  return; // Out of memory

+  // The number of new nodes (mostly MachNop) is proportional to
+  // the number of java calls and inner loops which are aligned.
+  if ( C->check_node_count((NodeLimitFudgeFactor + C->java_calls()*3 +
+                            C->inner_loops()*(OptoLoopAlignment-1)),
+                           "out of nodes before code generation" ) ) {
+    return;
+  }
  // Make sure I can find the Start Node
  Block_Array& bbs = _cfg->_bbs;
  Block *entry = _cfg->_blocks[1];
@ -1105,7 +1112,7 @@ void Compile::Fill_buffer() {
  uint *call_returns = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1);

  uint  return_offset = 0;
-  MachNode *nop = new (this) MachNopNode();
+  int nop_size = (new (this) MachNopNode())->size(_regalloc);

  int previous_offset = 0;
  int current_offset  = 0;
@ -1188,7 +1195,6 @@ void Compile::Fill_buffer() {
        }

        // align the instruction if necessary
-        int nop_size = nop->size(_regalloc);
        int padding = mach->compute_padding(current_offset);
        // Make sure safepoint node for polling is distinct from a call's
        // return by adding a nop if needed.
@ -1372,7 +1378,6 @@ void Compile::Fill_buffer() {

    // If the next block is the top of a loop, pad this block out to align
    // the loop top a little. Helps prevent pipe stalls at loop back branches.
-    int nop_size = (new (this) MachNopNode())->size(_regalloc);
    if( i<_cfg->_num_blocks-1 ) {
      Block *nb = _cfg->_blocks[i+1];
      uint padding = nb->alignment_padding(current_offset);
--- a/hotspot/src/share/vm/opto/phaseX.hpp
+++ b/hotspot/src/share/vm/opto/phaseX.hpp
@ -450,6 +450,8 @@ public:
    subsume_node(old, nn);
  }

+  bool delay_transform() const { return _delay_transform; }
+
  void set_delay_transform(bool delay) {
    _delay_transform = delay;
  }
--- a/hotspot/src/share/vm/opto/type.hpp
+++ b/hotspot/src/share/vm/opto/type.hpp
@ -1216,6 +1216,8 @@ inline bool Type::is_floatingpoint() const {
 #define Op_AndX      Op_AndL
 #define Op_AddX      Op_AddL
 #define Op_SubX      Op_SubL
+#define Op_XorX      Op_XorL
+#define Op_URShiftX  Op_URShiftL
 // conversions
 #define ConvI2X(x)   ConvI2L(x)
 #define ConvL2X(x)   (x)
@ -1258,6 +1260,8 @@ inline bool Type::is_floatingpoint() const {
 #define Op_AndX      Op_AndI
 #define Op_AddX      Op_AddI
 #define Op_SubX      Op_SubI
+#define Op_XorX      Op_XorI
+#define Op_URShiftX  Op_URShiftI
 // conversions
 #define ConvI2X(x)   (x)
 #define ConvL2X(x)   ConvL2I(x)
--- a/hotspot/src/share/vm/prims/unsafe.cpp
+++ b/hotspot/src/share/vm/prims/unsafe.cpp
@ -1048,7 +1048,11 @@ UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapObject(JNIEnv *env, jobject unsafe,
  oop e = JNIHandles::resolve(e_h);
  oop p = JNIHandles::resolve(obj);
  HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset);
-  update_barrier_set_pre((void*)addr, e);
+  if (UseCompressedOops) {
+    update_barrier_set_pre((narrowOop*)addr, e);
+  } else {
+    update_barrier_set_pre((oop*)addr, e);
+  }
  oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e);
  jboolean success  = (res == e);
  if (success)
--- a/hotspot/src/share/vm/runtime/arguments.cpp
+++ b/hotspot/src/share/vm/runtime/arguments.cpp
@ -1202,18 +1202,13 @@ void Arguments::set_ergonomics_flags() {
  }

 #ifdef _LP64
-  // Compressed Headers do not work with CMS, which uses a bit in the klass
-  // field offset to determine free list chunk markers.
  // Check that UseCompressedOops can be set with the max heap size allocated
  // by ergonomics.
  if (MaxHeapSize <= max_heap_for_compressed_oops()) {
-    if (FLAG_IS_DEFAULT(UseCompressedOops) && !UseG1GC) {
+    if (FLAG_IS_DEFAULT(UseCompressedOops)) {
      // Turn off until bug is fixed.
      // the following line to return it to default status.
      // FLAG_SET_ERGO(bool, UseCompressedOops, true);
-    } else if (UseCompressedOops && UseG1GC) {
-      warning(" UseCompressedOops does not currently work with UseG1GC; switching off UseCompressedOops. ");
-      FLAG_SET_DEFAULT(UseCompressedOops, false);
    }
 #ifdef _WIN64
    if (UseLargePages && UseCompressedOops) {
@ -1454,6 +1449,7 @@ bool Arguments::check_gc_consistency() {
  if (UseSerialGC)                       i++;
  if (UseConcMarkSweepGC || UseParNewGC) i++;
  if (UseParallelGC || UseParallelOldGC) i++;
+  if (UseG1GC)                           i++;
  if (i > 1) {
    jio_fprintf(defaultStream::error_stream(),
                "Conflicting collector combinations in option list; "
@ -2603,22 +2599,6 @@ jint Arguments::parse(const JavaVMInitArgs* args) {
    return result;
  }

-  // These are hacks until G1 is fully supported and tested
-  // but lets you force -XX:+UseG1GC in PRT and get it where it (mostly) works
-  if (UseG1GC) {
-    if (UseConcMarkSweepGC || UseParNewGC || UseParallelGC || UseParallelOldGC || UseSerialGC) {
-#ifndef PRODUCT
-      tty->print_cr("-XX:+UseG1GC is incompatible with other collectors, using UseG1GC");
-#endif // PRODUCT
-      UseConcMarkSweepGC = false;
-      UseParNewGC        = false;
-      UseParallelGC      = false;
-      UseParallelOldGC   = false;
-      UseSerialGC        = false;
-    }
-    no_shared_spaces();
-  }
-
 #ifndef PRODUCT
  if (TraceBytecodesAt != 0) {
    TraceBytecodes = true;
@ -2676,10 +2656,7 @@ jint Arguments::parse(const JavaVMInitArgs* args) {
  } else if (UseParNewGC) {
    // Set some flags for ParNew
    set_parnew_gc_flags();
-  }
-  // Temporary; make the "if" an "else-if" before
-  // we integrate G1. XXX
-  if (UseG1GC) {
+  } else if (UseG1GC) {
    // Set some flags for garbage-first, if needed.
    set_g1_gc_flags();
  }
--- a/hotspot/src/share/vm/runtime/safepoint.cpp
+++ b/hotspot/src/share/vm/runtime/safepoint.cpp
@ -49,7 +49,7 @@ void SafepointSynchronize::begin() {
    // In the future we should investigate whether CMS can use the
    // more-general mechanism below.  DLD (01/05).
    ConcurrentMarkSweepThread::synchronize(false);
-  } else {
+  } else if (UseG1GC) {
    ConcurrentGCThread::safepoint_synchronize();
  }
 #endif // SERIALGC
@ -400,7 +400,7 @@ void SafepointSynchronize::end() {
  // If there are any concurrent GC threads resume them.
  if (UseConcMarkSweepGC) {
    ConcurrentMarkSweepThread::desynchronize(false);
-  } else {
+  } else if (UseG1GC) {
    ConcurrentGCThread::safepoint_desynchronize();
  }
 #endif // SERIALGC
--- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp
+++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp
@ -119,6 +119,7 @@ JRT_LEAF(void, SharedRuntime::g1_wb_pre(oopDesc* orig, JavaThread *thread))
    assert(false, "should be optimized out");
    return;
  }
+  assert(orig->is_oop(true /* ignore mark word */), "Error");
  // store the original value that was in the field reference
  thread->satb_mark_queue().enqueue(orig);
 JRT_END
--- a/hotspot/src/share/vm/runtime/stackValue.cpp
+++ b/hotspot/src/share/vm/runtime/stackValue.cpp
@ -104,7 +104,17 @@ StackValue* StackValue::create_stack_value(const frame* fr, const RegisterMap* r
    }
 #endif
    case Location::oop: {
-      Handle h(*(oop *)value_addr); // Wrap a handle around the oop
+      oop val = *(oop *)value_addr;
+#ifdef _LP64
+      if (Universe::is_narrow_oop_base(val)) {
+         // Compiled code may produce decoded oop = narrow_oop_base
+         // when a narrow oop implicit null check is used.
+         // The narrow_oop_base could be NULL or be the address
+         // of the page below heap. Use NULL value for both cases.
+         val = (oop)NULL;
+      }
+#endif
+      Handle h(val); // Wrap a handle around the oop
      return new StackValue(h);
    }
    case Location::addr: {
--- a/hotspot/src/share/vm/utilities/taskqueue.cpp
+++ b/hotspot/src/share/vm/utilities/taskqueue.cpp
@ -64,15 +64,18 @@ bool ParallelTaskTerminator::peek_in_queue_set() {
 }

 void ParallelTaskTerminator::yield() {
+  assert(_offered_termination <= _n_threads, "Invariant");
  os::yield();
 }

 void ParallelTaskTerminator::sleep(uint millis) {
+  assert(_offered_termination <= _n_threads, "Invariant");
  os::sleep(Thread::current(), millis, false);
 }

 bool
 ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
+  assert(_offered_termination < _n_threads, "Invariant");
  Atomic::inc(&_offered_termination);

  uint yield_count = 0;
@ -96,6 +99,7 @@ ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
  // Loop waiting for all threads to offer termination or
  // more work.
  while (true) {
+    assert(_offered_termination <= _n_threads, "Invariant");
    // Are all threads offering termination?
    if (_offered_termination == _n_threads) {
      return true;
@ -151,6 +155,7 @@ ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
      if (peek_in_queue_set() ||
          (terminator != NULL && terminator->should_exit_termination())) {
        Atomic::dec(&_offered_termination);
+        assert(_offered_termination < _n_threads, "Invariant");
        return false;
      }
    }
--- a/hotspot/src/share/vm/utilities/taskqueue.hpp
+++ b/hotspot/src/share/vm/utilities/taskqueue.hpp
@ -560,8 +560,14 @@ typedef GenericTaskQueueSet<Task>      OopTaskQueueSet;
 class StarTask {
  void*  _holder;        // either union oop* or narrowOop*
 public:
-  StarTask(narrowOop *p) { _holder = (void *)((uintptr_t)p | COMPRESSED_OOP_MASK); }
-  StarTask(oop *p)       { _holder = (void*)p; }
+  StarTask(narrowOop* p) {
+    assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!");
+    _holder = (void *)((uintptr_t)p | COMPRESSED_OOP_MASK);
+  }
+  StarTask(oop* p)       {
+    assert(((uintptr_t)p & COMPRESSED_OOP_MASK) == 0, "Information loss!");
+    _holder = (void*)p;
+  }
  StarTask()             { _holder = NULL; }
  operator oop*()        { return (oop*)_holder; }
  operator narrowOop*()  {
--- a/hotspot/test/compiler/6826736/Test.java
+++ b/hotspot/test/compiler/6826736/Test.java
@ -0,0 +1,75 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6826736
+ * @summary CMS: core dump with -XX:+UseCompressedOops
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xbatch -XX:+ScavengeALot -XX:+UseCompressedOops -XX:HeapBaseMinAddress=32g -XX:CompileThreshold=100 -XX:CompileOnly=Test.test -XX:-BlockLayoutRotateLoops -XX:LoopUnrollLimit=0 Test
+ */
+
+public class Test {
+    int[] arr;
+    int[] arr2;
+    int test(int r) {
+        for (int i = 0; i < 100; i++) {
+            for (int j = i; j < 100; j++) {
+               int a = 0;
+               for (long k = 0; k < 100; k++) {
+                  a += k;
+               }
+               if (arr != null)
+                   a = arr[j];
+               r += a;
+            }
+        }
+        return r;
+    }
+
+    public static void main(String[] args) {
+        int r = 0;
+        Test t = new Test();
+        for (int i = 0; i < 100; i++) {
+            t.arr = new int[100];
+            r = t.test(r);
+        }
+        System.out.println("Warmup 1 is done.");
+        for (int i = 0; i < 100; i++) {
+            t.arr = null;
+            r = t.test(r);
+        }
+        System.out.println("Warmup 2 is done.");
+        for (int i = 0; i < 100; i++) {
+            t.arr = new int[100];
+            r = t.test(r);
+        }
+        System.out.println("Warmup is done.");
+        for (int i = 0; i < 100; i++) {
+            t.arr = new int[1000000];
+            t.arr = null;
+            r = t.test(r);
+        }
+    }
+}
--- a/hotspot/test/compiler/6837094/Test.java
+++ b/hotspot/test/compiler/6837094/Test.java
@ -1,5 +1,5 @@
 /*
- * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2009 Google Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
--- a/hotspot/test/compiler/6851282/Test.java
+++ b/hotspot/test/compiler/6851282/Test.java
@ -0,0 +1,124 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6851282
+ * @summary JIT miscompilation results in null entry in array when using CompressedOops
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UseCompressedOops Test
+ */
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class Test {
+  void foo(A a, A[] as) {
+    for (A a1 : as) {
+      B[] filtered = a.c(a1);
+      for (B b : filtered) {
+        if (b == null) {
+          System.out.println("bug: b == null");
+          System.exit(97);
+        }
+      }
+    }
+  }
+
+  public static void main(String[] args) {
+    List<A> as = new ArrayList<A>();
+    for (int i = 0; i < 5000; i++) {
+      List<B> bs = new ArrayList<B>();
+      for (int j = i; j < i + 1000; j++)
+        bs.add(new B(j));
+      as.add(new A(bs.toArray(new B[0])));
+    }
+    new Test().foo(as.get(0), as.subList(1, as.size()).toArray(new A[0]));
+  }
+}
+
+class A {
+  final B[] bs;
+
+  public A(B[] bs) {
+    this.bs = bs;
+  }
+
+  final B[] c(final A a) {
+    return new BoxedArray<B>(bs).filter(new Function<B, Boolean>() {
+      public Boolean apply(B arg) {
+        for (B b : a.bs) {
+          if (b.d == arg.d)
+            return true;
+        }
+        return false;
+      }
+    });
+  }
+}
+
+class BoxedArray<T> {
+
+  private final T[] array;
+
+  BoxedArray(T[] array) {
+    this.array = array;
+  }
+
+  public T[] filter(Function<T, Boolean> function) {
+    boolean[] include = new boolean[array.length];
+    int len = 0;
+    int i = 0;
+    while (i < array.length) {
+      if (function.apply(array[i])) {
+        include[i] = true;
+        len += 1;
+      }
+      i += 1;
+    }
+    T[] result = (T[]) java.lang.reflect.Array.newInstance(array.getClass().getComponentType(), len);
+    len = 0;
+    i = 0;
+    while (len < result.length) {
+      if (include[i]) {
+        result[len] = array[i];
+        len += 1;
+      }
+      i += 1;
+    }
+    return result;
+  }
+}
+
+interface Function<T, R> {
+  R apply(T arg);
+}
+
+class B {
+  final int d;
+  public B(int d) {
+    this.d = d;
+  }
+}
+
--- a/hotspot/test/compiler/6857159/Test6857159.java
+++ b/hotspot/test/compiler/6857159/Test6857159.java
@ -0,0 +1,68 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6857159
+ * @summary local schedule failed with checkcast of Thread.currentThread()
+ *
+ * @run shell Test6857159.sh
+ */
+
+public class Test6857159 extends Thread {
+    static class ct0 extends Test6857159 {
+        public void message() {
+            // System.out.println("message");
+        }
+
+        public void run() {
+             message();
+             ct0 ct = (ct0) Thread.currentThread();
+             ct.message();
+        }
+    }
+    static class ct1 extends ct0 {
+        public void message() {
+            // System.out.println("message");
+        }
+    }
+    static class ct2 extends ct0 {
+        public void message() {
+            // System.out.println("message");
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        for (int i = 0; i < 100000; i++) {
+            Thread t = null;
+            switch (i % 3) {
+              case 0: t = new ct0(); break;
+              case 1: t = new ct1(); break;
+              case 2: t = new ct2(); break;
+            }
+            t.start();
+            t.join();
+        }
+    }
+}
--- a/hotspot/test/compiler/6857159/Test6857159.sh
+++ b/hotspot/test/compiler/6857159/Test6857159.sh
@ -0,0 +1,65 @@
+#!/bin/sh
+# 
+# Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+# 
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+# 
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+# 
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+# 
+# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+# CA 95054 USA or visit www.sun.com if you need additional information or
+# have any questions.
+# 
+# 
+
+if [ "${TESTSRC}" = "" ]
+then
+  echo "TESTSRC not set.  Test cannot execute.  Failed."
+  exit 1
+fi
+echo "TESTSRC=${TESTSRC}"
+if [ "${TESTJAVA}" = "" ]
+then
+  echo "TESTJAVA not set.  Test cannot execute.  Failed."
+  exit 1
+fi
+echo "TESTJAVA=${TESTJAVA}"
+if [ "${TESTCLASSES}" = "" ]
+then
+  echo "TESTCLASSES not set.  Test cannot execute.  Failed."
+  exit 1
+fi
+echo "TESTCLASSES=${TESTCLASSES}"
+echo "CLASSPATH=${CLASSPATH}"
+
+set -x
+
+cp ${TESTSRC}/Test6857159.java .
+cp ${TESTSRC}/Test6857159.sh .
+
+${TESTJAVA}/bin/javac -d . Test6857159.java
+
+${TESTJAVA}/bin/java  ${TESTVMOPTS} -Xbatch -XX:+PrintCompilation -XX:CompileOnly=Test6857159\$ct.run Test6857159 > test.out 2>&1
+
+grep "COMPILE SKIPPED" test.out
+
+result=$?
+if [ $result -eq 1 ]
+then
+  echo "Passed"
+  exit 0
+else
+  echo "Failed"
+  exit 1
+fi
--- a/hotspot/test/compiler/6859338/Test6859338.java
+++ b/hotspot/test/compiler/6859338/Test6859338.java
@ -0,0 +1,51 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6859338
+ * @summary Assertion failure in sharedRuntime.cpp
+ *
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions  -XX:-InlineObjectHash -Xbatch -XX:-ProfileInterpreter Test6859338
+ */
+
+public class Test6859338 {
+    static Object[] o = new Object[] { new Object(), null };
+    public static void main(String[] args) {
+        int total = 0;
+        try {
+            // Exercise the implicit null check in the unverified entry point
+            for (int i = 0; i < 40000; i++) {
+                int limit = o.length;
+                if (i < 20000) limit = 1;
+                for (int j = 0; j < limit; j++) {
+                    total += o[j].hashCode();
+                }
+            }
+
+        } catch (NullPointerException e) {
+            // this is expected.  A true failure causes a crash
+        }
+    }
+}
--- a/hotspot/test/compiler/6860469/Test.java
+++ b/hotspot/test/compiler/6860469/Test.java
@ -0,0 +1,71 @@
+/*
+ * Copyright 2009 Google Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6860469
+ * @summary remix_address_expressions reshapes address expression with bad control
+ *
+ * @run main/othervm -Xcomp -XX:CompileOnly=Test.C Test
+ */
+
+public class Test {
+
+  private static final int H = 16;
+  private static final int F = 9;
+
+  static int[] fl = new int[1 << F];
+
+  static int C(int ll, int f) {
+    int max = -1;
+    int min = H + 1;
+
+    if (ll != 0) {
+      if (ll < min) {
+        min = ll;
+      }
+      if (ll > max) {
+        max = ll;
+      }
+    }
+
+    if (f > max) {
+      f = max;
+    }
+    if (min > f) {
+      min = f;
+    }
+
+    for (int mc = 1 >> max - f; mc <= 0; mc++) {
+      int i = mc << (32 - f);
+      fl[i] = max;
+    }
+
+    return min;
+  }
+
+  public static void main(String argv[]) {
+    C(0, 10);
+  }
+}