Merge

2017-07-05 19:09:18 +02:00 · 2017-07-05 19:09:18 +02:00 · f46969a88a
commit f46969a88a
parent 86b719e366 3a7a4a49dd
491 changed files with 27142 additions and 9103 deletions
--- a/.hgtags-top-repo
+++ b/.hgtags-top-repo
@ -225,3 +225,4 @@ d2dcb110e9dbaf9903c05b211df800e78e4b394e jdk8-b100
 9f74a220677dc265a724515d8e2617548cef62f1 jdk8-b101
 5eb3c1dc348f72a7f84f7d9d07834e8bbe09a799 jdk8-b102
 b7e64be81c8a7690703df5711f4fc2375da8a9cb jdk8-b103
+96c1b9b7524b52c3fcefc90ffad4c767396727c8 jdk8-b104
--- a/README-builds.html
+++ b/README-builds.html
@ -154,7 +154,7 @@
                    </code>
                </blockquote>
                Once you have all the repositories, keep in mind that each
-                repository is it's own independent repository.
+                repository is its own independent repository.
                You can also re-run <code>./get_source.sh</code> anytime to
                pull over all the latest changesets in all the repositories.
                This set of nested repositories has been given the term
@ -241,6 +241,14 @@
                                source code for the OpenJDK Corba functionality
                            </td>
                        </tr>
+                        <tr>
+                            <td>
+                                nashorn
+                            </td>
+                            <td>
+                                source code for the OpenJDK JavaScript implementation
+                            </td>
+                        </tr>
                    </tbody>
                </table>
            </blockquote>
@ -386,7 +394,7 @@
                        <code>--with-boot-jdk</code>.
                    </li>
                    <li>
-                        Insure that GNU make, the Bootstrap JDK,
+                        Ensure that GNU make, the Bootstrap JDK,
                        and the compilers are all
                        in your PATH environment variable
                    </li>
@ -1307,9 +1315,9 @@
                    you will need to modify the makefiles. But for normal file
                    additions or removals, no changes are needed. There are certan
                    exceptions for some native libraries where the source files are spread
-                    over many directories which also contain courses for other
+                    over many directories which also contain sources for other
                    libraries. In these cases it was simply easier to create include lists
-                    rather thane excludes.
+                    rather than excludes.
                </p>

                <p>
@ -1327,14 +1335,14 @@
                <p>
                    <b>Q:</b> 
                    <code>configure</code> provides OpenJDK-specific features such as
-                    <code>--enable-jigsaw</code> or <code>--with-builddeps-server</code>
-                    that are not described in this document. What about those?
+                    <code>--with-builddeps-server</code> that are not
+                    described in this document. What about those? 
                    <br>
                    <b>A:</b>
                    Try them out if you like! But be aware that most of these are 
                    experimental features. 
                    Many of them don't do anything at all at the moment; the option 
-                    is just a placeholder. Other depends on
+                    is just a placeholder. Others depend on
                    pieces of code or infrastructure that is currently 
                    not ready for prime time.
                </p>
@ -1385,24 +1393,6 @@
                    system and some will need to wait until after.
                </p>

-                <p>
-                    <b>Q:</b> What is @GenerateNativeHeaders?
-                    <br>
-                    <b>A:</b> 
-                    To speed up compilation, we added a flag to javac which makes it 
-                    do the job of javah as well, as a by-product; that is, generating
-                    native .h header files. These files are only generated 
-                    if a class contains native methods. However, sometimes 
-                    a class contains no native method,
-                    but still contains constants that native code needs to use. 
-                    The new GenerateNativeHeaders annotation tells javac to
-                    force generation of a
-                    header file in these cases. (We don't want to generate 
-                    native headers for all classes that contains constants 
-                    but no native methods, since
-                    that would slow down the compilation process needlessly.)
-                </p>
-
                <p>
                    <b>Q:</b> 
                    Is anything able to use the results of the new build's default make target?
@ -1429,10 +1419,9 @@
                    What should I do?
                    <br>
                    <b>A:</b>
-                    It might very well be that we have missed to add support for
+                    It might very well be that we have neglected to add support for
                    an option that was actually used from outside the build system.
-                    Email us and we will
-                    add support for it!
+                    Email us and we will add support for it!
                </p>

            </blockquote>
--- a/common/autoconf/generated-configure.sh
+++ b/common/autoconf/generated-configure.sh
@ -29573,7 +29573,7 @@ if test "x$OPENJDK_TARGET_OS" = xsolaris; then
    CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DSOLARIS"
 fi
 if test "x$OPENJDK_TARGET_OS" = xmacosx; then
-    CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DMACOSX -D_ALLBSD_SOURCE"
+    CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DMACOSX -D_ALLBSD_SOURCE -D_DARWIN_UNLIMITED_SELECT"
    # Setting these parameters makes it an error to link to macosx APIs that are
    # newer than the given OS version and makes the linked binaries compatible even
    # if built on a newer version of the OS.
--- a/common/autoconf/toolchain.m4
+++ b/common/autoconf/toolchain.m4
@ -905,7 +905,7 @@ if test "x$OPENJDK_TARGET_OS" = xsolaris; then
    CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DSOLARIS"
 fi
 if test "x$OPENJDK_TARGET_OS" = xmacosx; then
-    CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DMACOSX -D_ALLBSD_SOURCE"
+    CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DMACOSX -D_ALLBSD_SOURCE -D_DARWIN_UNLIMITED_SELECT"
    # Setting these parameters makes it an error to link to macosx APIs that are
    # newer than the given OS version and makes the linked binaries compatible even
    # if built on a newer version of the OS.
--- a/corba/.hgtags
+++ b/corba/.hgtags
@ -225,3 +225,4 @@ c8286839d0df04aba819ec4bef12b86babccf30e jdk8-b90
 a013024b07475782f1fa8e196e950b34b4077663 jdk8-b101
 528c7e76eaeee022817ee085668459bc97cf5665 jdk8-b102
 49c4a777fdfd648d4c3fffc940fdb97a23108ca8 jdk8-b103
+d411c60a8c2fe8fdc572af907775e90f7eefd513 jdk8-b104
--- a/hotspot/.hgtags
+++ b/hotspot/.hgtags
@ -369,3 +369,5 @@ c4697c1c448416108743b59118b4a2498b339d0c jdk8-b102
 7f55137d6aa81efc6eb0035813709f2cb6a26b8b hs25-b45
 6f9be7f87b9653e94fd8fb3070891a0cc91b15bf jdk8-b103
 580430d131ccd475e2f2ad4006531b8c4813d102 hs25-b46
+104743074675359cfbf7f4dcd9ab2a5974a16627 jdk8-b104
+c1604d5885a6f2adc0bcea2fa142a8f6bafad2f0 hs25-b47
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java
@ -75,19 +75,19 @@ public class InstanceKlass extends Klass {
    javaFieldsCount      = new CIntField(type.getCIntegerField("_java_fields_count"), 0);
    constants            = new MetadataField(type.getAddressField("_constants"), 0);
    classLoaderData      = type.getAddressField("_class_loader_data");
-    sourceFileName       = type.getAddressField("_source_file_name");
    sourceDebugExtension = type.getAddressField("_source_debug_extension");
    innerClasses         = type.getAddressField("_inner_classes");
+    sourceFileNameIndex  = new CIntField(type.getCIntegerField("_source_file_name_index"), 0);
    nonstaticFieldSize   = new CIntField(type.getCIntegerField("_nonstatic_field_size"), 0);
    staticFieldSize      = new CIntField(type.getCIntegerField("_static_field_size"), 0);
-    staticOopFieldCount   = new CIntField(type.getCIntegerField("_static_oop_field_count"), 0);
+    staticOopFieldCount  = new CIntField(type.getCIntegerField("_static_oop_field_count"), 0);
    nonstaticOopMapSize  = new CIntField(type.getCIntegerField("_nonstatic_oop_map_size"), 0);
    isMarkedDependent    = new CIntField(type.getCIntegerField("_is_marked_dependent"), 0);
    initState            = new CIntField(type.getCIntegerField("_init_state"), 0);
    vtableLen            = new CIntField(type.getCIntegerField("_vtable_len"), 0);
    itableLen            = new CIntField(type.getCIntegerField("_itable_len"), 0);
    breakpoints          = type.getAddressField("_breakpoints");
-    genericSignature     = type.getAddressField("_generic_signature");
+    genericSignatureIndex = new CIntField(type.getCIntegerField("_generic_signature_index"), 0);
    majorVersion         = new CIntField(type.getCIntegerField("_major_version"), 0);
    minorVersion         = new CIntField(type.getCIntegerField("_minor_version"), 0);
    headerSize           = Oop.alignObjectOffset(type.getSize());
@ -134,9 +134,9 @@ public class InstanceKlass extends Klass {
  private static CIntField javaFieldsCount;
  private static MetadataField constants;
  private static AddressField  classLoaderData;
-  private static AddressField  sourceFileName;
  private static AddressField  sourceDebugExtension;
  private static AddressField  innerClasses;
+  private static CIntField sourceFileNameIndex;
  private static CIntField nonstaticFieldSize;
  private static CIntField staticFieldSize;
  private static CIntField staticOopFieldCount;
@ -146,7 +146,7 @@ public class InstanceKlass extends Klass {
  private static CIntField vtableLen;
  private static CIntField itableLen;
  private static AddressField breakpoints;
-  private static AddressField  genericSignature;
+  private static CIntField genericSignatureIndex;
  private static CIntField majorVersion;
  private static CIntField minorVersion;

@ -346,7 +346,7 @@ public class InstanceKlass extends Klass {
  public ConstantPool getConstants()        { return (ConstantPool) constants.getValue(this); }
  public ClassLoaderData getClassLoaderData() { return                ClassLoaderData.instantiateWrapperFor(classLoaderData.getValue(getAddress())); }
  public Oop       getClassLoader()         { return                getClassLoaderData().getClassLoader(); }
-  public Symbol    getSourceFileName()      { return getSymbol(sourceFileName); }
+  public Symbol    getSourceFileName()      { return                getConstants().getSymbolAt(sourceFileNameIndex.getValue(this)); }
  public String    getSourceDebugExtension(){ return                CStringUtilities.getString(sourceDebugExtension.getValue(getAddress())); }
  public long      getNonstaticFieldSize()  { return                nonstaticFieldSize.getValue(this); }
  public long      getStaticOopFieldCount() { return                staticOopFieldCount.getValue(this); }
@ -354,7 +354,7 @@ public class InstanceKlass extends Klass {
  public boolean   getIsMarkedDependent()   { return                isMarkedDependent.getValue(this) != 0; }
  public long      getVtableLen()           { return                vtableLen.getValue(this); }
  public long      getItableLen()           { return                itableLen.getValue(this); }
-  public Symbol    getGenericSignature()    { return getSymbol(genericSignature); }
+  public Symbol    getGenericSignature()    { return                getConstants().getSymbolAt(genericSignatureIndex.getValue(this)); }
  public long      majorVersion()           { return                majorVersion.getValue(this); }
  public long      minorVersion()           { return                minorVersion.getValue(this); }

--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ClassDump.java
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ClassDump.java
@ -92,8 +92,13 @@ public class ClassDump extends Tool {
                    System.err.println("Warning: Can not create class filter!");
                }
            }
-            String outputDirectory = System.getProperty("sun.jvm.hotspot.tools.jcore.outputDir", ".");
-            setOutputDirectory(outputDirectory);
+
+            // outputDirectory and jarStream are alternatives: setting one closes the other.
+            // If neither is set, use outputDirectory from the System property:
+            if (outputDirectory == null && jarStream == null) {
+                String dirName = System.getProperty("sun.jvm.hotspot.tools.jcore.outputDir", ".");
+                setOutputDirectory(dirName);
+            }

            // walk through the system dictionary
            SystemDictionary dict = VM.getVM().getSystemDictionary();
--- a/hotspot/make/bsd/makefiles/gcc.make
+++ b/hotspot/make/bsd/makefiles/gcc.make
@ -247,7 +247,7 @@ ifeq ($(USE_CLANG), true)
 # Not yet supported by clang in Xcode 4.6.2
 #  WARNINGS_ARE_ERRORS += -Wno-tautological-constant-out-of-range-compare
  WARNINGS_ARE_ERRORS += -Wno-delete-non-virtual-dtor -Wno-deprecated -Wno-format -Wno-dynamic-class-memaccess
-  WARNINGS_ARE_ERRORS += -Wno-return-type -Wno-empty-body
+  WARNINGS_ARE_ERRORS += -Wno-empty-body
 endif

 WARNING_FLAGS = -Wpointer-arith -Wsign-compare -Wundef
--- a/hotspot/make/hotspot_version
+++ b/hotspot/make/hotspot_version
@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2013

 HS_MAJOR_VER=25
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=46
+HS_BUILD_NUMBER=47

 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -29,6 +29,7 @@
 #include "interpreter/interpreter.hpp"
 #include "memory/cardTableModRefBS.hpp"
 #include "memory/resourceArea.hpp"
+#include "memory/universe.hpp"
 #include "prims/methodHandles.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/interfaceSupport.hpp"
@ -1145,7 +1146,7 @@ void  MacroAssembler::set_narrow_klass(Klass* k, Register d) {
  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  narrowOop encoded_k = oopDesc::encode_klass(k);
+  narrowOop encoded_k = Klass::encode_klass(k);

  assert_not_delayed();
  // Relocation with special format (see relocInfo_sparc.hpp).
@ -1419,7 +1420,6 @@ void MacroAssembler::verify_oop_subroutine() {
  load_klass(O0_obj, O0_obj);
  // assert((klass != NULL)
  br_null_short(O0_obj, pn, fail);
-  // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers

  wrccr( O5_save_flags ); // Restore CCR's

@ -4089,52 +4089,91 @@ void  MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
 }

 void MacroAssembler::encode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
  assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  if (Universe::narrow_klass_base() != NULL)
-    sub(r, G6_heapbase, r);
-  srlx(r, LogKlassAlignmentInBytes, r);
+  assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized");
+  assert(r != G6_heapbase, "bad register choice");
+  set((intptr_t)Universe::narrow_klass_base(), G6_heapbase);
+  sub(r, G6_heapbase, r);
+  if (Universe::narrow_klass_shift() != 0) {
+    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    srlx(r, LogKlassAlignmentInBytes, r);
+  }
+  reinit_heapbase();
 }

 void MacroAssembler::encode_klass_not_null(Register src, Register dst) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  if (Universe::narrow_klass_base() == NULL) {
-    srlx(src, LogKlassAlignmentInBytes, dst);
+  if (src == dst) {
+    encode_klass_not_null(src);
  } else {
-    sub(src, G6_heapbase, dst);
-    srlx(dst, LogKlassAlignmentInBytes, dst);
+    assert (UseCompressedKlassPointers, "must be compressed");
+    assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized");
+    set((intptr_t)Universe::narrow_klass_base(), dst);
+    sub(src, dst, dst);
+    if (Universe::narrow_klass_shift() != 0) {
+      srlx(dst, LogKlassAlignmentInBytes, dst);
+    }
  }
 }

+// Function instr_size_for_decode_klass_not_null() counts the instructions
+// generated by decode_klass_not_null() and reinit_heapbase().  Hence, if
+// the instructions they generate change, then this method needs to be updated.
+int MacroAssembler::instr_size_for_decode_klass_not_null() {
+  assert (UseCompressedKlassPointers, "only for compressed klass ptrs");
+  // set + add + set
+  int num_instrs = insts_for_internal_set((intptr_t)Universe::narrow_klass_base()) + 1 +
+    insts_for_internal_set((intptr_t)Universe::narrow_ptrs_base());
+  if (Universe::narrow_klass_shift() == 0) {
+    return num_instrs * BytesPerInstWord;
+  } else { // sllx
+    return (num_instrs + 1) * BytesPerInstWord;
+  }
+}
+
+// !!! If the instructions that get generated here change then function
+// instr_size_for_decode_klass_not_null() needs to get updated.
 void  MacroAssembler::decode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
  // pd_code_size_limit.
  assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  sllx(r, LogKlassAlignmentInBytes, r);
-  if (Universe::narrow_klass_base() != NULL)
-    add(r, G6_heapbase, r);
+  assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized");
+  assert(r != G6_heapbase, "bad register choice");
+  set((intptr_t)Universe::narrow_klass_base(), G6_heapbase);
+  if (Universe::narrow_klass_shift() != 0)
+    sllx(r, LogKlassAlignmentInBytes, r);
+  add(r, G6_heapbase, r);
+  reinit_heapbase();
 }

 void  MacroAssembler::decode_klass_not_null(Register src, Register dst) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
-  // pd_code_size_limit.
-  assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  sllx(src, LogKlassAlignmentInBytes, dst);
-  if (Universe::narrow_klass_base() != NULL)
-    add(dst, G6_heapbase, dst);
+  if (src == dst) {
+    decode_klass_not_null(src);
+  } else {
+    // Do not add assert code to this unless you change vtableStubs_sparc.cpp
+    // pd_code_size_limit.
+    assert (UseCompressedKlassPointers, "must be compressed");
+    assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized");
+    if (Universe::narrow_klass_shift() != 0) {
+      assert((src != G6_heapbase) && (dst != G6_heapbase), "bad register choice");
+      set((intptr_t)Universe::narrow_klass_base(), G6_heapbase);
+      sllx(src, LogKlassAlignmentInBytes, dst);
+      add(dst, G6_heapbase, dst);
+      reinit_heapbase();
+    } else {
+      set((intptr_t)Universe::narrow_klass_base(), dst);
+      add(src, dst, dst);
+    }
+  }
 }

 void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedKlassPointers) {
-    AddressLiteral base(Universe::narrow_ptrs_base_addr());
-    load_ptr_contents(base, G6_heapbase);
+    if (Universe::heap() != NULL) {
+      set((intptr_t)Universe::narrow_ptrs_base(), G6_heapbase);
+    } else {
+      AddressLiteral base(Universe::narrow_ptrs_base_addr());
+      load_ptr_contents(base, G6_heapbase);
+    }
  }
 }

--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp
@ -1177,6 +1177,9 @@ public:
  void push_CPU_state();
  void pop_CPU_state();

+  // Returns the byte size of the instructions generated by decode_klass_not_null().
+  static int instr_size_for_decode_klass_not_null();
+
  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

--- a/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -97,7 +97,7 @@ void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
    guarantee(Assembler::inv_op2(inst)==Assembler::sethi_op2, "must be sethi");
    if (format() != 0) {
      assert(type() == relocInfo::oop_type || type() == relocInfo::metadata_type, "only narrow oops or klasses case");
-      jint np = type() == relocInfo::oop_type ? oopDesc::encode_heap_oop((oop)x) : oopDesc::encode_klass((Klass*)x);
+      jint np = type() == relocInfo::oop_type ? oopDesc::encode_heap_oop((oop)x) : Klass::encode_klass((Klass*)x);
      inst &= ~Assembler::hi22(-1);
      inst |=  Assembler::hi22((intptr_t)np);
      if (verify_only) {
--- a/hotspot/src/cpu/sparc/vm/sparc.ad
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad
@ -559,10 +559,7 @@ int MachCallDynamicJavaNode::ret_addr_offset() {
    int klass_load_size;
    if (UseCompressedKlassPointers) {
      assert(Universe::heap() != NULL, "java heap should be initialized");
-      if (Universe::narrow_klass_base() == NULL)
-        klass_load_size = 2*BytesPerInstWord; // see MacroAssembler::load_klass()
-      else
-        klass_load_size = 3*BytesPerInstWord;
+      klass_load_size = MacroAssembler::instr_size_for_decode_klass_not_null() + 1*BytesPerInstWord;
    } else {
      klass_load_size = 1*BytesPerInstWord;
    }
@ -1663,9 +1660,12 @@ void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  if (UseCompressedKlassPointers) {
    assert(Universe::heap() != NULL, "java heap should be initialized");
    st->print_cr("\tLDUW   [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass");
-    st->print_cr("\tSLL    R_G5,3,R_G5");
-    if (Universe::narrow_klass_base() != NULL)
-      st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
+    st->print_cr("\tSET    Universe::narrow_klass_base,R_G6_heap_base");
+    if (Universe::narrow_klass_shift() != 0) {
+      st->print_cr("\tSLL    R_G5,3,R_G5");
+    }
+    st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
+    st->print_cr("\tSET    Universe::narrow_ptrs_base,R_G6_heap_base");
  } else {
    st->print_cr("\tLDX    [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
  }
@ -2563,10 +2563,7 @@ encode %{
      int klass_load_size;
      if (UseCompressedKlassPointers) {
        assert(Universe::heap() != NULL, "java heap should be initialized");
-        if (Universe::narrow_klass_base() == NULL)
-          klass_load_size = 2*BytesPerInstWord;
-        else
-          klass_load_size = 3*BytesPerInstWord;
+        klass_load_size = MacroAssembler::instr_size_for_decode_klass_not_null() + 1*BytesPerInstWord;
      } else {
        klass_load_size = 1*BytesPerInstWord;
      }
--- a/hotspot/src/cpu/sparc/vm/vtableStubs_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/vtableStubs_sparc.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -219,13 +219,13 @@ int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
      const int basic = 5*BytesPerInstWord +
                        // shift;add for load_klass (only shift with zero heap based)
                        (UseCompressedKlassPointers ?
-                         ((Universe::narrow_klass_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
+                          MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
      return basic + slop;
    } else {
      const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord +
                        // shift;add for load_klass (only shift with zero heap based)
                        (UseCompressedKlassPointers ?
-                         ((Universe::narrow_klass_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
+                          MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
      return (basic + slop);
    }
  }
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
@ -30,6 +30,7 @@
 #include "interpreter/interpreter.hpp"
 #include "memory/cardTableModRefBS.hpp"
 #include "memory/resourceArea.hpp"
+#include "memory/universe.hpp"
 #include "prims/methodHandles.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/interfaceSupport.hpp"
@ -4810,23 +4811,8 @@ void MacroAssembler::load_klass(Register dst, Register src) {
 }

 void MacroAssembler::load_prototype_header(Register dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedKlassPointers) {
-    assert (Universe::heap() != NULL, "java heap should be initialized");
-    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    if (Universe::narrow_klass_shift() != 0) {
-      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
-      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
-    } else {
-      movq(dst, Address(dst, Klass::prototype_header_offset()));
-    }
-  } else
-#endif
-  {
-    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    movptr(dst, Address(dst, Klass::prototype_header_offset()));
-  }
+  load_klass(dst, src);
+  movptr(dst, Address(dst, Klass::prototype_header_offset()));
 }

 void MacroAssembler::store_klass(Register dst, Register src) {
@ -4914,7 +4900,7 @@ void MacroAssembler::store_klass_gap(Register dst, Register src) {

 #ifdef ASSERT
 void MacroAssembler::verify_heapbase(const char* msg) {
-  assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed");
+  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
@ -5058,69 +5044,80 @@ void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
 }

 void MacroAssembler::encode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?");
-#endif
-  if (Universe::narrow_klass_base() != NULL) {
-    subq(r, r12_heapbase);
-  }
+  assert(Universe::narrow_klass_base() != NULL, "Base should be initialized");
+  // Use r12 as a scratch register in which to temporarily load the narrow_klass_base.
+  assert(r != r12_heapbase, "Encoding a klass in r12");
+  mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base());
+  subq(r, r12_heapbase);
  if (Universe::narrow_klass_shift() != 0) {
    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shrq(r, LogKlassAlignmentInBytes);
  }
+  reinit_heapbase();
 }

 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?");
-#endif
-  if (dst != src) {
-    movq(dst, src);
-  }
-  if (Universe::narrow_klass_base() != NULL) {
-    subq(dst, r12_heapbase);
-  }
-  if (Universe::narrow_klass_shift() != 0) {
-    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    shrq(dst, LogKlassAlignmentInBytes);
+  if (dst == src) {
+    encode_klass_not_null(src);
+  } else {
+    mov64(dst, (int64_t)Universe::narrow_klass_base());
+    negq(dst);
+    addq(dst, src);
+    if (Universe::narrow_klass_shift() != 0) {
+      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      shrq(dst, LogKlassAlignmentInBytes);
+    }
  }
 }

+// Function instr_size_for_decode_klass_not_null() counts the instructions
+// generated by decode_klass_not_null(register r) and reinit_heapbase(),
+// when (Universe::heap() != NULL).  Hence, if the instructions they
+// generate change, then this method needs to be updated.
+int MacroAssembler::instr_size_for_decode_klass_not_null() {
+  assert (UseCompressedKlassPointers, "only for compressed klass ptrs");
+  // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
+  return (Universe::narrow_klass_shift() == 0 ? 20 : 24);
+}
+
+// !!! If the instructions that get generated here change then function
+// instr_size_for_decode_klass_not_null() needs to get updated.
 void  MacroAssembler::decode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
  // Note: it will change flags
+  assert(Universe::narrow_klass_base() != NULL, "Base should be initialized");
  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  assert(r != r12_heapbase, "Decoding a klass in r12");
  // Cannot assert, unverified entry point counts instructions (see .ad file)
  // vtableStubs also counts instructions in pd_code_size_limit.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_shift() != 0) {
    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
    shlq(r, LogKlassAlignmentInBytes);
-    if (Universe::narrow_klass_base() != NULL) {
-      addq(r, r12_heapbase);
-    }
-  } else {
-    assert (Universe::narrow_klass_base() == NULL, "sanity");
  }
+  // Use r12 as a scratch register in which to temporarily load the narrow_klass_base.
+  mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base());
+  addq(r, r12_heapbase);
+  reinit_heapbase();
 }

 void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
  // Note: it will change flags
+  assert(Universe::narrow_klass_base() != NULL, "Base should be initialized");
  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
-  // Cannot assert, unverified entry point counts instructions (see .ad file)
-  // vtableStubs also counts instructions in pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_klass_shift() != 0) {
-    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
-    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
+  if (dst == src) {
+    decode_klass_not_null(dst);
  } else {
-    assert (Universe::narrow_klass_base() == NULL, "sanity");
-    if (dst != src) {
-      movq(dst, src);
+    // Cannot assert, unverified entry point counts instructions (see .ad file)
+    // vtableStubs also counts instructions in pd_code_size_limit.
+    // Also do not verify_oop as this is called by verify_oop.
+
+    mov64(dst, (int64_t)Universe::narrow_klass_base());
+    if (Universe::narrow_klass_shift() != 0) {
+      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
+      leaq(dst, Address(dst, src, Address::times_8, 0));
+    } else {
+      addq(dst, src);
    }
  }
 }
@ -5148,7 +5145,7 @@ void  MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+  mov_narrow_oop(dst, Klass::encode_klass(k), rspec);
 }

 void  MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
@ -5156,7 +5153,7 @@ void  MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+  mov_narrow_oop(dst, Klass::encode_klass(k), rspec);
 }

 void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
@ -5182,7 +5179,7 @@ void  MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) {
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+  Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec);
 }

 void  MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
@ -5190,14 +5187,23 @@ void  MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
  assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
  int klass_index = oop_recorder()->find_index(k);
  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+  Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec);
 }

 void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedKlassPointers) {
-    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
+    if (Universe::heap() != NULL) {
+      if (Universe::narrow_oop_base() == NULL) {
+        MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
+      } else {
+        mov64(r12_heapbase, (int64_t)Universe::narrow_ptrs_base());
+      }
+    } else {
+      movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
+    }
  }
 }
+
 #endif // _LP64


--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
@ -371,6 +371,10 @@ class MacroAssembler: public Assembler {
  void cmp_narrow_klass(Register dst, Klass* k);
  void cmp_narrow_klass(Address dst, Klass* k);

+  // Returns the byte size of the instructions generated by decode_klass_not_null()
+  // when compressed klass pointers are being used.
+  static int instr_size_for_decode_klass_not_null();
+
  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

--- a/hotspot/src/cpu/x86/vm/relocInfo_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/relocInfo_x86.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -55,9 +55,9 @@ void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
    }
  } else {
      if (verify_only) {
-        assert(*(uint32_t*) disp == oopDesc::encode_klass((Klass*)x), "instructions must match");
+        assert(*(uint32_t*) disp == Klass::encode_klass((Klass*)x), "instructions must match");
      } else {
-        *(int32_t*) disp = oopDesc::encode_klass((Klass*)x);
+        *(int32_t*) disp = Klass::encode_klass((Klass*)x);
      }
    }
  } else {
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@ -675,7 +675,6 @@ class StubGenerator: public StubCodeGenerator {
    __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, error);              // if klass is NULL it is broken
-    // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers

    // return if everything seems ok
    __ bind(exit);
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@ -1021,7 +1021,6 @@ class StubGenerator: public StubCodeGenerator {
    __ load_klass(rax, rax);  // get klass
    __ testptr(rax, rax);
    __ jcc(Assembler::zero, error); // if klass is NULL it is broken
-    // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers

    // return if everything seems ok
    __ bind(exit);
--- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
+++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
@ -849,9 +849,9 @@ address InterpreterGenerator::generate_CRC32_update_entry() {
    address entry = __ pc();

    // rbx,: Method*
-    // rsi: senderSP must preserved for slow path, set SP to it on fast path
-    // rdx: scratch
-    // rdi: scratch
+    // r13: senderSP must preserved for slow path, set SP to it on fast path
+    // c_rarg0: scratch (rdi on non-Win64, rcx on Win64)
+    // c_rarg1: scratch (rsi on non-Win64, rdx on Win64)

    Label slow_path;
    // If we need a safepoint check, generate full interpreter entry.
@ -865,8 +865,8 @@ address InterpreterGenerator::generate_CRC32_update_entry() {

    // Load parameters
    const Register crc = rax;  // crc
-    const Register val = rdx;  // source java byte value
-    const Register tbl = rdi;  // scratch
+    const Register val = c_rarg0;  // source java byte value
+    const Register tbl = c_rarg1;  // scratch

    // Arguments are reversed on java expression stack
    __ movl(val, Address(rsp,   wordSize)); // byte value
@ -880,7 +880,7 @@ address InterpreterGenerator::generate_CRC32_update_entry() {

    // _areturn
    __ pop(rdi);                // get return address
-    __ mov(rsp, rsi);           // set sp to sender sp
+    __ mov(rsp, r13);           // set sp to sender sp
    __ jmp(rdi);

    // generate a vanilla native entry as the slow path
@ -919,20 +919,24 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
    const Register crc = c_rarg0;  // crc
    const Register buf = c_rarg1;  // source java byte array address
    const Register len = c_rarg2;  // length
+    const Register off = len;      // offset (never overlaps with 'len')

    // Arguments are reversed on java expression stack
-    __ movl(len,   Address(rsp,   wordSize)); // Length
    // Calculate address of start element
    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
      __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
-      __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+      __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
+      __ addq(buf, off); // + offset
      __ movl(crc,   Address(rsp, 5*wordSize)); // Initial CRC
    } else {
      __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
      __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
-      __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+      __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
+      __ addq(buf, off); // + offset
      __ movl(crc,   Address(rsp, 4*wordSize)); // Initial CRC
    }
+    // Can now load 'len' since we're finished with 'off'
+    __ movl(len, Address(rsp, wordSize)); // Length

    __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
    // result in rax
--- a/hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp
+++ b/hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp
@ -211,11 +211,11 @@ int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
  if (is_vtable_stub) {
    // Vtable stub size
    return (DebugVtables ? 512 : 24) + (CountCompiledCalls ? 13 : 0) +
-           (UseCompressedKlassPointers ? 16 : 0);  // 1 leaq can be 3 bytes + 1 long
+           (UseCompressedKlassPointers ?  MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
  } else {
    // Itable stub size
    return (DebugVtables ? 512 : 74) + (CountCompiledCalls ? 13 : 0) +
-           (UseCompressedKlassPointers ? 32 : 0);  // 2 leaqs
+           (UseCompressedKlassPointers ?  MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
  }
  // In order to tune these parameters, run the JVM with VM options
  // +PrintMiscellaneous and +WizardMode to see information about
--- a/hotspot/src/cpu/x86/vm/x86_64.ad
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad
@ -1393,9 +1393,7 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 {
  if (UseCompressedKlassPointers) {
    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
-    if (Universe::narrow_klass_shift() != 0) {
-      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
-    }
+    st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
  } else {
    st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
@ -4035,146 +4033,6 @@ operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 sca
  %}
 %}

-operand indirectNarrowKlass(rRegN reg)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(DecodeNKlass reg);
-
-  format %{ "[$reg]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp(0x0);
-  %}
-%}
-
-operand indOffset8NarrowKlass(rRegN reg, immL8 off)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeNKlass reg) off);
-
-  format %{ "[$reg + $off (8-bit)]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffset32NarrowKlass(rRegN reg, immL32 off)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeNKlass reg) off);
-
-  format %{ "[$reg + $off (32-bit)]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indIndexOffsetNarrowKlass(rRegN reg, rRegL lreg, immL32 off)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP (DecodeNKlass reg) lreg) off);
-
-  op_cost(10);
-  format %{"[$reg + $off + $lreg]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indIndexNarrowKlass(rRegN reg, rRegL lreg)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeNKlass reg) lreg);
-
-  op_cost(10);
-  format %{"[$reg + $lreg]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale(0x0);
-    disp(0x0);
-  %}
-%}
-
-operand indIndexScaleNarrowKlass(rRegN reg, rRegL lreg, immI2 scale)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeNKlass reg) (LShiftL lreg scale));
-
-  op_cost(10);
-  format %{"[$reg + $lreg << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale($scale);
-    disp(0x0);
-  %}
-%}
-
-operand indIndexScaleOffsetNarrowKlass(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP (DecodeNKlass reg) (LShiftL lreg scale)) off);
-
-  op_cost(10);
-  format %{"[$reg + $off + $lreg << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
-operand indCompressedKlassOffset(rRegN reg, immL32 off) %{
-  predicate(UseCompressedKlassPointers && (Universe::narrow_klass_shift() == Address::times_8));
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeNKlass reg) off);
-
-  op_cost(10);
-  format %{"[R12 + $reg << 3 + $off] (compressed klass addressing)" %}
-  interface(MEMORY_INTER) %{
-    base(0xc); // R12
-    index($reg);
-    scale(0x3);
-    disp($off);
-  %}
-%}
-
-operand indPosIndexScaleOffsetNarrowKlass(rRegN reg, immL32 off, rRegI idx, immI2 scale)
-%{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  predicate(Universe::narrow_klass_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
-  match(AddP (AddP (DecodeNKlass reg) (LShiftL (ConvI2L idx) scale)) off);
-
-  op_cost(10);
-  format %{"[$reg + $off + $idx << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($idx);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
 //----------Special Memory Operands--------------------------------------------
 // Stack Slot Operand - This operand is used for loading and storing temporary
 //                      values on the stack where a match requires a value to
@ -4345,11 +4203,7 @@ opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
-               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow,
-               indCompressedKlassOffset,
-               indirectNarrowKlass, indOffset8NarrowKlass, indOffset32NarrowKlass,
-               indIndexOffsetNarrowKlass, indIndexNarrowKlass, indIndexScaleNarrowKlass,
-               indIndexScaleOffsetNarrowKlass, indPosIndexScaleOffsetNarrowKlass);
+               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);

 //----------PIPELINE-----------------------------------------------------------
 // Rules which define the behavior of the target architectures pipeline.
@ -6665,7 +6519,7 @@ instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodePKlass src));
  effect(KILL cr);
-  format %{ "encode_heap_oop_not_null $dst,$src" %}
+  format %{ "encode_klass_not_null $dst,$src" %}
  ins_encode %{
    __ encode_klass_not_null($dst$$Register, $src$$Register);
  %}
@ -6675,7 +6529,7 @@ instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeNKlass src));
  effect(KILL cr);
-  format %{ "decode_heap_oop_not_null $dst,$src" %}
+  format %{ "decode_klass_not_null $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
--- a/hotspot/src/cpu/zero/vm/assembler_zero.cpp
+++ b/hotspot/src/cpu/zero/vm/assembler_zero.cpp
@ -50,6 +50,7 @@ int AbstractAssembler::code_fill_byte() {
 #ifdef ASSERT
 bool AbstractAssembler::pd_check_instruction_mark() {
  ShouldNotCallThis();
+  return false;
 }
 #endif

@ -73,6 +74,7 @@ void MacroAssembler::advance(int bytes) {
 RegisterOrConstant MacroAssembler::delayed_value_impl(
  intptr_t* delayed_value_addr, Register tmpl, int offset) {
  ShouldNotCallThis();
+  return RegisterOrConstant();
 }

 void MacroAssembler::store_oop(jobject obj) {
--- a/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp
+++ b/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp
@ -1008,6 +1008,7 @@ void BytecodeInterpreter::layout_interpreterState(interpreterState istate,

 address CppInterpreter::return_entry(TosState state, int length) {
  ShouldNotCallThis();
+  return NULL;
 }

 address CppInterpreter::deopt_entry(TosState state, int length) {
--- a/hotspot/src/cpu/zero/vm/frame_zero.cpp
+++ b/hotspot/src/cpu/zero/vm/frame_zero.cpp
@ -116,6 +116,7 @@ void frame::patch_pc(Thread* thread, address pc) {

 bool frame::safe_for_sender(JavaThread *thread) {
  ShouldNotCallThis();
+  return false;
 }

 void frame::pd_gc_epilog() {
@ -123,6 +124,7 @@ void frame::pd_gc_epilog() {

 bool frame::is_interpreted_frame_valid(JavaThread *thread) const {
  ShouldNotCallThis();
+  return false;
 }

 BasicType frame::interpreter_frame_result(oop* oop_result,
@ -184,9 +186,8 @@ BasicType frame::interpreter_frame_result(oop* oop_result,
 int frame::frame_size(RegisterMap* map) const {
 #ifdef PRODUCT
  ShouldNotCallThis();
-#else
-  return 0; // make javaVFrame::print_value work
 #endif // PRODUCT
+  return 0; // make javaVFrame::print_value work
 }

 intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
--- a/hotspot/src/cpu/zero/vm/frame_zero.inline.hpp
+++ b/hotspot/src/cpu/zero/vm/frame_zero.inline.hpp
@ -36,7 +36,7 @@ inline frame::frame() {
  _deopt_state = unknown;
 }

-inline address  frame::sender_pc()           const { ShouldNotCallThis();  }
+inline address  frame::sender_pc()           const { ShouldNotCallThis(); return NULL; }

 inline frame::frame(ZeroFrame* zf, intptr_t* sp) {
  _zeroframe = zf;
@ -89,6 +89,7 @@ inline intptr_t* frame::real_fp() const {

 inline intptr_t* frame::link() const {
  ShouldNotCallThis();
+  return NULL;
 }

 #ifdef CC_INTERP
@ -151,14 +152,17 @@ inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {

 inline oop frame::saved_oop_result(RegisterMap* map) const {
  ShouldNotCallThis();
+  return NULL;
 }

 inline bool frame::is_older(intptr_t* id) const {
  ShouldNotCallThis();
+  return false;
 }

 inline intptr_t* frame::entry_frame_argument_at(int offset) const {
  ShouldNotCallThis();
+  return NULL;
 }

 inline intptr_t* frame::unextended_sp() const {
--- a/hotspot/src/cpu/zero/vm/icBuffer_zero.cpp
+++ b/hotspot/src/cpu/zero/vm/icBuffer_zero.cpp
@ -49,8 +49,10 @@ void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin,
 address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
  // NB ic_stub_code_size() must return the size of the code we generate
  ShouldNotCallThis();
+  return NULL;
 }

 void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
  ShouldNotCallThis();
+  return NULL;
 }
--- a/hotspot/src/cpu/zero/vm/interp_masm_zero.hpp
+++ b/hotspot/src/cpu/zero/vm/interp_masm_zero.hpp
@ -40,6 +40,7 @@ class InterpreterMacroAssembler : public MacroAssembler {
                                        Register  tmp,
                                        int       offset) {
    ShouldNotCallThis();
+    return RegisterOrConstant();
  }
 };

--- a/hotspot/src/cpu/zero/vm/interpreter_zero.cpp
+++ b/hotspot/src/cpu/zero/vm/interpreter_zero.cpp
@ -64,6 +64,7 @@ address InterpreterGenerator::generate_math_entry(
    return NULL;

  Unimplemented();
+  return NULL;
 }

 address InterpreterGenerator::generate_abstract_entry() {
--- a/hotspot/src/cpu/zero/vm/nativeInst_zero.hpp
+++ b/hotspot/src/cpu/zero/vm/nativeInst_zero.hpp
@ -51,15 +51,18 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
 public:
  bool is_jump() {
    ShouldNotCallThis();
+    return false;
  }

  bool is_safepoint_poll() {
    ShouldNotCallThis();
+    return false;
  }
 };

 inline NativeInstruction* nativeInstruction_at(address address) {
  ShouldNotCallThis();
+  return NULL;
 }

 class NativeCall : public NativeInstruction {
@ -70,18 +73,22 @@ class NativeCall : public NativeInstruction {

  address instruction_address() const {
    ShouldNotCallThis();
+    return NULL;
  }

  address next_instruction_address() const {
    ShouldNotCallThis();
+    return NULL;
  }

  address return_address() const {
    ShouldNotCallThis();
+    return NULL;
  }

  address destination() const {
    ShouldNotCallThis();
+    return NULL;
  }

  void set_destination_mt_safe(address dest) {
@ -98,25 +105,30 @@ class NativeCall : public NativeInstruction {

  static bool is_call_before(address return_address) {
    ShouldNotCallThis();
+    return false;
  }
 };

 inline NativeCall* nativeCall_before(address return_address) {
  ShouldNotCallThis();
+  return NULL;
 }

 inline NativeCall* nativeCall_at(address address) {
  ShouldNotCallThis();
+  return NULL;
 }

 class NativeMovConstReg : public NativeInstruction {
 public:
  address next_instruction_address() const {
    ShouldNotCallThis();
+    return NULL;
  }

  intptr_t data() const {
    ShouldNotCallThis();
+    return 0;
  }

  void set_data(intptr_t x) {
@ -126,12 +138,14 @@ class NativeMovConstReg : public NativeInstruction {

 inline NativeMovConstReg* nativeMovConstReg_at(address address) {
  ShouldNotCallThis();
+  return NULL;
 }

 class NativeMovRegMem : public NativeInstruction {
 public:
  int offset() const {
    ShouldNotCallThis();
+    return 0;
  }

  void set_offset(intptr_t x) {
@ -145,6 +159,7 @@ class NativeMovRegMem : public NativeInstruction {

 inline NativeMovRegMem* nativeMovRegMem_at(address address) {
  ShouldNotCallThis();
+  return NULL;
 }

 class NativeJump : public NativeInstruction {
@ -155,6 +170,7 @@ class NativeJump : public NativeInstruction {

  address jump_destination() const {
    ShouldNotCallThis();
+    return NULL;
  }

  void set_jump_destination(address dest) {
@ -172,12 +188,14 @@ class NativeJump : public NativeInstruction {

 inline NativeJump* nativeJump_at(address address) {
  ShouldNotCallThis();
+  return NULL;
 }

 class NativeGeneralJump : public NativeInstruction {
 public:
  address jump_destination() const {
    ShouldNotCallThis();
+    return NULL;
  }

  static void insert_unconditional(address code_pos, address entry) {
@ -191,6 +209,7 @@ class NativeGeneralJump : public NativeInstruction {

 inline NativeGeneralJump* nativeGeneralJump_at(address address) {
  ShouldNotCallThis();
+  return NULL;
 }

 #endif // CPU_ZERO_VM_NATIVEINST_ZERO_HPP
--- a/hotspot/src/cpu/zero/vm/register_zero.cpp
+++ b/hotspot/src/cpu/zero/vm/register_zero.cpp
@ -32,8 +32,10 @@ const int ConcreteRegisterImpl::max_fpr =

 const char* RegisterImpl::name() const {
  ShouldNotCallThis();
+  return NULL;
 }

 const char* FloatRegisterImpl::name() const {
  ShouldNotCallThis();
+  return NULL;
 }
--- a/hotspot/src/cpu/zero/vm/relocInfo_zero.cpp
+++ b/hotspot/src/cpu/zero/vm/relocInfo_zero.cpp
@ -37,6 +37,7 @@ void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {

 address Relocation::pd_call_destination(address orig_addr) {
  ShouldNotCallThis();
+  return NULL;
 }

 void Relocation::pd_set_call_destination(address x) {
@ -45,6 +46,7 @@ void Relocation::pd_set_call_destination(address x) {

 address Relocation::pd_get_address_from_code() {
  ShouldNotCallThis();
+  return NULL;
 }

 address* Relocation::pd_address_in_code() {
--- a/hotspot/src/cpu/zero/vm/sharedRuntime_zero.cpp
+++ b/hotspot/src/cpu/zero/vm/sharedRuntime_zero.cpp
@ -89,6 +89,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
                                                            ret_type);
 #else
  ShouldNotCallThis();
+  return NULL;
 #endif // SHARK
 }

@ -99,6 +100,7 @@ int Deoptimization::last_frame_adjust(int callee_parameters,

 uint SharedRuntime::out_preserve_stack_slots() {
  ShouldNotCallThis();
+  return 0;
 }

 JRT_LEAF(void, zero_stub())
@ -135,4 +137,5 @@ int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                         VMRegPair *regs,
                                         int total_args_passed) {
  ShouldNotCallThis();
+  return 0;
 }
--- a/hotspot/src/cpu/zero/vm/vtableStubs_zero.cpp
+++ b/hotspot/src/cpu/zero/vm/vtableStubs_zero.cpp
@ -39,16 +39,20 @@

 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
  ShouldNotCallThis();
+  return NULL;
 }

 VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
  ShouldNotCallThis();
+  return NULL;
 }

 int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
  ShouldNotCallThis();
+  return 0;
 }

 int VtableStub::pd_code_alignment() {
  ShouldNotCallThis();
+  return 0;
 }
--- a/hotspot/src/os_cpu/bsd_x86/vm/orderAccess_bsd_x86.inline.hpp
+++ b/hotspot/src/os_cpu/bsd_x86/vm/orderAccess_bsd_x86.inline.hpp
@ -190,7 +190,7 @@ inline void     OrderAccess::release_store_fence(volatile juint*   p, juint   v)
 inline void     OrderAccess::release_store_fence(volatile julong*  p, julong  v) { release_store_fence((volatile jlong*)p,  (jlong)v);  }

 inline void     OrderAccess::release_store_fence(volatile jfloat*  p, jfloat  v) { *p = v; fence(); }
-inline void     OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store_fence((volatile jlong*)p, jdouble_cast(v)); }
+inline void     OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store_fence((volatile jlong*)p, jlong_cast(v)); }

 inline void     OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) {
 #ifdef AMD64
--- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp
@ -715,6 +715,7 @@ JVM_handle_bsd_signal(int sig,
  err.report_and_die();

  ShouldNotReachHere();
+  return false;
 }

 // From solaris_i486.s ported to bsd_i486.s
--- a/hotspot/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp
+++ b/hotspot/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp
@ -66,6 +66,7 @@ address os::current_stack_pointer() {

 frame os::get_sender_for_C_frame(frame* fr) {
  ShouldNotCallThis();
+  return frame();
 }

 frame os::current_frame() {
@ -103,16 +104,19 @@ void os::initialize_thread(Thread* thr) {

 address os::Bsd::ucontext_get_pc(ucontext_t* uc) {
  ShouldNotCallThis();
+  return NULL;
 }

 ExtendedPC os::fetch_frame_from_context(void* ucVoid,
                                        intptr_t** ret_sp,
                                        intptr_t** ret_fp) {
  ShouldNotCallThis();
+  return ExtendedPC();
 }

 frame os::fetch_frame_from_context(void* ucVoid) {
  ShouldNotCallThis();
+  return frame();
 }

 extern "C" JNIEXPORT int
@ -240,6 +244,7 @@ JVM_handle_bsd_signal(int sig,

  sprintf(buf, fmt, sig, info->si_addr);
  fatal(buf);
+  return false;
 }

 void os::Bsd::init_thread_fpu_state(void) {
@ -373,17 +378,7 @@ void os::print_register_info(outputStream *st, void *context) {

 extern "C" {
  int SpinPause() {
-  }
-
-  int SafeFetch32(int *adr, int errValue) {
-    int value = errValue;
-    value = *adr;
-    return value;
-  }
-  intptr_t SafeFetchN(intptr_t *adr, intptr_t errValue) {
-    intptr_t value = errValue;
-    value = *adr;
-    return value;
+    return 1;
  }

  void _Copy_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
--- a/hotspot/src/os_cpu/bsd_zero/vm/thread_bsd_zero.hpp
+++ b/hotspot/src/os_cpu/bsd_zero/vm/thread_bsd_zero.hpp
@ -110,6 +110,7 @@
                                           void* ucontext,
                                           bool isInJava) {
    ShouldNotCallThis();
+    return false;
  }

  // These routines are only used on cpu architectures that
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp
@ -915,16 +915,6 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_i
    // Return to the now deoptimized frame.
  }

-  // If we are patching in a non-perm oop, make sure the nmethod
-  // is on the right list.
-  if (ScavengeRootsInCode && mirror.not_null() && mirror()->is_scavengable()) {
-    MutexLockerEx ml_code (CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
-    guarantee(nm != NULL, "only nmethods can contain non-perm oops");
-    if (!nm->on_scavenge_root_list())
-      CodeCache::add_scavenge_root_nmethod(nm);
-  }
-
  // Now copy code back

  {
@ -1125,6 +1115,21 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_i
      }
    }
  }
+
+  // If we are patching in a non-perm oop, make sure the nmethod
+  // is on the right list.
+  if (ScavengeRootsInCode && mirror.not_null() && mirror()->is_scavengable()) {
+    MutexLockerEx ml_code (CodeCache_lock, Mutex::_no_safepoint_check_flag);
+    nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
+    guarantee(nm != NULL, "only nmethods can contain non-perm oops");
+    if (!nm->on_scavenge_root_list()) {
+      CodeCache::add_scavenge_root_nmethod(nm);
+    }
+
+    // Since we've patched some oops in the nmethod,
+    // (re)register it with the heap.
+    Universe::heap()->register_nmethod(nm);
+  }
 JRT_END

 //
--- a/hotspot/src/share/vm/classfile/classFileParser.cpp
+++ b/hotspot/src/share/vm/classfile/classFileParser.cpp
@ -2590,7 +2590,7 @@ void ClassFileParser::parse_classfile_sourcefile_attribute(TRAPS) {
    valid_symbol_at(sourcefile_index),
    "Invalid SourceFile attribute at constant pool index %u in class file %s",
    sourcefile_index, CHECK);
-  set_class_sourcefile(_cp->symbol_at(sourcefile_index));
+  set_class_sourcefile_index(sourcefile_index);
 }


@ -2728,7 +2728,7 @@ void ClassFileParser::parse_classfile_signature_attribute(TRAPS) {
    valid_symbol_at(signature_index),
    "Invalid constant pool index %u in Signature attribute in class file %s",
    signature_index, CHECK);
-  set_class_generic_signature(_cp->symbol_at(signature_index));
+  set_class_generic_signature_index(signature_index);
 }

 void ClassFileParser::parse_classfile_bootstrap_methods_attribute(u4 attribute_byte_length, TRAPS) {
@ -2975,13 +2975,11 @@ void ClassFileParser::parse_classfile_attributes(ClassFileParser::ClassAnnotatio
 void ClassFileParser::apply_parsed_class_attributes(instanceKlassHandle k) {
  if (_synthetic_flag)
    k->set_is_synthetic();
-  if (_sourcefile != NULL) {
-    _sourcefile->increment_refcount();
-    k->set_source_file_name(_sourcefile);
+  if (_sourcefile_index != 0) {
+    k->set_source_file_name_index(_sourcefile_index);
  }
-  if (_generic_signature != NULL) {
-    _generic_signature->increment_refcount();
-    k->set_generic_signature(_generic_signature);
+  if (_generic_signature_index != 0) {
+    k->set_generic_signature_index(_generic_signature_index);
  }
  if (_sde_buffer != NULL) {
    k->set_source_debug_extension(_sde_buffer, _sde_length);
--- a/hotspot/src/share/vm/classfile/classFileParser.hpp
+++ b/hotspot/src/share/vm/classfile/classFileParser.hpp
@ -62,8 +62,8 @@ class ClassFileParser VALUE_OBJ_CLASS_SPEC {
  bool       _synthetic_flag;
  int        _sde_length;
  char*      _sde_buffer;
-  Symbol*    _sourcefile;
-  Symbol*    _generic_signature;
+  u2         _sourcefile_index;
+  u2         _generic_signature_index;

  // Metadata created before the instance klass is created.  Must be deallocated
  // if not transferred to the InstanceKlass upon successful class loading
@ -81,16 +81,16 @@ class ClassFileParser VALUE_OBJ_CLASS_SPEC {
  Array<AnnotationArray*>* _fields_type_annotations;
  InstanceKlass*   _klass;  // InstanceKlass once created.

-  void set_class_synthetic_flag(bool x)           { _synthetic_flag = x; }
-  void set_class_sourcefile(Symbol* x)            { _sourcefile = x; }
-  void set_class_generic_signature(Symbol* x)     { _generic_signature = x; }
-  void set_class_sde_buffer(char* x, int len)     { _sde_buffer = x; _sde_length = len; }
+  void set_class_synthetic_flag(bool x)        { _synthetic_flag = x; }
+  void set_class_sourcefile_index(u2 x)        { _sourcefile_index = x; }
+  void set_class_generic_signature_index(u2 x) { _generic_signature_index = x; }
+  void set_class_sde_buffer(char* x, int len)  { _sde_buffer = x; _sde_length = len; }

  void init_parsed_class_attributes(ClassLoaderData* loader_data) {
    _loader_data = loader_data;
    _synthetic_flag = false;
-    _sourcefile = NULL;
-    _generic_signature = NULL;
+    _sourcefile_index = 0;
+    _generic_signature_index = 0;
    _sde_buffer = NULL;
    _sde_length = 0;
    // initialize the other flags too:
--- a/hotspot/src/share/vm/code/nmethod.cpp
+++ b/hotspot/src/share/vm/code/nmethod.cpp
@ -687,6 +687,7 @@ nmethod::nmethod(
    code_buffer->copy_values_to(this);
    if (ScavengeRootsInCode && detect_scavenge_root_oops()) {
      CodeCache::add_scavenge_root_nmethod(this);
+      Universe::heap()->register_nmethod(this);
    }
    debug_only(verify_scavenge_root_oops());
    CodeCache::commit(this);
@ -881,6 +882,7 @@ nmethod::nmethod(
    dependencies->copy_to(this);
    if (ScavengeRootsInCode && detect_scavenge_root_oops()) {
      CodeCache::add_scavenge_root_nmethod(this);
+      Universe::heap()->register_nmethod(this);
    }
    debug_only(verify_scavenge_root_oops());

@ -1300,6 +1302,13 @@ bool nmethod::make_not_entrant_or_zombie(unsigned int state) {
  methodHandle the_method(method());
  No_Safepoint_Verifier nsv;

+  // during patching, depending on the nmethod state we must notify the GC that
+  // code has been unloaded, unregistering it. We cannot do this right while
+  // holding the Patching_lock because we need to use the CodeCache_lock. This
+  // would be prone to deadlocks.
+  // This flag is used to remember whether we need to later lock and unregister.
+  bool nmethod_needs_unregister = false;
+
  {
    // invalidate osr nmethod before acquiring the patching lock since
    // they both acquire leaf locks and we don't want a deadlock.
@ -1332,6 +1341,13 @@ bool nmethod::make_not_entrant_or_zombie(unsigned int state) {
      inc_decompile_count();
    }

+    // If the state is becoming a zombie, signal to unregister the nmethod with
+    // the heap.
+    // This nmethod may have already been unloaded during a full GC.
+    if ((state == zombie) && !is_unloaded()) {
+      nmethod_needs_unregister = true;
+    }
+
    // Change state
    _state = state;

@ -1367,6 +1383,9 @@ bool nmethod::make_not_entrant_or_zombie(unsigned int state) {
      // safepoint can sneak in, otherwise the oops used by the
      // dependency logic could have become stale.
      MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+      if (nmethod_needs_unregister) {
+        Universe::heap()->unregister_nmethod(this);
+      }
      flush_dependencies(NULL);
    }

@ -1817,21 +1836,10 @@ void nmethod::metadata_do(void f(Metadata*)) {
  if (_method != NULL) f(_method);
 }

-
-// This method is called twice during GC -- once while
-// tracing the "active" nmethods on thread stacks during
-// the (strong) marking phase, and then again when walking
-// the code cache contents during the weak roots processing
-// phase. The two uses are distinguished by means of the
-// 'do_strong_roots_only' flag, which is true in the first
-// case. We want to walk the weak roots in the nmethod
-// only in the second case. The weak roots in the nmethod
-// are the oops in the ExceptionCache and the InlineCache
-// oops.
-void nmethod::oops_do(OopClosure* f, bool do_strong_roots_only) {
+void nmethod::oops_do(OopClosure* f, bool allow_zombie) {
  // make sure the oops ready to receive visitors
-  assert(!is_zombie() && !is_unloaded(),
-         "should not call follow on zombie or unloaded nmethod");
+  assert(allow_zombie || !is_zombie(), "should not call follow on zombie nmethod");
+  assert(!is_unloaded(), "should not call follow on unloaded nmethod");

  // If the method is not entrant or zombie then a JMP is plastered over the
  // first few bytes.  If an oop in the old code was there, that oop
--- a/hotspot/src/share/vm/code/nmethod.hpp
+++ b/hotspot/src/share/vm/code/nmethod.hpp
@ -566,7 +566,7 @@ public:
  void preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map,
                                     OopClosure* f);
  void oops_do(OopClosure* f) { oops_do(f, false); }
-  void oops_do(OopClosure* f, bool do_strong_roots_only);
+  void oops_do(OopClosure* f, bool allow_zombie);
  bool detect_scavenge_root_oops();
  void verify_scavenge_root_oops() PRODUCT_RETURN;

--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@ -5478,40 +5478,42 @@ CMSParMarkTask::do_young_space_rescan(uint worker_id,
  HandleMark   hm;

  SequentialSubTasksDone* pst = space->par_seq_tasks();
-  assert(pst->valid(), "Uninitialized use?");

  uint nth_task = 0;
  uint n_tasks  = pst->n_tasks();

-  HeapWord *start, *end;
-  while (!pst->is_task_claimed(/* reference */ nth_task)) {
-    // We claimed task # nth_task; compute its boundaries.
-    if (chunk_top == 0) {  // no samples were taken
-      assert(nth_task == 0 && n_tasks == 1, "Can have only 1 EdenSpace task");
-      start = space->bottom();
-      end   = space->top();
-    } else if (nth_task == 0) {
-      start = space->bottom();
-      end   = chunk_array[nth_task];
-    } else if (nth_task < (uint)chunk_top) {
-      assert(nth_task >= 1, "Control point invariant");
-      start = chunk_array[nth_task - 1];
-      end   = chunk_array[nth_task];
-    } else {
-      assert(nth_task == (uint)chunk_top, "Control point invariant");
-      start = chunk_array[chunk_top - 1];
-      end   = space->top();
+  if (n_tasks > 0) {
+    assert(pst->valid(), "Uninitialized use?");
+    HeapWord *start, *end;
+    while (!pst->is_task_claimed(/* reference */ nth_task)) {
+      // We claimed task # nth_task; compute its boundaries.
+      if (chunk_top == 0) {  // no samples were taken
+        assert(nth_task == 0 && n_tasks == 1, "Can have only 1 EdenSpace task");
+        start = space->bottom();
+        end   = space->top();
+      } else if (nth_task == 0) {
+        start = space->bottom();
+        end   = chunk_array[nth_task];
+      } else if (nth_task < (uint)chunk_top) {
+        assert(nth_task >= 1, "Control point invariant");
+        start = chunk_array[nth_task - 1];
+        end   = chunk_array[nth_task];
+      } else {
+        assert(nth_task == (uint)chunk_top, "Control point invariant");
+        start = chunk_array[chunk_top - 1];
+        end   = space->top();
+      }
+      MemRegion mr(start, end);
+      // Verify that mr is in space
+      assert(mr.is_empty() || space->used_region().contains(mr),
+             "Should be in space");
+      // Verify that "start" is an object boundary
+      assert(mr.is_empty() || oop(mr.start())->is_oop(),
+             "Should be an oop");
+      space->par_oop_iterate(mr, cl);
    }
-    MemRegion mr(start, end);
-    // Verify that mr is in space
-    assert(mr.is_empty() || space->used_region().contains(mr),
-           "Should be in space");
-    // Verify that "start" is an object boundary
-    assert(mr.is_empty() || oop(mr.start())->is_oop(),
-           "Should be an oop");
-    space->par_oop_iterate(mr, cl);
+    pst->all_tasks_completed();
  }
-  pst->all_tasks_completed();
 }

 void
@ -5788,7 +5790,7 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
  DefNewGeneration* dng = (DefNewGeneration*)_young_gen;

  // Eden space
-  {
+  if (!dng->eden()->is_empty()) {
    SequentialSubTasksDone* pst = dng->eden()->par_seq_tasks();
    assert(!pst->valid(), "Clobbering existing data?");
    // Each valid entry in [0, _eden_chunk_index) represents a task.
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@ -4529,7 +4529,7 @@ G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
-    _total_remset_bytes(0) {
+    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
@ -4553,9 +4553,11 @@ G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
                G1PPRL_BYTE_H_FORMAT
                G1PPRL_BYTE_H_FORMAT
                G1PPRL_DOUBLE_H_FORMAT
+                G1PPRL_BYTE_H_FORMAT
                G1PPRL_BYTE_H_FORMAT,
                "type", "address-range",
-                "used", "prev-live", "next-live", "gc-eff", "remset");
+                "used", "prev-live", "next-live", "gc-eff",
+                "remset", "code-roots");
  _out->print_cr(G1PPRL_LINE_PREFIX
                G1PPRL_TYPE_H_FORMAT
                G1PPRL_ADDR_BASE_H_FORMAT
@ -4563,9 +4565,11 @@ G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
                G1PPRL_BYTE_H_FORMAT
                G1PPRL_BYTE_H_FORMAT
                G1PPRL_DOUBLE_H_FORMAT
+                G1PPRL_BYTE_H_FORMAT
                G1PPRL_BYTE_H_FORMAT,
                "", "",
-                "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", "(bytes)");
+                "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
+                "(bytes)", "(bytes)");
 }

 // It takes as a parameter a reference to one of the _hum_* fields, it
@ -4608,6 +4612,8 @@ bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  size_t next_live_bytes = r->next_live_bytes();
  double gc_eff          = r->gc_efficiency();
  size_t remset_bytes    = r->rem_set()->mem_size();
+  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
+
  if (r->used() == 0) {
    type = "FREE";
  } else if (r->is_survivor()) {
@ -4642,6 +4648,7 @@ bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
  _total_prev_live_bytes += prev_live_bytes;
  _total_next_live_bytes += next_live_bytes;
  _total_remset_bytes    += remset_bytes;
+  _total_strong_code_roots_bytes += strong_code_roots_bytes;

  // Print a line for this particular region.
  _out->print_cr(G1PPRL_LINE_PREFIX
@ -4651,9 +4658,11 @@ bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT
                 G1PPRL_DOUBLE_FORMAT
+                 G1PPRL_BYTE_FORMAT
                 G1PPRL_BYTE_FORMAT,
                 type, bottom, end,
-                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff , remset_bytes);
+                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
+                 remset_bytes, strong_code_roots_bytes);

  return false;
 }
@ -4669,7 +4678,8 @@ G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
-                 G1PPRL_SUM_MB_FORMAT("remset"),
+                 G1PPRL_SUM_MB_FORMAT("remset")
+                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
@ -4677,6 +4687,7 @@ G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
-                 bytes_to_mb(_total_remset_bytes));
+                 bytes_to_mb(_total_remset_bytes),
+                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@ -1257,6 +1257,9 @@ private:
  // Accumulator for the remembered set size
  size_t _total_remset_bytes;

+  // Accumulator for strong code roots memory size
+  size_t _total_strong_code_roots_bytes;
+
  static double perc(size_t val, size_t total) {
    if (total == 0) {
      return 0.0;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@ -23,6 +23,7 @@
 */

 #include "precompiled.hpp"
+#include "code/codeCache.hpp"
 #include "code/icBuffer.hpp"
 #include "gc_implementation/g1/bufferingOopClosure.hpp"
 #include "gc_implementation/g1/concurrentG1Refine.hpp"
@ -1176,20 +1177,27 @@ class PostMCRemSetClearClosure: public HeapRegionClosure {
  ModRefBarrierSet* _mr_bs;
 public:
  PostMCRemSetClearClosure(G1CollectedHeap* g1h, ModRefBarrierSet* mr_bs) :
-    _g1h(g1h), _mr_bs(mr_bs) { }
+    _g1h(g1h), _mr_bs(mr_bs) {}
+
  bool doHeapRegion(HeapRegion* r) {
+    HeapRegionRemSet* hrrs = r->rem_set();
+
    if (r->continuesHumongous()) {
+      // We'll assert that the strong code root list and RSet is empty
+      assert(hrrs->strong_code_roots_list_length() == 0, "sanity");
+      assert(hrrs->occupied() == 0, "RSet should be empty");
      return false;
    }
+
    _g1h->reset_gc_time_stamps(r);
-    HeapRegionRemSet* hrrs = r->rem_set();
-    if (hrrs != NULL) hrrs->clear();
+    hrrs->clear();
    // You might think here that we could clear just the cards
    // corresponding to the used region.  But no: if we leave a dirty card
    // in a region we might allocate into, then it would prevent that card
    // from being enqueued, and cause it to be missed.
    // Re: the performance cost: we shouldn't be doing full GC anyway!
    _mr_bs->clear(MemRegion(r->bottom(), r->end()));
+
    return false;
  }
 };
@ -1269,30 +1277,6 @@ void G1CollectedHeap::print_hrs_post_compaction() {
  heap_region_iterate(&cl);
 }

-double G1CollectedHeap::verify(bool guard, const char* msg) {
-  double verify_time_ms = 0.0;
-
-  if (guard && total_collections() >= VerifyGCStartAt) {
-    double verify_start = os::elapsedTime();
-    HandleMark hm;  // Discard invalid handles created during verification
-    prepare_for_verify();
-    Universe::verify(VerifyOption_G1UsePrevMarking, msg);
-    verify_time_ms = (os::elapsedTime() - verify_start) * 1000;
-  }
-
-  return verify_time_ms;
-}
-
-void G1CollectedHeap::verify_before_gc() {
-  double verify_time_ms = verify(VerifyBeforeGC, " VerifyBeforeGC:");
-  g1_policy()->phase_times()->record_verify_before_time_ms(verify_time_ms);
-}
-
-void G1CollectedHeap::verify_after_gc() {
-  double verify_time_ms = verify(VerifyAfterGC, " VerifyAfterGC:");
-  g1_policy()->phase_times()->record_verify_after_time_ms(verify_time_ms);
-}
-
 bool G1CollectedHeap::do_collection(bool explicit_gc,
                                    bool clear_all_soft_refs,
                                    size_t word_size) {
@ -1433,7 +1417,7 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,

      // Delete metaspaces for unloaded class loaders and clean up loader_data graph
      ClassLoaderDataGraph::purge();
-    MetaspaceAux::verify_metrics();
+      MetaspaceAux::verify_metrics();

      // Note: since we've just done a full GC, concurrent
      // marking is no longer active. Therefore we need not
@ -1504,6 +1488,9 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
        heap_region_iterate(&rebuild_rs);
      }

+      // Rebuild the strong code root lists for each region
+      rebuild_strong_code_roots();
+
      if (true) { // FIXME
        MetaspaceGC::compute_new_size();
      }
@ -3109,6 +3096,145 @@ const char* G1CollectedHeap::top_at_mark_start_str(VerifyOption vo) {
  return NULL; // keep some compilers happy
 }

+// TODO: VerifyRootsClosure extends OopsInGenClosure so that we can
+//       pass it as the perm_blk to SharedHeap::process_strong_roots.
+//       When process_strong_roots stop calling perm_blk->younger_refs_iterate
+//       we can change this closure to extend the simpler OopClosure.
+class VerifyRootsClosure: public OopsInGenClosure {
+private:
+  G1CollectedHeap* _g1h;
+  VerifyOption     _vo;
+  bool             _failures;
+public:
+  // _vo == UsePrevMarking -> use "prev" marking information,
+  // _vo == UseNextMarking -> use "next" marking information,
+  // _vo == UseMarkWord    -> use mark word from object header.
+  VerifyRootsClosure(VerifyOption vo) :
+    _g1h(G1CollectedHeap::heap()),
+    _vo(vo),
+    _failures(false) { }
+
+  bool failures() { return _failures; }
+
+  template <class T> void do_oop_nv(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      if (_g1h->is_obj_dead_cond(obj, _vo)) {
+        gclog_or_tty->print_cr("Root location "PTR_FORMAT" "
+                              "points to dead obj "PTR_FORMAT, p, (void*) obj);
+        if (_vo == VerifyOption_G1UseMarkWord) {
+          gclog_or_tty->print_cr("  Mark word: "PTR_FORMAT, (void*)(obj->mark()));
+        }
+        obj->print_on(gclog_or_tty);
+        _failures = true;
+      }
+    }
+  }
+
+  void do_oop(oop* p)       { do_oop_nv(p); }
+  void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+class G1VerifyCodeRootOopClosure: public OopsInGenClosure {
+  G1CollectedHeap* _g1h;
+  OopClosure* _root_cl;
+  nmethod* _nm;
+  VerifyOption _vo;
+  bool _failures;
+
+  template <class T> void do_oop_work(T* p) {
+    // First verify that this root is live
+    _root_cl->do_oop(p);
+
+    if (!G1VerifyHeapRegionCodeRoots) {
+      // We're not verifying the code roots attached to heap region.
+      return;
+    }
+
+    // Don't check the code roots during marking verification in a full GC
+    if (_vo == VerifyOption_G1UseMarkWord) {
+      return;
+    }
+
+    // Now verify that the current nmethod (which contains p) is
+    // in the code root list of the heap region containing the
+    // object referenced by p.
+
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+
+      // Now fetch the region containing the object
+      HeapRegion* hr = _g1h->heap_region_containing(obj);
+      HeapRegionRemSet* hrrs = hr->rem_set();
+      // Verify that the strong code root list for this region
+      // contains the nmethod
+      if (!hrrs->strong_code_roots_list_contains(_nm)) {
+        gclog_or_tty->print_cr("Code root location "PTR_FORMAT" "
+                              "from nmethod "PTR_FORMAT" not in strong "
+                              "code roots for region ["PTR_FORMAT","PTR_FORMAT")",
+                              p, _nm, hr->bottom(), hr->end());
+        _failures = true;
+      }
+    }
+  }
+
+public:
+  G1VerifyCodeRootOopClosure(G1CollectedHeap* g1h, OopClosure* root_cl, VerifyOption vo):
+    _g1h(g1h), _root_cl(root_cl), _vo(vo), _nm(NULL), _failures(false) {}
+
+  void do_oop(oop* p) { do_oop_work(p); }
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+
+  void set_nmethod(nmethod* nm) { _nm = nm; }
+  bool failures() { return _failures; }
+};
+
+class G1VerifyCodeRootBlobClosure: public CodeBlobClosure {
+  G1VerifyCodeRootOopClosure* _oop_cl;
+
+public:
+  G1VerifyCodeRootBlobClosure(G1VerifyCodeRootOopClosure* oop_cl):
+    _oop_cl(oop_cl) {}
+
+  void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = cb->as_nmethod_or_null();
+    if (nm != NULL) {
+      _oop_cl->set_nmethod(nm);
+      nm->oops_do(_oop_cl);
+    }
+  }
+};
+
+class YoungRefCounterClosure : public OopClosure {
+  G1CollectedHeap* _g1h;
+  int              _count;
+ public:
+  YoungRefCounterClosure(G1CollectedHeap* g1h) : _g1h(g1h), _count(0) {}
+  void do_oop(oop* p)       { if (_g1h->is_in_young(*p)) { _count++; } }
+  void do_oop(narrowOop* p) { ShouldNotReachHere(); }
+
+  int count() { return _count; }
+  void reset_count() { _count = 0; };
+};
+
+class VerifyKlassClosure: public KlassClosure {
+  YoungRefCounterClosure _young_ref_counter_closure;
+  OopClosure *_oop_closure;
+ public:
+  VerifyKlassClosure(G1CollectedHeap* g1h, OopClosure* cl) : _young_ref_counter_closure(g1h), _oop_closure(cl) {}
+  void do_klass(Klass* k) {
+    k->oops_do(_oop_closure);
+
+    _young_ref_counter_closure.reset_count();
+    k->oops_do(&_young_ref_counter_closure);
+    if (_young_ref_counter_closure.count() > 0) {
+      guarantee(k->has_modified_oops(), err_msg("Klass %p, has young refs but is not dirty.", k));
+    }
+  }
+};
+
 class VerifyLivenessOopClosure: public OopClosure {
  G1CollectedHeap* _g1h;
  VerifyOption _vo;
@ -3242,75 +3368,7 @@ public:
  }
 };

-class YoungRefCounterClosure : public OopClosure {
-  G1CollectedHeap* _g1h;
-  int              _count;
- public:
-  YoungRefCounterClosure(G1CollectedHeap* g1h) : _g1h(g1h), _count(0) {}
-  void do_oop(oop* p)       { if (_g1h->is_in_young(*p)) { _count++; } }
-  void do_oop(narrowOop* p) { ShouldNotReachHere(); }
-
-  int count() { return _count; }
-  void reset_count() { _count = 0; };
-};
-
-class VerifyKlassClosure: public KlassClosure {
-  YoungRefCounterClosure _young_ref_counter_closure;
-  OopClosure *_oop_closure;
- public:
-  VerifyKlassClosure(G1CollectedHeap* g1h, OopClosure* cl) : _young_ref_counter_closure(g1h), _oop_closure(cl) {}
-  void do_klass(Klass* k) {
-    k->oops_do(_oop_closure);
-
-    _young_ref_counter_closure.reset_count();
-    k->oops_do(&_young_ref_counter_closure);
-    if (_young_ref_counter_closure.count() > 0) {
-      guarantee(k->has_modified_oops(), err_msg("Klass %p, has young refs but is not dirty.", k));
-    }
-  }
-};
-
-// TODO: VerifyRootsClosure extends OopsInGenClosure so that we can
-//       pass it as the perm_blk to SharedHeap::process_strong_roots.
-//       When process_strong_roots stop calling perm_blk->younger_refs_iterate
-//       we can change this closure to extend the simpler OopClosure.
-class VerifyRootsClosure: public OopsInGenClosure {
-private:
-  G1CollectedHeap* _g1h;
-  VerifyOption     _vo;
-  bool             _failures;
-public:
-  // _vo == UsePrevMarking -> use "prev" marking information,
-  // _vo == UseNextMarking -> use "next" marking information,
-  // _vo == UseMarkWord    -> use mark word from object header.
-  VerifyRootsClosure(VerifyOption vo) :
-    _g1h(G1CollectedHeap::heap()),
-    _vo(vo),
-    _failures(false) { }
-
-  bool failures() { return _failures; }
-
-  template <class T> void do_oop_nv(T* p) {
-    T heap_oop = oopDesc::load_heap_oop(p);
-    if (!oopDesc::is_null(heap_oop)) {
-      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-      if (_g1h->is_obj_dead_cond(obj, _vo)) {
-        gclog_or_tty->print_cr("Root location "PTR_FORMAT" "
-                              "points to dead obj "PTR_FORMAT, p, (void*) obj);
-        if (_vo == VerifyOption_G1UseMarkWord) {
-          gclog_or_tty->print_cr("  Mark word: "PTR_FORMAT, (void*)(obj->mark()));
-        }
-        obj->print_on(gclog_or_tty);
-        _failures = true;
-      }
-    }
-  }
-
-  void do_oop(oop* p)       { do_oop_nv(p); }
-  void do_oop(narrowOop* p) { do_oop_nv(p); }
-};
-
-// This is the task used for parallel heap verification.
+// This is the task used for parallel verification of the heap regions

 class G1ParVerifyTask: public AbstractGangTask {
 private:
@ -3344,20 +3402,15 @@ public:
  }
 };

-void G1CollectedHeap::verify(bool silent) {
-  verify(silent, VerifyOption_G1UsePrevMarking);
-}
-
-void G1CollectedHeap::verify(bool silent,
-                             VerifyOption vo) {
+void G1CollectedHeap::verify(bool silent, VerifyOption vo) {
  if (SafepointSynchronize::is_at_safepoint()) {
-    if (!silent) { gclog_or_tty->print("Roots "); }
-    VerifyRootsClosure rootsCl(vo);
-
    assert(Thread::current()->is_VM_thread(),
           "Expected to be executed serially by the VM thread at this point");

-    CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false);
+    if (!silent) { gclog_or_tty->print("Roots "); }
+    VerifyRootsClosure rootsCl(vo);
+    G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo);
+    G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);
    VerifyKlassClosure klassCl(this, &rootsCl);

    // We apply the relevant closures to all the oops in the
@ -3376,7 +3429,7 @@ void G1CollectedHeap::verify(bool silent,
                         &klassCl
                         );

-    bool failures = rootsCl.failures();
+    bool failures = rootsCl.failures() || codeRootsCl.failures();

    if (vo != VerifyOption_G1UseMarkWord) {
      // If we're verifying during a full GC then the region sets
@ -3445,6 +3498,34 @@ void G1CollectedHeap::verify(bool silent,
  }
 }

+void G1CollectedHeap::verify(bool silent) {
+  verify(silent, VerifyOption_G1UsePrevMarking);
+}
+
+double G1CollectedHeap::verify(bool guard, const char* msg) {
+  double verify_time_ms = 0.0;
+
+  if (guard && total_collections() >= VerifyGCStartAt) {
+    double verify_start = os::elapsedTime();
+    HandleMark hm;  // Discard invalid handles created during verification
+    prepare_for_verify();
+    Universe::verify(VerifyOption_G1UsePrevMarking, msg);
+    verify_time_ms = (os::elapsedTime() - verify_start) * 1000;
+  }
+
+  return verify_time_ms;
+}
+
+void G1CollectedHeap::verify_before_gc() {
+  double verify_time_ms = verify(VerifyBeforeGC, " VerifyBeforeGC:");
+  g1_policy()->phase_times()->record_verify_before_time_ms(verify_time_ms);
+}
+
+void G1CollectedHeap::verify_after_gc() {
+  double verify_time_ms = verify(VerifyAfterGC, " VerifyAfterGC:");
+  g1_policy()->phase_times()->record_verify_after_time_ms(verify_time_ms);
+}
+
 class PrintRegionClosure: public HeapRegionClosure {
  outputStream* _st;
 public:
@ -3866,8 +3947,9 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
      append_secondary_free_list_if_not_empty_with_lock();
    }

-    assert(check_young_list_well_formed(),
-      "young list should be well formed");
+    assert(check_young_list_well_formed(), "young list should be well formed");
+    assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+           "sanity check");

    // Don't dynamically change the number of GC threads this early.  A value of
    // 0 is used to indicate serial work.  When parallel work is done,
@ -4987,7 +5069,11 @@ public:

      G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);

-      int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+      // Don't scan the scavengable methods in the code cache as part
+      // of strong root scanning. The code roots that point into a
+      // region in the collection set are scanned when we scan the
+      // region's RSet.
+      int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings;

      pss.start_strong_roots();
      _g1h->g1_process_strong_roots(/* is scavenging */ true,
@ -5029,67 +5115,6 @@ public:

 // *** Common G1 Evacuation Stuff

-// Closures that support the filtering of CodeBlobs scanned during
-// external root scanning.
-
-// Closure applied to reference fields in code blobs (specifically nmethods)
-// to determine whether an nmethod contains references that point into
-// the collection set. Used as a predicate when walking code roots so
-// that only nmethods that point into the collection set are added to the
-// 'marked' list.
-
-class G1FilteredCodeBlobToOopClosure : public CodeBlobToOopClosure {
-
-  class G1PointsIntoCSOopClosure : public OopClosure {
-    G1CollectedHeap* _g1;
-    bool _points_into_cs;
-  public:
-    G1PointsIntoCSOopClosure(G1CollectedHeap* g1) :
-      _g1(g1), _points_into_cs(false) { }
-
-    bool points_into_cs() const { return _points_into_cs; }
-
-    template <class T>
-    void do_oop_nv(T* p) {
-      if (!_points_into_cs) {
-        T heap_oop = oopDesc::load_heap_oop(p);
-        if (!oopDesc::is_null(heap_oop) &&
-            _g1->in_cset_fast_test(oopDesc::decode_heap_oop_not_null(heap_oop))) {
-          _points_into_cs = true;
-        }
-      }
-    }
-
-    virtual void do_oop(oop* p)        { do_oop_nv(p); }
-    virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-  };
-
-  G1CollectedHeap* _g1;
-
-public:
-  G1FilteredCodeBlobToOopClosure(G1CollectedHeap* g1, OopClosure* cl) :
-    CodeBlobToOopClosure(cl, true), _g1(g1) { }
-
-  virtual void do_code_blob(CodeBlob* cb) {
-    nmethod* nm = cb->as_nmethod_or_null();
-    if (nm != NULL && !(nm->test_oops_do_mark())) {
-      G1PointsIntoCSOopClosure predicate_cl(_g1);
-      nm->oops_do(&predicate_cl);
-
-      if (predicate_cl.points_into_cs()) {
-        // At least one of the reference fields or the oop relocations
-        // in the nmethod points into the collection set. We have to
-        // 'mark' this nmethod.
-        // Note: Revisit the following if CodeBlobToOopClosure::do_code_blob()
-        // or MarkingCodeBlobClosure::do_code_blob() change.
-        if (!nm->test_set_oops_do_mark()) {
-          do_newly_marked_nmethod(nm);
-        }
-      }
-    }
-  }
-};
-
 // This method is run in a GC worker.

 void
@ -5107,9 +5132,10 @@ g1_process_strong_roots(bool is_scavenging,

  BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);

-  // Walk the code cache w/o buffering, because StarTask cannot handle
-  // unaligned oop locations.
-  G1FilteredCodeBlobToOopClosure eager_scan_code_roots(this, scan_non_heap_roots);
+  assert(so & SO_CodeCache || scan_rs != NULL, "must scan code roots somehow");
+  // Walk the code cache/strong code roots w/o buffering, because StarTask
+  // cannot handle unaligned oop locations.
+  CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, true /* do_marking */);

  process_strong_roots(false, // no scoping; this is parallel code
                       is_scavenging, so,
@ -5154,9 +5180,22 @@ g1_process_strong_roots(bool is_scavenging,
  }
  g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);

+  // If this is an initial mark pause, and we're not scanning
+  // the entire code cache, we need to mark the oops in the
+  // strong code root lists for the regions that are not in
+  // the collection set.
+  // Note all threads participate in this set of root tasks.
+  double mark_strong_code_roots_ms = 0.0;
+  if (g1_policy()->during_initial_mark_pause() && !(so & SO_CodeCache)) {
+    double mark_strong_roots_start = os::elapsedTime();
+    mark_strong_code_roots(worker_i);
+    mark_strong_code_roots_ms = (os::elapsedTime() - mark_strong_roots_start) * 1000.0;
+  }
+  g1_policy()->phase_times()->record_strong_code_root_mark_time(worker_i, mark_strong_code_roots_ms);
+
  // Now scan the complement of the collection set.
  if (scan_rs != NULL) {
-    g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i);
+    g1_rem_set()->oops_into_collection_set_do(scan_rs, &eager_scan_code_roots, worker_i);
  }
  _process_strong_tasks->all_tasks_completed();
 }
@ -5774,9 +5813,6 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) {
  process_discovered_references(n_workers);

  // Weak root processing.
-  // Note: when JSR 292 is enabled and code blobs can contain
-  // non-perm oops then we will need to process the code blobs
-  // here too.
  {
    G1STWIsAliveClosure is_alive(this);
    G1KeepAliveClosure keep_alive(this);
@ -5792,6 +5828,17 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) {
  hot_card_cache->reset_hot_cache();
  hot_card_cache->set_use_cache(true);

+  // Migrate the strong code roots attached to each region in
+  // the collection set. Ideally we would like to do this
+  // after we have finished the scanning/evacuation of the
+  // strong code roots for a particular heap region.
+  migrate_strong_code_roots();
+
+  if (g1_policy()->during_initial_mark_pause()) {
+    // Reset the claim values set during marking the strong code roots
+    reset_heap_region_claim_values();
+  }
+
  finalize_for_evac_failure();

  if (evacuation_failed()) {
@ -6588,3 +6635,208 @@ void G1CollectedHeap::verify_region_sets() {
  _humongous_set.verify_end();
  _free_list.verify_end();
 }
+
+// Optimized nmethod scanning
+
+class RegisterNMethodOopClosure: public OopClosure {
+  G1CollectedHeap* _g1h;
+  nmethod* _nm;
+
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      HeapRegion* hr = _g1h->heap_region_containing(obj);
+      assert(!hr->isHumongous(), "code root in humongous region?");
+
+      // HeapRegion::add_strong_code_root() avoids adding duplicate
+      // entries but having duplicates is  OK since we "mark" nmethods
+      // as visited when we scan the strong code root lists during the GC.
+      hr->add_strong_code_root(_nm);
+      assert(hr->rem_set()->strong_code_roots_list_contains(_nm), "add failed?");
+    }
+  }
+
+public:
+  RegisterNMethodOopClosure(G1CollectedHeap* g1h, nmethod* nm) :
+    _g1h(g1h), _nm(nm) {}
+
+  void do_oop(oop* p)       { do_oop_work(p); }
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+};
+
+class UnregisterNMethodOopClosure: public OopClosure {
+  G1CollectedHeap* _g1h;
+  nmethod* _nm;
+
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      HeapRegion* hr = _g1h->heap_region_containing(obj);
+      assert(!hr->isHumongous(), "code root in humongous region?");
+      hr->remove_strong_code_root(_nm);
+      assert(!hr->rem_set()->strong_code_roots_list_contains(_nm), "remove failed?");
+    }
+  }
+
+public:
+  UnregisterNMethodOopClosure(G1CollectedHeap* g1h, nmethod* nm) :
+    _g1h(g1h), _nm(nm) {}
+
+  void do_oop(oop* p)       { do_oop_work(p); }
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+};
+
+void G1CollectedHeap::register_nmethod(nmethod* nm) {
+  CollectedHeap::register_nmethod(nm);
+
+  guarantee(nm != NULL, "sanity");
+  RegisterNMethodOopClosure reg_cl(this, nm);
+  nm->oops_do(&reg_cl);
+}
+
+void G1CollectedHeap::unregister_nmethod(nmethod* nm) {
+  CollectedHeap::unregister_nmethod(nm);
+
+  guarantee(nm != NULL, "sanity");
+  UnregisterNMethodOopClosure reg_cl(this, nm);
+  nm->oops_do(&reg_cl, true);
+}
+
+class MigrateCodeRootsHeapRegionClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion *hr) {
+    assert(!hr->isHumongous(), "humongous region in collection set?");
+    hr->migrate_strong_code_roots();
+    return false;
+  }
+};
+
+void G1CollectedHeap::migrate_strong_code_roots() {
+  MigrateCodeRootsHeapRegionClosure cl;
+  double migrate_start = os::elapsedTime();
+  collection_set_iterate(&cl);
+  double migration_time_ms = (os::elapsedTime() - migrate_start) * 1000.0;
+  g1_policy()->phase_times()->record_strong_code_root_migration_time(migration_time_ms);
+}
+
+// Mark all the code roots that point into regions *not* in the
+// collection set.
+//
+// Note we do not want to use a "marking" CodeBlobToOopClosure while
+// walking the the code roots lists of regions not in the collection
+// set. Suppose we have an nmethod (M) that points to objects in two
+// separate regions - one in the collection set (R1) and one not (R2).
+// Using a "marking" CodeBlobToOopClosure here would result in "marking"
+// nmethod M when walking the code roots for R1. When we come to scan
+// the code roots for R2, we would see that M is already marked and it
+// would be skipped and the objects in R2 that are referenced from M
+// would not be evacuated.
+
+class MarkStrongCodeRootCodeBlobClosure: public CodeBlobClosure {
+
+  class MarkStrongCodeRootOopClosure: public OopClosure {
+    ConcurrentMark* _cm;
+    HeapRegion* _hr;
+    uint _worker_id;
+
+    template <class T> void do_oop_work(T* p) {
+      T heap_oop = oopDesc::load_heap_oop(p);
+      if (!oopDesc::is_null(heap_oop)) {
+        oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+        // Only mark objects in the region (which is assumed
+        // to be not in the collection set).
+        if (_hr->is_in(obj)) {
+          _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
+        }
+      }
+    }
+
+  public:
+    MarkStrongCodeRootOopClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id) :
+      _cm(cm), _hr(hr), _worker_id(worker_id) {
+      assert(!_hr->in_collection_set(), "sanity");
+    }
+
+    void do_oop(narrowOop* p) { do_oop_work(p); }
+    void do_oop(oop* p)       { do_oop_work(p); }
+  };
+
+  MarkStrongCodeRootOopClosure _oop_cl;
+
+public:
+  MarkStrongCodeRootCodeBlobClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id):
+    _oop_cl(cm, hr, worker_id) {}
+
+  void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = (cb == NULL) ? NULL : cb->as_nmethod_or_null();
+    if (nm != NULL) {
+      nm->oops_do(&_oop_cl);
+    }
+  }
+};
+
+class MarkStrongCodeRootsHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  uint _worker_id;
+
+public:
+  MarkStrongCodeRootsHRClosure(G1CollectedHeap* g1h, uint worker_id) :
+    _g1h(g1h), _worker_id(worker_id) {}
+
+  bool doHeapRegion(HeapRegion *hr) {
+    HeapRegionRemSet* hrrs = hr->rem_set();
+    if (hr->isHumongous()) {
+      // Code roots should never be attached to a humongous region
+      assert(hrrs->strong_code_roots_list_length() == 0, "sanity");
+      return false;
+    }
+
+    if (hr->in_collection_set()) {
+      // Don't mark code roots into regions in the collection set here.
+      // They will be marked when we scan them.
+      return false;
+    }
+
+    MarkStrongCodeRootCodeBlobClosure cb_cl(_g1h->concurrent_mark(), hr, _worker_id);
+    hr->strong_code_roots_do(&cb_cl);
+    return false;
+  }
+};
+
+void G1CollectedHeap::mark_strong_code_roots(uint worker_id) {
+  MarkStrongCodeRootsHRClosure cl(this, worker_id);
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    heap_region_par_iterate_chunked(&cl,
+                                    worker_id,
+                                    workers()->active_workers(),
+                                    HeapRegion::ParMarkRootClaimValue);
+  } else {
+    heap_region_iterate(&cl);
+  }
+}
+
+class RebuildStrongCodeRootClosure: public CodeBlobClosure {
+  G1CollectedHeap* _g1h;
+
+public:
+  RebuildStrongCodeRootClosure(G1CollectedHeap* g1h) :
+    _g1h(g1h) {}
+
+  void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = (cb != NULL) ? cb->as_nmethod_or_null() : NULL;
+    if (nm == NULL) {
+      return;
+    }
+
+    if (ScavengeRootsInCode && nm->detect_scavenge_root_oops()) {
+      _g1h->register_nmethod(nm);
+    }
+  }
+};
+
+void G1CollectedHeap::rebuild_strong_code_roots() {
+  RebuildStrongCodeRootClosure blob_cl(this);
+  CodeCache::blobs_do(&blob_cl);
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
@ -46,6 +46,7 @@
 // may combine concurrent marking with parallel, incremental compaction of
 // heap subsets that will yield large amounts of garbage.

+// Forward declarations
 class HeapRegion;
 class HRRSCleanupTask;
 class GenerationSpec;
@ -69,6 +70,7 @@ class STWGCTimer;
 class G1NewTracer;
 class G1OldTracer;
 class EvacuationFailedInfo;
+class nmethod;

 typedef OverflowTaskQueue<StarTask, mtGC>         RefToScanQueue;
 typedef GenericTaskQueueSet<RefToScanQueue, mtGC> RefToScanQueueSet;
@ -163,18 +165,6 @@ public:
    : G1AllocRegion("Mutator Alloc Region", false /* bot_updates */) { }
 };

-// The G1 STW is alive closure.
-// An instance is embedded into the G1CH and used as the
-// (optional) _is_alive_non_header closure in the STW
-// reference processor. It is also extensively used during
-// reference processing during STW evacuation pauses.
-class G1STWIsAliveClosure: public BoolObjectClosure {
-  G1CollectedHeap* _g1;
-public:
-  G1STWIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
-  bool do_object_b(oop p);
-};
-
 class SurvivorGCAllocRegion : public G1AllocRegion {
 protected:
  virtual HeapRegion* allocate_new_region(size_t word_size, bool force);
@ -193,6 +183,18 @@ public:
  : G1AllocRegion("Old GC Alloc Region", true /* bot_updates */) { }
 };

+// The G1 STW is alive closure.
+// An instance is embedded into the G1CH and used as the
+// (optional) _is_alive_non_header closure in the STW
+// reference processor. It is also extensively used during
+// reference processing during STW evacuation pauses.
+class G1STWIsAliveClosure: public BoolObjectClosure {
+  G1CollectedHeap* _g1;
+public:
+  G1STWIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
+  bool do_object_b(oop p);
+};
+
 class RefineCardTableEntryClosure;

 class G1CollectedHeap : public SharedHeap {
@ -1549,42 +1551,6 @@ public:

  virtual jlong millis_since_last_gc();

-  // Perform any cleanup actions necessary before allowing a verification.
-  virtual void prepare_for_verify();
-
-  // Perform verification.
-
-  // vo == UsePrevMarking  -> use "prev" marking information,
-  // vo == UseNextMarking -> use "next" marking information
-  // vo == UseMarkWord    -> use the mark word in the object header
-  //
-  // NOTE: Only the "prev" marking information is guaranteed to be
-  // consistent most of the time, so most calls to this should use
-  // vo == UsePrevMarking.
-  // Currently, there is only one case where this is called with
-  // vo == UseNextMarking, which is to verify the "next" marking
-  // information at the end of remark.
-  // Currently there is only one place where this is called with
-  // vo == UseMarkWord, which is to verify the marking during a
-  // full GC.
-  void verify(bool silent, VerifyOption vo);
-
-  // Override; it uses the "prev" marking information
-  virtual void verify(bool silent);
-
-  virtual void print_on(outputStream* st) const;
-  virtual void print_extended_on(outputStream* st) const;
-  virtual void print_on_error(outputStream* st) const;
-
-  virtual void print_gc_threads_on(outputStream* st) const;
-  virtual void gc_threads_do(ThreadClosure* tc) const;
-
-  // Override
-  void print_tracing_info() const;
-
-  // The following two methods are helpful for debugging RSet issues.
-  void print_cset_rsets() PRODUCT_RETURN;
-  void print_all_rsets() PRODUCT_RETURN;

  // Convenience function to be used in situations where the heap type can be
  // asserted to be this type.
@ -1661,13 +1627,86 @@ public:
    else return is_obj_ill(obj, hr);
  }

+  bool allocated_since_marking(oop obj, HeapRegion* hr, VerifyOption vo);
+  HeapWord* top_at_mark_start(HeapRegion* hr, VerifyOption vo);
+  bool is_marked(oop obj, VerifyOption vo);
+  const char* top_at_mark_start_str(VerifyOption vo);
+
+  ConcurrentMark* concurrent_mark() const { return _cm; }
+
+  // Refinement
+
+  ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; }
+
+  // The dirty cards region list is used to record a subset of regions
+  // whose cards need clearing. The list if populated during the
+  // remembered set scanning and drained during the card table
+  // cleanup. Although the methods are reentrant, population/draining
+  // phases must not overlap. For synchronization purposes the last
+  // element on the list points to itself.
+  HeapRegion* _dirty_cards_region_list;
+  void push_dirty_cards_region(HeapRegion* hr);
+  HeapRegion* pop_dirty_cards_region();
+
+  // Optimized nmethod scanning support routines
+
+  // Register the given nmethod with the G1 heap
+  virtual void register_nmethod(nmethod* nm);
+
+  // Unregister the given nmethod from the G1 heap
+  virtual void unregister_nmethod(nmethod* nm);
+
+  // Migrate the nmethods in the code root lists of the regions
+  // in the collection set to regions in to-space. In the event
+  // of an evacuation failure, nmethods that reference objects
+  // that were not successfullly evacuated are not migrated.
+  void migrate_strong_code_roots();
+
+  // During an initial mark pause, mark all the code roots that
+  // point into regions *not* in the collection set.
+  void mark_strong_code_roots(uint worker_id);
+
+  // Rebuild the stong code root lists for each region
+  // after a full GC
+  void rebuild_strong_code_roots();
+
+  // Verification
+
+  // The following is just to alert the verification code
+  // that a full collection has occurred and that the
+  // remembered sets are no longer up to date.
+  bool _full_collection;
+  void set_full_collection() { _full_collection = true;}
+  void clear_full_collection() {_full_collection = false;}
+  bool full_collection() {return _full_collection;}
+
+  // Perform any cleanup actions necessary before allowing a verification.
+  virtual void prepare_for_verify();
+
+  // Perform verification.
+
+  // vo == UsePrevMarking  -> use "prev" marking information,
+  // vo == UseNextMarking -> use "next" marking information
+  // vo == UseMarkWord    -> use the mark word in the object header
+  //
+  // NOTE: Only the "prev" marking information is guaranteed to be
+  // consistent most of the time, so most calls to this should use
+  // vo == UsePrevMarking.
+  // Currently, there is only one case where this is called with
+  // vo == UseNextMarking, which is to verify the "next" marking
+  // information at the end of remark.
+  // Currently there is only one place where this is called with
+  // vo == UseMarkWord, which is to verify the marking during a
+  // full GC.
+  void verify(bool silent, VerifyOption vo);
+
+  // Override; it uses the "prev" marking information
+  virtual void verify(bool silent);
+
  // The methods below are here for convenience and dispatch the
  // appropriate method depending on value of the given VerifyOption
-  // parameter. The options for that parameter are:
-  //
-  // vo == UsePrevMarking -> use "prev" marking information,
-  // vo == UseNextMarking -> use "next" marking information,
-  // vo == UseMarkWord    -> use mark word from object header
+  // parameter. The values for that parameter, and their meanings,
+  // are the same as those above.

  bool is_obj_dead_cond(const oop obj,
                        const HeapRegion* hr,
@ -1692,31 +1731,21 @@ public:
    return false; // keep some compilers happy
  }

-  bool allocated_since_marking(oop obj, HeapRegion* hr, VerifyOption vo);
-  HeapWord* top_at_mark_start(HeapRegion* hr, VerifyOption vo);
-  bool is_marked(oop obj, VerifyOption vo);
-  const char* top_at_mark_start_str(VerifyOption vo);
+  // Printing

-  // The following is just to alert the verification code
-  // that a full collection has occurred and that the
-  // remembered sets are no longer up to date.
-  bool _full_collection;
-  void set_full_collection() { _full_collection = true;}
-  void clear_full_collection() {_full_collection = false;}
-  bool full_collection() {return _full_collection;}
+  virtual void print_on(outputStream* st) const;
+  virtual void print_extended_on(outputStream* st) const;
+  virtual void print_on_error(outputStream* st) const;

-  ConcurrentMark* concurrent_mark() const { return _cm; }
-  ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; }
+  virtual void print_gc_threads_on(outputStream* st) const;
+  virtual void gc_threads_do(ThreadClosure* tc) const;

-  // The dirty cards region list is used to record a subset of regions
-  // whose cards need clearing. The list if populated during the
-  // remembered set scanning and drained during the card table
-  // cleanup. Although the methods are reentrant, population/draining
-  // phases must not overlap. For synchronization purposes the last
-  // element on the list points to itself.
-  HeapRegion* _dirty_cards_region_list;
-  void push_dirty_cards_region(HeapRegion* hr);
-  HeapRegion* pop_dirty_cards_region();
+  // Override
+  void print_tracing_info() const;
+
+  // The following two methods are helpful for debugging RSet issues.
+  void print_cset_rsets() PRODUCT_RETURN;
+  void print_all_rsets() PRODUCT_RETURN;

 public:
  void stop_conc_gc_threads();
--- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
@ -161,6 +161,8 @@ G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
  _last_update_rs_times_ms(_max_gc_threads, "%.1lf"),
  _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
  _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
+  _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"),
+  _last_strong_code_root_mark_times_ms(_max_gc_threads, "%.1lf"),
  _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
  _last_termination_times_ms(_max_gc_threads, "%.1lf"),
  _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
@ -182,6 +184,8 @@ void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) {
  _last_update_rs_times_ms.reset();
  _last_update_rs_processed_buffers.reset();
  _last_scan_rs_times_ms.reset();
+  _last_strong_code_root_scan_times_ms.reset();
+  _last_strong_code_root_mark_times_ms.reset();
  _last_obj_copy_times_ms.reset();
  _last_termination_times_ms.reset();
  _last_termination_attempts.reset();
@ -197,6 +201,8 @@ void G1GCPhaseTimes::note_gc_end() {
  _last_update_rs_times_ms.verify();
  _last_update_rs_processed_buffers.verify();
  _last_scan_rs_times_ms.verify();
+  _last_strong_code_root_scan_times_ms.verify();
+  _last_strong_code_root_mark_times_ms.verify();
  _last_obj_copy_times_ms.verify();
  _last_termination_times_ms.verify();
  _last_termination_attempts.verify();
@ -210,6 +216,8 @@ void G1GCPhaseTimes::note_gc_end() {
                               _last_satb_filtering_times_ms.get(i) +
                               _last_update_rs_times_ms.get(i) +
                               _last_scan_rs_times_ms.get(i) +
+                               _last_strong_code_root_scan_times_ms.get(i) +
+                               _last_strong_code_root_mark_times_ms.get(i) +
                               _last_obj_copy_times_ms.get(i) +
                               _last_termination_times_ms.get(i);

@ -239,6 +247,9 @@ double G1GCPhaseTimes::accounted_time_ms() {
    // Now subtract the time taken to fix up roots in generated code
    misc_time_ms += _cur_collection_code_root_fixup_time_ms;

+    // Strong code root migration time
+    misc_time_ms += _cur_strong_code_root_migration_time_ms;
+
    // Subtract the time taken to clean the card table from the
    // current value of "other time"
    misc_time_ms += _cur_clear_ct_time_ms;
@ -257,9 +268,13 @@ void G1GCPhaseTimes::print(double pause_time_sec) {
    if (_last_satb_filtering_times_ms.sum() > 0.0) {
      _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
    }
+    if (_last_strong_code_root_mark_times_ms.sum() > 0.0) {
+     _last_strong_code_root_mark_times_ms.print(2, "Code Root Marking (ms)");
+    }
    _last_update_rs_times_ms.print(2, "Update RS (ms)");
      _last_update_rs_processed_buffers.print(3, "Processed Buffers");
    _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
+    _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)");
    _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
    _last_termination_times_ms.print(2, "Termination (ms)");
    if (G1Log::finest()) {
@ -273,12 +288,17 @@ void G1GCPhaseTimes::print(double pause_time_sec) {
    if (_last_satb_filtering_times_ms.sum() > 0.0) {
      _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
    }
+    if (_last_strong_code_root_mark_times_ms.sum() > 0.0) {
+      _last_strong_code_root_mark_times_ms.print(1, "Code Root Marking (ms)");
+    }
    _last_update_rs_times_ms.print(1, "Update RS (ms)");
      _last_update_rs_processed_buffers.print(2, "Processed Buffers");
    _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
+    _last_strong_code_root_scan_times_ms.print(1, "Code Root Scanning (ms)");
    _last_obj_copy_times_ms.print(1, "Object Copy (ms)");
  }
  print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
+  print_stats(1, "Code Root Migration", _cur_strong_code_root_migration_time_ms);
  print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
  double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
  print_stats(1, "Other", misc_time_ms);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
@ -119,6 +119,8 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
  WorkerDataArray<double> _last_update_rs_times_ms;
  WorkerDataArray<int>    _last_update_rs_processed_buffers;
  WorkerDataArray<double> _last_scan_rs_times_ms;
+  WorkerDataArray<double> _last_strong_code_root_scan_times_ms;
+  WorkerDataArray<double> _last_strong_code_root_mark_times_ms;
  WorkerDataArray<double> _last_obj_copy_times_ms;
  WorkerDataArray<double> _last_termination_times_ms;
  WorkerDataArray<size_t> _last_termination_attempts;
@ -128,6 +130,7 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {

  double _cur_collection_par_time_ms;
  double _cur_collection_code_root_fixup_time_ms;
+  double _cur_strong_code_root_migration_time_ms;

  double _cur_clear_ct_time_ms;
  double _cur_ref_proc_time_ms;
@ -179,6 +182,14 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
    _last_scan_rs_times_ms.set(worker_i, ms);
  }

+  void record_strong_code_root_scan_time(uint worker_i, double ms) {
+    _last_strong_code_root_scan_times_ms.set(worker_i, ms);
+  }
+
+  void record_strong_code_root_mark_time(uint worker_i, double ms) {
+    _last_strong_code_root_mark_times_ms.set(worker_i, ms);
+  }
+
  void record_obj_copy_time(uint worker_i, double ms) {
    _last_obj_copy_times_ms.set(worker_i, ms);
  }
@ -208,6 +219,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
    _cur_collection_code_root_fixup_time_ms = ms;
  }

+  void record_strong_code_root_migration_time(double ms) {
+    _cur_strong_code_root_migration_time_ms = ms;
+  }
+
  void record_ref_proc_time(double ms) {
    _cur_ref_proc_time_ms = ms;
  }
@ -294,6 +309,14 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
    return _last_scan_rs_times_ms.average();
  }

+  double average_last_strong_code_root_scan_time(){
+    return _last_strong_code_root_scan_times_ms.average();
+  }
+
+  double average_last_strong_code_root_mark_time(){
+    return _last_strong_code_root_mark_times_ms.average();
+  }
+
  double average_last_obj_copy_time() {
    return _last_obj_copy_times_ms.average();
  }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp
@ -262,6 +262,7 @@ void G1MonitoringSupport::update_sizes() {
    old_collection_counters()->update_all();
    young_collection_counters()->update_all();
    MetaspaceCounters::update_performance_counters();
+    CompressedClassSpaceCounters::update_performance_counters();
  }
 }

--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@ -104,15 +104,25 @@ void CountNonCleanMemRegionClosure::do_MemRegion(MemRegion mr) {
 class ScanRSClosure : public HeapRegionClosure {
  size_t _cards_done, _cards;
  G1CollectedHeap* _g1h;
+
  OopsInHeapRegionClosure* _oc;
+  CodeBlobToOopClosure* _code_root_cl;
+
  G1BlockOffsetSharedArray* _bot_shared;
  CardTableModRefBS *_ct_bs;
-  int _worker_i;
-  int _block_size;
-  bool _try_claimed;
+
+  double _strong_code_root_scan_time_sec;
+  int    _worker_i;
+  int    _block_size;
+  bool   _try_claimed;
+
 public:
-  ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
+  ScanRSClosure(OopsInHeapRegionClosure* oc,
+                CodeBlobToOopClosure* code_root_cl,
+                int worker_i) :
    _oc(oc),
+    _code_root_cl(code_root_cl),
+    _strong_code_root_scan_time_sec(0.0),
    _cards(0),
    _cards_done(0),
    _worker_i(worker_i),
@ -160,6 +170,12 @@ public:
                           card_start, card_start + G1BlockOffsetSharedArray::N_words);
  }

+  void scan_strong_code_roots(HeapRegion* r) {
+    double scan_start = os::elapsedTime();
+    r->strong_code_roots_do(_code_root_cl);
+    _strong_code_root_scan_time_sec += (os::elapsedTime() - scan_start);
+  }
+
  bool doHeapRegion(HeapRegion* r) {
    assert(r->in_collection_set(), "should only be called on elements of CS.");
    HeapRegionRemSet* hrrs = r->rem_set();
@ -173,6 +189,7 @@ public:
    //   _try_claimed || r->claim_iter()
    // is true: either we're supposed to work on claimed-but-not-complete
    // regions, or we successfully claimed the region.
+
    HeapRegionRemSetIterator iter(hrrs);
    size_t card_index;

@ -205,30 +222,43 @@ public:
      }
    }
    if (!_try_claimed) {
+      // Scan the strong code root list attached to the current region
+      scan_strong_code_roots(r);
+
      hrrs->set_iter_complete();
    }
    return false;
  }
+
+  double strong_code_root_scan_time_sec() {
+    return _strong_code_root_scan_time_sec;
+  }
+
  size_t cards_done() { return _cards_done;}
  size_t cards_looked_up() { return _cards;}
 };

-void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
+void G1RemSet::scanRS(OopsInHeapRegionClosure* oc,
+                      CodeBlobToOopClosure* code_root_cl,
+                      int worker_i) {
  double rs_time_start = os::elapsedTime();
  HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i);

-  ScanRSClosure scanRScl(oc, worker_i);
+  ScanRSClosure scanRScl(oc, code_root_cl, worker_i);

  _g1->collection_set_iterate_from(startRegion, &scanRScl);
  scanRScl.set_try_claimed();
  _g1->collection_set_iterate_from(startRegion, &scanRScl);

-  double scan_rs_time_sec = os::elapsedTime() - rs_time_start;
+  double scan_rs_time_sec = (os::elapsedTime() - rs_time_start)
+                            - scanRScl.strong_code_root_scan_time_sec();

-  assert( _cards_scanned != NULL, "invariant" );
+  assert(_cards_scanned != NULL, "invariant");
  _cards_scanned[worker_i] = scanRScl.cards_done();

  _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
+  _g1p->phase_times()->record_strong_code_root_scan_time(worker_i,
+                                                         scanRScl.strong_code_root_scan_time_sec() * 1000.0);
 }

 // Closure used for updating RSets and recording references that
@ -288,7 +318,8 @@ void G1RemSet::cleanupHRRS() {
 }

 void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
-                                             int worker_i) {
+                                           CodeBlobToOopClosure* code_root_cl,
+                                           int worker_i) {
 #if CARD_REPEAT_HISTO
  ct_freq_update_histo_and_reset();
 #endif
@ -328,7 +359,7 @@ void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
    _g1p->phase_times()->record_update_rs_time(worker_i, 0.0);
  }
  if (G1UseParallelRSetScanning || (worker_i == 0)) {
-    scanRS(oc, worker_i);
+    scanRS(oc, code_root_cl, worker_i);
  } else {
    _g1p->phase_times()->record_scan_rs_time(worker_i, 0.0);
  }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp
@ -81,14 +81,23 @@ public:
  G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs);
  ~G1RemSet();

-  // Invoke "blk->do_oop" on all pointers into the CS in objects in regions
-  // outside the CS (having invoked "blk->set_region" to set the "from"
-  // region correctly beforehand.) The "worker_i" param is for the
-  // parallel case where the number of the worker thread calling this
-  // function can be helpful in partitioning the work to be done. It
-  // should be the same as the "i" passed to the calling thread's
-  // work(i) function. In the sequential case this param will be ingored.
-  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk, int worker_i);
+  // Invoke "blk->do_oop" on all pointers into the collection set
+  // from objects in regions outside the collection set (having
+  // invoked "blk->set_region" to set the "from" region correctly
+  // beforehand.)
+  //
+  // Invoke code_root_cl->do_code_blob on the unmarked nmethods
+  // on the strong code roots list for each region in the
+  // collection set.
+  //
+  // The "worker_i" param is for the parallel case where the id
+  // of the worker thread calling this function can be helpful in
+  // partitioning the work to be done. It should be the same as
+  // the "i" passed to the calling thread's work(i) function.
+  // In the sequential case this param will be ignored.
+  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                   CodeBlobToOopClosure* code_root_cl,
+                                   int worker_i);

  // Prepare for and cleanup after an oops_into_collection_set_do
  // call.  Must call each of these once before and after (in sequential
@ -98,7 +107,10 @@ public:
  void prepare_for_oops_into_collection_set_do();
  void cleanup_after_oops_into_collection_set_do();

-  void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
+  void scanRS(OopsInHeapRegionClosure* oc,
+              CodeBlobToOopClosure* code_root_cl,
+              int worker_i);
+
  void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i);

  CardTableModRefBS* ct_bs() { return _ct_bs; }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp
@ -127,32 +127,55 @@ void G1RemSetSummary::subtract_from(G1RemSetSummary* other) {

 class HRRSStatsIter: public HeapRegionClosure {
  size_t _occupied;
-  size_t _total_mem_sz;
-  size_t _max_mem_sz;
-  HeapRegion* _max_mem_sz_region;
+
+  size_t _total_rs_mem_sz;
+  size_t _max_rs_mem_sz;
+  HeapRegion* _max_rs_mem_sz_region;
+
+  size_t _total_code_root_mem_sz;
+  size_t _max_code_root_mem_sz;
+  HeapRegion* _max_code_root_mem_sz_region;
 public:
  HRRSStatsIter() :
    _occupied(0),
-    _total_mem_sz(0),
-    _max_mem_sz(0),
-    _max_mem_sz_region(NULL)
+    _total_rs_mem_sz(0),
+    _max_rs_mem_sz(0),
+    _max_rs_mem_sz_region(NULL),
+    _total_code_root_mem_sz(0),
+    _max_code_root_mem_sz(0),
+    _max_code_root_mem_sz_region(NULL)
  {}

  bool doHeapRegion(HeapRegion* r) {
-    size_t mem_sz = r->rem_set()->mem_size();
-    if (mem_sz > _max_mem_sz) {
-      _max_mem_sz = mem_sz;
-      _max_mem_sz_region = r;
+    HeapRegionRemSet* hrrs = r->rem_set();
+
+    // HeapRegionRemSet::mem_size() includes the
+    // size of the strong code roots
+    size_t rs_mem_sz = hrrs->mem_size();
+    if (rs_mem_sz > _max_rs_mem_sz) {
+      _max_rs_mem_sz = rs_mem_sz;
+      _max_rs_mem_sz_region = r;
    }
-    _total_mem_sz += mem_sz;
-    size_t occ = r->rem_set()->occupied();
+    _total_rs_mem_sz += rs_mem_sz;
+
+    size_t code_root_mem_sz = hrrs->strong_code_roots_mem_size();
+    if (code_root_mem_sz > _max_code_root_mem_sz) {
+      _max_code_root_mem_sz = code_root_mem_sz;
+      _max_code_root_mem_sz_region = r;
+    }
+    _total_code_root_mem_sz += code_root_mem_sz;
+
+    size_t occ = hrrs->occupied();
    _occupied += occ;
    return false;
  }
-  size_t total_mem_sz() { return _total_mem_sz; }
-  size_t max_mem_sz() { return _max_mem_sz; }
+  size_t total_rs_mem_sz() { return _total_rs_mem_sz; }
+  size_t max_rs_mem_sz() { return _max_rs_mem_sz; }
+  HeapRegion* max_rs_mem_sz_region() { return _max_rs_mem_sz_region; }
+  size_t total_code_root_mem_sz() { return _total_code_root_mem_sz; }
+  size_t max_code_root_mem_sz() { return _max_code_root_mem_sz; }
+  HeapRegion* max_code_root_mem_sz_region() { return _max_code_root_mem_sz_region; }
  size_t occupied() { return _occupied; }
-  HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
 };

 double calc_percentage(size_t numerator, size_t denominator) {
@ -184,22 +207,33 @@ void G1RemSetSummary::print_on(outputStream* out) {

  HRRSStatsIter blk;
  G1CollectedHeap::heap()->heap_region_iterate(&blk);
+  // RemSet stats
  out->print_cr("  Total heap region rem set sizes = "SIZE_FORMAT"K."
                "  Max = "SIZE_FORMAT"K.",
-                blk.total_mem_sz()/K, blk.max_mem_sz()/K);
+                blk.total_rs_mem_sz()/K, blk.max_rs_mem_sz()/K);
  out->print_cr("  Static structures = "SIZE_FORMAT"K,"
                " free_lists = "SIZE_FORMAT"K.",
                HeapRegionRemSet::static_mem_size() / K,
                HeapRegionRemSet::fl_mem_size() / K);
  out->print_cr("    "SIZE_FORMAT" occupied cards represented.",
                blk.occupied());
-  HeapRegion* max_mem_sz_region = blk.max_mem_sz_region();
-  HeapRegionRemSet* rem_set = max_mem_sz_region->rem_set();
+  HeapRegion* max_rs_mem_sz_region = blk.max_rs_mem_sz_region();
+  HeapRegionRemSet* max_rs_rem_set = max_rs_mem_sz_region->rem_set();
  out->print_cr("    Max size region = "HR_FORMAT", "
                "size = "SIZE_FORMAT "K, occupied = "SIZE_FORMAT"K.",
-                HR_FORMAT_PARAMS(max_mem_sz_region),
-                (rem_set->mem_size() + K - 1)/K,
-                (rem_set->occupied() + K - 1)/K);
-
+                HR_FORMAT_PARAMS(max_rs_mem_sz_region),
+                (max_rs_rem_set->mem_size() + K - 1)/K,
+                (max_rs_rem_set->occupied() + K - 1)/K);
  out->print_cr("    Did %d coarsenings.", num_coarsenings());
+  // Strong code root stats
+  out->print_cr("  Total heap region code-root set sizes = "SIZE_FORMAT"K."
+                "  Max = "SIZE_FORMAT"K.",
+                blk.total_code_root_mem_sz()/K, blk.max_code_root_mem_sz()/K);
+  HeapRegion* max_code_root_mem_sz_region = blk.max_code_root_mem_sz_region();
+  HeapRegionRemSet* max_code_root_rem_set = max_code_root_mem_sz_region->rem_set();
+  out->print_cr("    Max size region = "HR_FORMAT", "
+                "size = "SIZE_FORMAT "K, num_elems = "SIZE_FORMAT".",
+                HR_FORMAT_PARAMS(max_code_root_mem_sz_region),
+                (max_code_root_rem_set->strong_code_roots_mem_size() + K - 1)/K,
+                (max_code_root_rem_set->strong_code_roots_list_length()));
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
@ -319,7 +319,10 @@
                                                                            \
  diagnostic(bool, G1VerifyRSetsDuringFullGC, false,                        \
             "If true, perform verification of each heap region's "         \
-             "remembered set when verifying the heap during a full GC.")
+             "remembered set when verifying the heap during a full GC.")    \
+                                                                            \
+  diagnostic(bool, G1VerifyHeapRegionCodeRoots, false,                      \
+             "Verify the code root lists attached to each heap region.")

 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)

--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp
@ -23,6 +23,7 @@
 */

 #include "precompiled.hpp"
+#include "code/nmethod.hpp"
 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
@ -50,144 +51,6 @@ FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
                                                   OopClosure* oc) :
  _r_bottom(r->bottom()), _r_end(r->end()), _oc(oc) { }

-class VerifyLiveClosure: public OopClosure {
-private:
-  G1CollectedHeap* _g1h;
-  CardTableModRefBS* _bs;
-  oop _containing_obj;
-  bool _failures;
-  int _n_failures;
-  VerifyOption _vo;
-public:
-  // _vo == UsePrevMarking -> use "prev" marking information,
-  // _vo == UseNextMarking -> use "next" marking information,
-  // _vo == UseMarkWord    -> use mark word from object header.
-  VerifyLiveClosure(G1CollectedHeap* g1h, VerifyOption vo) :
-    _g1h(g1h), _bs(NULL), _containing_obj(NULL),
-    _failures(false), _n_failures(0), _vo(vo)
-  {
-    BarrierSet* bs = _g1h->barrier_set();
-    if (bs->is_a(BarrierSet::CardTableModRef))
-      _bs = (CardTableModRefBS*)bs;
-  }
-
-  void set_containing_obj(oop obj) {
-    _containing_obj = obj;
-  }
-
-  bool failures() { return _failures; }
-  int n_failures() { return _n_failures; }
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-
-  void print_object(outputStream* out, oop obj) {
-#ifdef PRODUCT
-    Klass* k = obj->klass();
-    const char* class_name = InstanceKlass::cast(k)->external_name();
-    out->print_cr("class name %s", class_name);
-#else // PRODUCT
-    obj->print_on(out);
-#endif // PRODUCT
-  }
-
-  template <class T>
-  void do_oop_work(T* p) {
-    assert(_containing_obj != NULL, "Precondition");
-    assert(!_g1h->is_obj_dead_cond(_containing_obj, _vo),
-           "Precondition");
-    T heap_oop = oopDesc::load_heap_oop(p);
-    if (!oopDesc::is_null(heap_oop)) {
-      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-      bool failed = false;
-      if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead_cond(obj, _vo)) {
-        MutexLockerEx x(ParGCRareEvent_lock,
-                        Mutex::_no_safepoint_check_flag);
-
-        if (!_failures) {
-          gclog_or_tty->print_cr("");
-          gclog_or_tty->print_cr("----------");
-        }
-        if (!_g1h->is_in_closed_subset(obj)) {
-          HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
-          gclog_or_tty->print_cr("Field "PTR_FORMAT
-                                 " of live obj "PTR_FORMAT" in region "
-                                 "["PTR_FORMAT", "PTR_FORMAT")",
-                                 p, (void*) _containing_obj,
-                                 from->bottom(), from->end());
-          print_object(gclog_or_tty, _containing_obj);
-          gclog_or_tty->print_cr("points to obj "PTR_FORMAT" not in the heap",
-                                 (void*) obj);
-        } else {
-          HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
-          HeapRegion* to   = _g1h->heap_region_containing((HeapWord*)obj);
-          gclog_or_tty->print_cr("Field "PTR_FORMAT
-                                 " of live obj "PTR_FORMAT" in region "
-                                 "["PTR_FORMAT", "PTR_FORMAT")",
-                                 p, (void*) _containing_obj,
-                                 from->bottom(), from->end());
-          print_object(gclog_or_tty, _containing_obj);
-          gclog_or_tty->print_cr("points to dead obj "PTR_FORMAT" in region "
-                                 "["PTR_FORMAT", "PTR_FORMAT")",
-                                 (void*) obj, to->bottom(), to->end());
-          print_object(gclog_or_tty, obj);
-        }
-        gclog_or_tty->print_cr("----------");
-        gclog_or_tty->flush();
-        _failures = true;
-        failed = true;
-        _n_failures++;
-      }
-
-      if (!_g1h->full_collection() || G1VerifyRSetsDuringFullGC) {
-        HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
-        HeapRegion* to   = _g1h->heap_region_containing(obj);
-        if (from != NULL && to != NULL &&
-            from != to &&
-            !to->isHumongous()) {
-          jbyte cv_obj = *_bs->byte_for_const(_containing_obj);
-          jbyte cv_field = *_bs->byte_for_const(p);
-          const jbyte dirty = CardTableModRefBS::dirty_card_val();
-
-          bool is_bad = !(from->is_young()
-                          || to->rem_set()->contains_reference(p)
-                          || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed
-                              (_containing_obj->is_objArray() ?
-                                  cv_field == dirty
-                               : cv_obj == dirty || cv_field == dirty));
-          if (is_bad) {
-            MutexLockerEx x(ParGCRareEvent_lock,
-                            Mutex::_no_safepoint_check_flag);
-
-            if (!_failures) {
-              gclog_or_tty->print_cr("");
-              gclog_or_tty->print_cr("----------");
-            }
-            gclog_or_tty->print_cr("Missing rem set entry:");
-            gclog_or_tty->print_cr("Field "PTR_FORMAT" "
-                                   "of obj "PTR_FORMAT", "
-                                   "in region "HR_FORMAT,
-                                   p, (void*) _containing_obj,
-                                   HR_FORMAT_PARAMS(from));
-            _containing_obj->print_on(gclog_or_tty);
-            gclog_or_tty->print_cr("points to obj "PTR_FORMAT" "
-                                   "in region "HR_FORMAT,
-                                   (void*) obj,
-                                   HR_FORMAT_PARAMS(to));
-            obj->print_on(gclog_or_tty);
-            gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
-                          cv_obj, cv_field);
-            gclog_or_tty->print_cr("----------");
-            gclog_or_tty->flush();
-            _failures = true;
-            if (!failed) _n_failures++;
-          }
-        }
-      }
-    }
-  }
-};
-
 template<class ClosureType>
 HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h,
                               HeapRegion* hr,
@ -368,7 +231,7 @@ void HeapRegion::hr_clear(bool par, bool clear_space) {
  if (!par) {
    // If this is parallel, this will be done later.
    HeapRegionRemSet* hrrs = rem_set();
-    if (hrrs != NULL) hrrs->clear();
+    hrrs->clear();
    _claimed = InitialClaimValue;
  }
  zero_marked_bytes();
@ -505,6 +368,7 @@ HeapRegion::HeapRegion(uint hrs_index,
    _rem_set(NULL), _recorded_rs_length(0), _predicted_elapsed_time_ms(0),
    _predicted_bytes_to_copy(0)
 {
+  _rem_set = new HeapRegionRemSet(sharedOffsetArray, this);
  _orig_end = mr.end();
  // Note that initialize() will set the start of the unmarked area of the
  // region.
@ -512,8 +376,6 @@ HeapRegion::HeapRegion(uint hrs_index,
  set_top(bottom());
  set_saved_mark();

-  _rem_set =  new HeapRegionRemSet(sharedOffsetArray, this);
-
  assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant.");
 }

@ -733,6 +595,160 @@ oops_on_card_seq_iterate_careful(MemRegion mr,
  return NULL;
 }

+// Code roots support
+
+void HeapRegion::add_strong_code_root(nmethod* nm) {
+  HeapRegionRemSet* hrrs = rem_set();
+  hrrs->add_strong_code_root(nm);
+}
+
+void HeapRegion::remove_strong_code_root(nmethod* nm) {
+  HeapRegionRemSet* hrrs = rem_set();
+  hrrs->remove_strong_code_root(nm);
+}
+
+void HeapRegion::migrate_strong_code_roots() {
+  assert(in_collection_set(), "only collection set regions");
+  assert(!isHumongous(), "not humongous regions");
+
+  HeapRegionRemSet* hrrs = rem_set();
+  hrrs->migrate_strong_code_roots();
+}
+
+void HeapRegion::strong_code_roots_do(CodeBlobClosure* blk) const {
+  HeapRegionRemSet* hrrs = rem_set();
+  hrrs->strong_code_roots_do(blk);
+}
+
+class VerifyStrongCodeRootOopClosure: public OopClosure {
+  const HeapRegion* _hr;
+  nmethod* _nm;
+  bool _failures;
+  bool _has_oops_in_region;
+
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+
+      // Note: not all the oops embedded in the nmethod are in the
+      // current region. We only look at those which are.
+      if (_hr->is_in(obj)) {
+        // Object is in the region. Check that its less than top
+        if (_hr->top() <= (HeapWord*)obj) {
+          // Object is above top
+          gclog_or_tty->print_cr("Object "PTR_FORMAT" in region "
+                                 "["PTR_FORMAT", "PTR_FORMAT") is above "
+                                 "top "PTR_FORMAT,
+                                 obj, _hr->bottom(), _hr->end(), _hr->top());
+          _failures = true;
+          return;
+        }
+        // Nmethod has at least one oop in the current region
+        _has_oops_in_region = true;
+      }
+    }
+  }
+
+public:
+  VerifyStrongCodeRootOopClosure(const HeapRegion* hr, nmethod* nm):
+    _hr(hr), _failures(false), _has_oops_in_region(false) {}
+
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+  void do_oop(oop* p)       { do_oop_work(p); }
+
+  bool failures()           { return _failures; }
+  bool has_oops_in_region() { return _has_oops_in_region; }
+};
+
+class VerifyStrongCodeRootCodeBlobClosure: public CodeBlobClosure {
+  const HeapRegion* _hr;
+  bool _failures;
+public:
+  VerifyStrongCodeRootCodeBlobClosure(const HeapRegion* hr) :
+    _hr(hr), _failures(false) {}
+
+  void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = (cb == NULL) ? NULL : cb->as_nmethod_or_null();
+    if (nm != NULL) {
+      // Verify that the nemthod is live
+      if (!nm->is_alive()) {
+        gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] has dead nmethod "
+                               PTR_FORMAT" in its strong code roots",
+                               _hr->bottom(), _hr->end(), nm);
+        _failures = true;
+      } else {
+        VerifyStrongCodeRootOopClosure oop_cl(_hr, nm);
+        nm->oops_do(&oop_cl);
+        if (!oop_cl.has_oops_in_region()) {
+          gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] has nmethod "
+                                 PTR_FORMAT" in its strong code roots "
+                                 "with no pointers into region",
+                                 _hr->bottom(), _hr->end(), nm);
+          _failures = true;
+        } else if (oop_cl.failures()) {
+          gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] has other "
+                                 "failures for nmethod "PTR_FORMAT,
+                                 _hr->bottom(), _hr->end(), nm);
+          _failures = true;
+        }
+      }
+    }
+  }
+
+  bool failures()       { return _failures; }
+};
+
+void HeapRegion::verify_strong_code_roots(VerifyOption vo, bool* failures) const {
+  if (!G1VerifyHeapRegionCodeRoots) {
+    // We're not verifying code roots.
+    return;
+  }
+  if (vo == VerifyOption_G1UseMarkWord) {
+    // Marking verification during a full GC is performed after class
+    // unloading, code cache unloading, etc so the strong code roots
+    // attached to each heap region are in an inconsistent state. They won't
+    // be consistent until the strong code roots are rebuilt after the
+    // actual GC. Skip verifying the strong code roots in this particular
+    // time.
+    assert(VerifyDuringGC, "only way to get here");
+    return;
+  }
+
+  HeapRegionRemSet* hrrs = rem_set();
+  int strong_code_roots_length = hrrs->strong_code_roots_list_length();
+
+  // if this region is empty then there should be no entries
+  // on its strong code root list
+  if (is_empty()) {
+    if (strong_code_roots_length > 0) {
+      gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] is empty "
+                             "but has "INT32_FORMAT" code root entries",
+                             bottom(), end(), strong_code_roots_length);
+      *failures = true;
+    }
+    return;
+  }
+
+  // An H-region should have an empty strong code root list
+  if (isHumongous()) {
+    if (strong_code_roots_length > 0) {
+      gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] is humongous "
+                             "but has "INT32_FORMAT" code root entries",
+                             bottom(), end(), strong_code_roots_length);
+      *failures = true;
+    }
+    return;
+  }
+
+  VerifyStrongCodeRootCodeBlobClosure cb_cl(this);
+  strong_code_roots_do(&cb_cl);
+
+  if (cb_cl.failures()) {
+    *failures = true;
+  }
+}
+
 void HeapRegion::print() const { print_on(gclog_or_tty); }
 void HeapRegion::print_on(outputStream* st) const {
  if (isHumongous()) {
@ -761,10 +777,143 @@ void HeapRegion::print_on(outputStream* st) const {
  G1OffsetTableContigSpace::print_on(st);
 }

-void HeapRegion::verify() const {
-  bool dummy = false;
-  verify(VerifyOption_G1UsePrevMarking, /* failures */ &dummy);
-}
+class VerifyLiveClosure: public OopClosure {
+private:
+  G1CollectedHeap* _g1h;
+  CardTableModRefBS* _bs;
+  oop _containing_obj;
+  bool _failures;
+  int _n_failures;
+  VerifyOption _vo;
+public:
+  // _vo == UsePrevMarking -> use "prev" marking information,
+  // _vo == UseNextMarking -> use "next" marking information,
+  // _vo == UseMarkWord    -> use mark word from object header.
+  VerifyLiveClosure(G1CollectedHeap* g1h, VerifyOption vo) :
+    _g1h(g1h), _bs(NULL), _containing_obj(NULL),
+    _failures(false), _n_failures(0), _vo(vo)
+  {
+    BarrierSet* bs = _g1h->barrier_set();
+    if (bs->is_a(BarrierSet::CardTableModRef))
+      _bs = (CardTableModRefBS*)bs;
+  }
+
+  void set_containing_obj(oop obj) {
+    _containing_obj = obj;
+  }
+
+  bool failures() { return _failures; }
+  int n_failures() { return _n_failures; }
+
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+
+  void print_object(outputStream* out, oop obj) {
+#ifdef PRODUCT
+    Klass* k = obj->klass();
+    const char* class_name = InstanceKlass::cast(k)->external_name();
+    out->print_cr("class name %s", class_name);
+#else // PRODUCT
+    obj->print_on(out);
+#endif // PRODUCT
+  }
+
+  template <class T>
+  void do_oop_work(T* p) {
+    assert(_containing_obj != NULL, "Precondition");
+    assert(!_g1h->is_obj_dead_cond(_containing_obj, _vo),
+           "Precondition");
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      bool failed = false;
+      if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead_cond(obj, _vo)) {
+        MutexLockerEx x(ParGCRareEvent_lock,
+                        Mutex::_no_safepoint_check_flag);
+
+        if (!_failures) {
+          gclog_or_tty->print_cr("");
+          gclog_or_tty->print_cr("----------");
+        }
+        if (!_g1h->is_in_closed_subset(obj)) {
+          HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
+          gclog_or_tty->print_cr("Field "PTR_FORMAT
+                                 " of live obj "PTR_FORMAT" in region "
+                                 "["PTR_FORMAT", "PTR_FORMAT")",
+                                 p, (void*) _containing_obj,
+                                 from->bottom(), from->end());
+          print_object(gclog_or_tty, _containing_obj);
+          gclog_or_tty->print_cr("points to obj "PTR_FORMAT" not in the heap",
+                                 (void*) obj);
+        } else {
+          HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
+          HeapRegion* to   = _g1h->heap_region_containing((HeapWord*)obj);
+          gclog_or_tty->print_cr("Field "PTR_FORMAT
+                                 " of live obj "PTR_FORMAT" in region "
+                                 "["PTR_FORMAT", "PTR_FORMAT")",
+                                 p, (void*) _containing_obj,
+                                 from->bottom(), from->end());
+          print_object(gclog_or_tty, _containing_obj);
+          gclog_or_tty->print_cr("points to dead obj "PTR_FORMAT" in region "
+                                 "["PTR_FORMAT", "PTR_FORMAT")",
+                                 (void*) obj, to->bottom(), to->end());
+          print_object(gclog_or_tty, obj);
+        }
+        gclog_or_tty->print_cr("----------");
+        gclog_or_tty->flush();
+        _failures = true;
+        failed = true;
+        _n_failures++;
+      }
+
+      if (!_g1h->full_collection() || G1VerifyRSetsDuringFullGC) {
+        HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
+        HeapRegion* to   = _g1h->heap_region_containing(obj);
+        if (from != NULL && to != NULL &&
+            from != to &&
+            !to->isHumongous()) {
+          jbyte cv_obj = *_bs->byte_for_const(_containing_obj);
+          jbyte cv_field = *_bs->byte_for_const(p);
+          const jbyte dirty = CardTableModRefBS::dirty_card_val();
+
+          bool is_bad = !(from->is_young()
+                          || to->rem_set()->contains_reference(p)
+                          || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed
+                              (_containing_obj->is_objArray() ?
+                                  cv_field == dirty
+                               : cv_obj == dirty || cv_field == dirty));
+          if (is_bad) {
+            MutexLockerEx x(ParGCRareEvent_lock,
+                            Mutex::_no_safepoint_check_flag);
+
+            if (!_failures) {
+              gclog_or_tty->print_cr("");
+              gclog_or_tty->print_cr("----------");
+            }
+            gclog_or_tty->print_cr("Missing rem set entry:");
+            gclog_or_tty->print_cr("Field "PTR_FORMAT" "
+                                   "of obj "PTR_FORMAT", "
+                                   "in region "HR_FORMAT,
+                                   p, (void*) _containing_obj,
+                                   HR_FORMAT_PARAMS(from));
+            _containing_obj->print_on(gclog_or_tty);
+            gclog_or_tty->print_cr("points to obj "PTR_FORMAT" "
+                                   "in region "HR_FORMAT,
+                                   (void*) obj,
+                                   HR_FORMAT_PARAMS(to));
+            obj->print_on(gclog_or_tty);
+            gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
+                          cv_obj, cv_field);
+            gclog_or_tty->print_cr("----------");
+            gclog_or_tty->flush();
+            _failures = true;
+            if (!failed) _n_failures++;
+          }
+        }
+      }
+    }
+  }
+};

 // This really ought to be commoned up into OffsetTableContigSpace somehow.
 // We would need a mechanism to make that code skip dead objects.
@ -904,6 +1053,13 @@ void HeapRegion::verify(VerifyOption vo,
    *failures = true;
    return;
  }
+
+  verify_strong_code_roots(vo, failures);
+}
+
+void HeapRegion::verify() const {
+  bool dummy = false;
+  verify(VerifyOption_G1UsePrevMarking, /* failures */ &dummy);
 }

 // G1OffsetTableContigSpace code; copied from space.cpp.  Hope this can go
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp
@ -52,6 +52,7 @@ class HeapRegionRemSet;
 class HeapRegionRemSetIterator;
 class HeapRegion;
 class HeapRegionSetBase;
+class nmethod;

 #define HR_FORMAT "%u:(%s)["PTR_FORMAT","PTR_FORMAT","PTR_FORMAT"]"
 #define HR_FORMAT_PARAMS(_hr_) \
@ -371,7 +372,8 @@ class HeapRegion: public G1OffsetTableContigSpace {
    RebuildRSClaimValue        = 5,
    ParEvacFailureClaimValue   = 6,
    AggregateCountClaimValue   = 7,
-    VerifyCountClaimValue      = 8
+    VerifyCountClaimValue      = 8,
+    ParMarkRootClaimValue      = 9
  };

  inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
@ -796,6 +798,25 @@ class HeapRegion: public G1OffsetTableContigSpace {

  virtual void reset_after_compaction();

+  // Routines for managing a list of code roots (attached to the
+  // this region's RSet) that point into this heap region.
+  void add_strong_code_root(nmethod* nm);
+  void remove_strong_code_root(nmethod* nm);
+
+  // During a collection, migrate the successfully evacuated
+  // strong code roots that referenced into this region to the
+  // new regions that they now point into. Unsuccessfully
+  // evacuated code roots are not migrated.
+  void migrate_strong_code_roots();
+
+  // Applies blk->do_code_blob() to each of the entries in
+  // the strong code roots list for this region
+  void strong_code_roots_do(CodeBlobClosure* blk) const;
+
+  // Verify that the entries on the strong code root list for this
+  // region are live and include at least one pointer into this region.
+  void verify_strong_code_roots(VerifyOption vo, bool* failures) const;
+
  void print() const;
  void print_on(outputStream* st) const;

--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
@ -33,6 +33,7 @@
 #include "oops/oop.inline.hpp"
 #include "utilities/bitMap.inline.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/growableArray.hpp"

 class PerRegionTable: public CHeapObj<mtGC> {
  friend class OtherRegionsTable;
@ -849,7 +850,7 @@ int HeapRegionRemSet::num_par_rem_sets() {

 HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
                                   HeapRegion* hr)
-  : _bosa(bosa), _other_regions(hr) {
+  : _bosa(bosa), _strong_code_roots_list(NULL), _other_regions(hr) {
  reset_for_par_iteration();
 }

@ -908,6 +909,12 @@ void HeapRegionRemSet::cleanup() {
 }

 void HeapRegionRemSet::clear() {
+  if (_strong_code_roots_list != NULL) {
+    delete _strong_code_roots_list;
+  }
+  _strong_code_roots_list = new (ResourceObj::C_HEAP, mtGC)
+                                GrowableArray<nmethod*>(10, 0, NULL, true);
+
  _other_regions.clear();
  assert(occupied() == 0, "Should be clear.");
  reset_for_par_iteration();
@ -925,6 +932,121 @@ void HeapRegionRemSet::scrub(CardTableModRefBS* ctbs,
  _other_regions.scrub(ctbs, region_bm, card_bm);
 }

+
+// Code roots support
+
+void HeapRegionRemSet::add_strong_code_root(nmethod* nm) {
+  assert(nm != NULL, "sanity");
+  // Search for the code blob from the RHS to avoid
+  // duplicate entries as much as possible
+  if (_strong_code_roots_list->find_from_end(nm) < 0) {
+    // Code blob isn't already in the list
+    _strong_code_roots_list->push(nm);
+  }
+}
+
+void HeapRegionRemSet::remove_strong_code_root(nmethod* nm) {
+  assert(nm != NULL, "sanity");
+  int idx = _strong_code_roots_list->find(nm);
+  if (idx >= 0) {
+    _strong_code_roots_list->remove_at(idx);
+  }
+  // Check that there were no duplicates
+  guarantee(_strong_code_roots_list->find(nm) < 0, "duplicate entry found");
+}
+
+class NMethodMigrationOopClosure : public OopClosure {
+  G1CollectedHeap* _g1h;
+  HeapRegion* _from;
+  nmethod* _nm;
+
+  uint _num_self_forwarded;
+
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      if (_from->is_in(obj)) {
+        // Reference still points into the source region.
+        // Since roots are immediately evacuated this means that
+        // we must have self forwarded the object
+        assert(obj->is_forwarded(),
+               err_msg("code roots should be immediately evacuated. "
+                       "Ref: "PTR_FORMAT", "
+                       "Obj: "PTR_FORMAT", "
+                       "Region: "HR_FORMAT,
+                       p, (void*) obj, HR_FORMAT_PARAMS(_from)));
+        assert(obj->forwardee() == obj,
+               err_msg("not self forwarded? obj = "PTR_FORMAT, (void*)obj));
+
+        // The object has been self forwarded.
+        // Note, if we're during an initial mark pause, there is
+        // no need to explicitly mark object. It will be marked
+        // during the regular evacuation failure handling code.
+        _num_self_forwarded++;
+      } else {
+        // The reference points into a promotion or to-space region
+        HeapRegion* to = _g1h->heap_region_containing(obj);
+        to->rem_set()->add_strong_code_root(_nm);
+      }
+    }
+  }
+
+public:
+  NMethodMigrationOopClosure(G1CollectedHeap* g1h, HeapRegion* from, nmethod* nm):
+    _g1h(g1h), _from(from), _nm(nm), _num_self_forwarded(0) {}
+
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+  void do_oop(oop* p)       { do_oop_work(p); }
+
+  uint retain() { return _num_self_forwarded > 0; }
+};
+
+void HeapRegionRemSet::migrate_strong_code_roots() {
+  assert(hr()->in_collection_set(), "only collection set regions");
+  assert(!hr()->isHumongous(), "not humongous regions");
+
+  ResourceMark rm;
+
+  // List of code blobs to retain for this region
+  GrowableArray<nmethod*> to_be_retained(10);
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  while (_strong_code_roots_list->is_nonempty()) {
+    nmethod *nm = _strong_code_roots_list->pop();
+    if (nm != NULL) {
+      NMethodMigrationOopClosure oop_cl(g1h, hr(), nm);
+      nm->oops_do(&oop_cl);
+      if (oop_cl.retain()) {
+        to_be_retained.push(nm);
+      }
+    }
+  }
+
+  // Now push any code roots we need to retain
+  assert(to_be_retained.is_empty() || hr()->evacuation_failed(),
+         "Retained nmethod list must be empty or "
+         "evacuation of this region failed");
+
+  while (to_be_retained.is_nonempty()) {
+    nmethod* nm = to_be_retained.pop();
+    assert(nm != NULL, "sanity");
+    add_strong_code_root(nm);
+  }
+}
+
+void HeapRegionRemSet::strong_code_roots_do(CodeBlobClosure* blk) const {
+  for (int i = 0; i < _strong_code_roots_list->length(); i += 1) {
+    nmethod* nm = _strong_code_roots_list->at(i);
+    blk->do_code_blob(nm);
+  }
+}
+
+size_t HeapRegionRemSet::strong_code_roots_mem_size() {
+  return sizeof(GrowableArray<nmethod*>) +
+         _strong_code_roots_list->max_length() * sizeof(nmethod*);
+}
+
 //-------------------- Iteration --------------------

 HeapRegionRemSetIterator:: HeapRegionRemSetIterator(const HeapRegionRemSet* hrrs) :
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp
@ -37,6 +37,7 @@ class HeapRegion;
 class HeapRegionRemSetIterator;
 class PerRegionTable;
 class SparsePRT;
+class nmethod;

 // Essentially a wrapper around SparsePRTCleanupTask. See
 // sparsePRT.hpp for more details.
@ -191,6 +192,10 @@ private:
  G1BlockOffsetSharedArray* _bosa;
  G1BlockOffsetSharedArray* bosa() const { return _bosa; }

+  // A list of code blobs (nmethods) whose code contains pointers into
+  // the region that owns this RSet.
+  GrowableArray<nmethod*>* _strong_code_roots_list;
+
  OtherRegionsTable _other_regions;

  enum ParIterState { Unclaimed, Claimed, Complete };
@ -282,11 +287,13 @@ public:
  }

  // The actual # of bytes this hr_remset takes up.
+  // Note also includes the strong code root set.
  size_t mem_size() {
    return _other_regions.mem_size()
      // This correction is necessary because the above includes the second
      // part.
-      + sizeof(this) - sizeof(OtherRegionsTable);
+      + (sizeof(this) - sizeof(OtherRegionsTable))
+      + strong_code_roots_mem_size();
  }

  // Returns the memory occupancy of all static data structures associated
@ -304,6 +311,37 @@ public:
  bool contains_reference(OopOrNarrowOopStar from) const {
    return _other_regions.contains_reference(from);
  }
+
+  // Routines for managing the list of code roots that point into
+  // the heap region that owns this RSet.
+  void add_strong_code_root(nmethod* nm);
+  void remove_strong_code_root(nmethod* nm);
+
+  // During a collection, migrate the successfully evacuated strong
+  // code roots that referenced into the region that owns this RSet
+  // to the RSets of the new regions that they now point into.
+  // Unsuccessfully evacuated code roots are not migrated.
+  void migrate_strong_code_roots();
+
+  // Applies blk->do_code_blob() to each of the entries in
+  // the strong code roots list
+  void strong_code_roots_do(CodeBlobClosure* blk) const;
+
+  // Returns the number of elements in the strong code roots list
+  int strong_code_roots_list_length() {
+    return _strong_code_roots_list->length();
+  }
+
+  // Returns true if the strong code roots contains the given
+  // nmethod.
+  bool strong_code_roots_list_contains(nmethod* nm) {
+    return _strong_code_roots_list->contains(nm);
+  }
+
+  // Returns the amount of memory, in bytes, currently
+  // consumed by the strong code roots.
+  size_t strong_code_roots_mem_size();
+
  void print() const;

  // Called during a stop-world phase to perform any deferred cleanups.
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp
@ -216,6 +216,7 @@ void ParallelScavengeHeap::update_counters() {
  young_gen()->update_counters();
  old_gen()->update_counters();
  MetaspaceCounters::update_performance_counters();
+  CompressedClassSpaceCounters::update_performance_counters();
 }

 size_t ParallelScavengeHeap::capacity() const {
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp
@ -118,6 +118,14 @@ void CollectedHeap::print_heap_after_gc() {
  }
 }

+void CollectedHeap::register_nmethod(nmethod* nm) {
+  assert_locked_or_safepoint(CodeCache_lock);
+}
+
+void CollectedHeap::unregister_nmethod(nmethod* nm) {
+  assert_locked_or_safepoint(CodeCache_lock);
+}
+
 void CollectedHeap::trace_heap(GCWhen::Type when, GCTracer* gc_tracer) {
  const GCHeapSummary& heap_summary = create_heap_summary();
  const MetaspaceSummary& metaspace_summary = create_metaspace_summary();
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp
@ -49,6 +49,7 @@ class MetaspaceSummary;
 class Thread;
 class ThreadClosure;
 class VirtualSpaceSummary;
+class nmethod;

 class GCMessage : public FormatBuffer<1024> {
 public:
@ -603,6 +604,11 @@ class CollectedHeap : public CHeapObj<mtInternal> {
  void print_heap_before_gc();
  void print_heap_after_gc();

+  // Registering and unregistering an nmethod (compiled code) with the heap.
+  // Override with specific mechanism for each specialized heap type.
+  virtual void register_nmethod(nmethod* nm);
+  virtual void unregister_nmethod(nmethod* nm);
+
  void trace_heap_before_gc(GCTracer* gc_tracer);
  void trace_heap_after_gc(GCTracer* gc_tracer);

--- a/hotspot/src/share/vm/memory/filemap.cpp
+++ b/hotspot/src/share/vm/memory/filemap.cpp
@ -362,15 +362,12 @@ bool FileMapInfo::remap_shared_readonly_as_readwrite() {
 ReservedSpace FileMapInfo::reserve_shared_memory() {
  struct FileMapInfo::FileMapHeader::space_info* si = &_header._space[0];
  char* requested_addr = si->_base;
-  size_t alignment = os::vm_allocation_granularity();

-  size_t size = align_size_up(SharedReadOnlySize + SharedReadWriteSize +
-                              SharedMiscDataSize + SharedMiscCodeSize,
-                              alignment);
+  size_t size = FileMapInfo::shared_spaces_size();

  // Reserve the space first, then map otherwise map will go right over some
  // other reserved memory (like the code cache).
-  ReservedSpace rs(size, alignment, false, requested_addr);
+  ReservedSpace rs(size, os::vm_allocation_granularity(), false, requested_addr);
  if (!rs.is_reserved()) {
    fail_continue(err_msg("Unable to reserve shared space at required address " INTPTR_FORMAT, requested_addr));
    return rs;
@ -559,3 +556,19 @@ void FileMapInfo::print_shared_spaces() {
                        si->_base, si->_base + si->_used);
  }
 }
+
+// Unmap mapped regions of shared space.
+void FileMapInfo::stop_sharing_and_unmap(const char* msg) {
+  FileMapInfo *map_info = FileMapInfo::current_info();
+  if (map_info) {
+    map_info->fail_continue(msg);
+    for (int i = 0; i < MetaspaceShared::n_regions; i++) {
+      if (map_info->_header._space[i]._base != NULL) {
+        map_info->unmap_region(i);
+        map_info->_header._space[i]._base = NULL;
+      }
+    }
+  } else if (DumpSharedSpaces) {
+    fail_stop(msg, NULL);
+  }
+}
--- a/hotspot/src/share/vm/memory/filemap.hpp
+++ b/hotspot/src/share/vm/memory/filemap.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -150,6 +150,15 @@ public:
  // Return true if given address is in the mapped shared space.
  bool is_in_shared_space(const void* p) NOT_CDS_RETURN_(false);
  void print_shared_spaces() NOT_CDS_RETURN;
+
+  static size_t shared_spaces_size() {
+    return align_size_up(SharedReadOnlySize + SharedReadWriteSize +
+                         SharedMiscDataSize + SharedMiscCodeSize,
+                         os::vm_allocation_granularity());
+  }
+
+  // Stop CDS sharing and unmap CDS regions.
+  static void stop_sharing_and_unmap(const char* msg);
 };

 #endif // SHARE_VM_MEMORY_FILEMAP_HPP
--- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp
@ -1211,6 +1211,7 @@ void GenCollectedHeap::gc_epilogue(bool full) {
  }

  MetaspaceCounters::update_performance_counters();
+  CompressedClassSpaceCounters::update_performance_counters();

  always_do_update_barrier = UseConcMarkSweepGC;
 };
--- a/hotspot/src/share/vm/memory/heap.cpp
+++ b/hotspot/src/share/vm/memory/heap.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -118,9 +118,12 @@ bool CodeHeap::reserve(size_t reserved_size, size_t committed_size,
  _number_of_committed_segments = size_to_segments(_memory.committed_size());
  _number_of_reserved_segments  = size_to_segments(_memory.reserved_size());
  assert(_number_of_reserved_segments >= _number_of_committed_segments, "just checking");
+  const size_t reserved_segments_alignment = MAX2((size_t)os::vm_page_size(), granularity);
+  const size_t reserved_segments_size = align_size_up(_number_of_reserved_segments, reserved_segments_alignment);
+  const size_t committed_segments_size = align_to_page_size(_number_of_committed_segments);

  // reserve space for _segmap
-  if (!_segmap.initialize(align_to_page_size(_number_of_reserved_segments), align_to_page_size(_number_of_committed_segments))) {
+  if (!_segmap.initialize(reserved_segments_size, committed_segments_size)) {
    return false;
  }

--- a/hotspot/src/share/vm/memory/iterator.cpp
+++ b/hotspot/src/share/vm/memory/iterator.cpp
@ -64,7 +64,7 @@ void MarkingCodeBlobClosure::do_code_blob(CodeBlob* cb) {
 }

 void CodeBlobToOopClosure::do_newly_marked_nmethod(nmethod* nm) {
-  nm->oops_do(_cl, /*do_strong_roots_only=*/ true);
+  nm->oops_do(_cl, /*allow_zombie=*/ false);
 }

 void CodeBlobToOopClosure::do_code_blob(CodeBlob* cb) {
--- a/hotspot/src/share/vm/memory/metaspace.cpp
+++ b/hotspot/src/share/vm/memory/metaspace.cpp
@ -35,6 +35,7 @@
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
 #include "runtime/globals.hpp"
+#include "runtime/java.hpp"
 #include "runtime/mutex.hpp"
 #include "runtime/orderAccess.hpp"
 #include "services/memTracker.hpp"
@ -54,6 +55,8 @@ size_t const allocation_from_dictionary_limit = 64 * K;

 MetaWord* last_allocated = 0;

+size_t Metaspace::_class_metaspace_size;
+
 // Used in declarations in SpaceManager and ChunkManager
 enum ChunkIndex {
  ZeroIndex = 0,
@ -261,10 +264,6 @@ class VirtualSpaceNode : public CHeapObj<mtClass> {
  // count of chunks contained in this VirtualSpace
  uintx _container_count;

-  // Convenience functions for logical bottom and end
-  MetaWord* bottom() const { return (MetaWord*) _virtual_space.low(); }
-  MetaWord* end() const { return (MetaWord*) _virtual_space.high(); }
-
  // Convenience functions to access the _virtual_space
  char* low()  const { return virtual_space()->low(); }
  char* high() const { return virtual_space()->high(); }
@ -284,6 +283,10 @@ class VirtualSpaceNode : public CHeapObj<mtClass> {
  VirtualSpaceNode(ReservedSpace rs) : _top(NULL), _next(NULL), _rs(rs), _container_count(0) {}
  ~VirtualSpaceNode();

+  // Convenience functions for logical bottom and end
+  MetaWord* bottom() const { return (MetaWord*) _virtual_space.low(); }
+  MetaWord* end() const { return (MetaWord*) _virtual_space.high(); }
+
  // address of next available space in _virtual_space;
  // Accessors
  VirtualSpaceNode* next() { return _next; }
@ -1313,7 +1316,8 @@ bool MetaspaceGC::should_expand(VirtualSpaceList* vsl, size_t word_size) {

  // Class virtual space should always be expanded.  Call GC for the other
  // metadata virtual space.
-  if (vsl == Metaspace::class_space_list()) return true;
+  if (Metaspace::using_class_space() &&
+      (vsl == Metaspace::class_space_list())) return true;

  // If this is part of an allocation after a GC, expand
  // unconditionally.
@ -2257,7 +2261,7 @@ void SpaceManager::deallocate(MetaWord* p, size_t word_size) {
  size_t raw_word_size = get_raw_word_size(word_size);
  size_t min_size = TreeChunk<Metablock, FreeList>::min_size();
  assert(raw_word_size >= min_size,
-    err_msg("Should not deallocate dark matter " SIZE_FORMAT, word_size));
+         err_msg("Should not deallocate dark matter " SIZE_FORMAT "<" SIZE_FORMAT, word_size, min_size));
  block_freelists()->return_block(p, raw_word_size);
 }

@ -2374,7 +2378,7 @@ MetaWord* SpaceManager::allocate_work(size_t word_size) {
  if (result == NULL) {
    result = grow_and_allocate(word_size);
  }
-  if (result > 0) {
+  if (result != 0) {
    inc_used_metrics(word_size);
    assert(result != (MetaWord*) chunks_in_use(MediumIndex),
           "Head of the list is being allocated");
@ -2476,15 +2480,13 @@ void SpaceManager::mangle_freed_chunks() {
 size_t MetaspaceAux::_allocated_capacity_words[] = {0, 0};
 size_t MetaspaceAux::_allocated_used_words[] = {0, 0};

+size_t MetaspaceAux::free_bytes(Metaspace::MetadataType mdtype) {
+  VirtualSpaceList* list = Metaspace::get_space_list(mdtype);
+  return list == NULL ? 0 : list->free_bytes();
+}
+
 size_t MetaspaceAux::free_bytes() {
-  size_t result = 0;
-  if (Metaspace::class_space_list() != NULL) {
-    result = result + Metaspace::class_space_list()->free_bytes();
-  }
-  if (Metaspace::space_list() != NULL) {
-    result = result + Metaspace::space_list()->free_bytes();
-  }
-  return result;
+  return free_bytes(Metaspace::ClassType) + free_bytes(Metaspace::NonClassType);
 }

 void MetaspaceAux::dec_capacity(Metaspace::MetadataType mdtype, size_t words) {
@ -2549,6 +2551,9 @@ size_t MetaspaceAux::free_in_bytes(Metaspace::MetadataType mdtype) {
 }

 size_t MetaspaceAux::capacity_bytes_slow(Metaspace::MetadataType mdtype) {
+  if ((mdtype == Metaspace::ClassType) && !Metaspace::using_class_space()) {
+    return 0;
+  }
  // Don't count the space in the freelists.  That space will be
  // added to the capacity calculation as needed.
  size_t capacity = 0;
@ -2563,18 +2568,18 @@ size_t MetaspaceAux::capacity_bytes_slow(Metaspace::MetadataType mdtype) {
 }

 size_t MetaspaceAux::reserved_in_bytes(Metaspace::MetadataType mdtype) {
-  size_t reserved = (mdtype == Metaspace::ClassType) ?
-                       Metaspace::class_space_list()->virtual_space_total() :
-                       Metaspace::space_list()->virtual_space_total();
-  return reserved * BytesPerWord;
+  VirtualSpaceList* list = Metaspace::get_space_list(mdtype);
+  return list == NULL ? 0 : list->virtual_space_total();
 }

 size_t MetaspaceAux::min_chunk_size() { return Metaspace::first_chunk_word_size(); }

 size_t MetaspaceAux::free_chunks_total(Metaspace::MetadataType mdtype) {
-  ChunkManager* chunk = (mdtype == Metaspace::ClassType) ?
-                            Metaspace::class_space_list()->chunk_manager() :
-                            Metaspace::space_list()->chunk_manager();
+  VirtualSpaceList* list = Metaspace::get_space_list(mdtype);
+  if (list == NULL) {
+    return 0;
+  }
+  ChunkManager* chunk = list->chunk_manager();
  chunk->slow_verify();
  return chunk->free_chunks_total();
 }
@ -2615,7 +2620,6 @@ void MetaspaceAux::print_metaspace_change(size_t prev_metadata_used) {

 // This is printed when PrintGCDetails
 void MetaspaceAux::print_on(outputStream* out) {
-  Metaspace::MetadataType ct = Metaspace::ClassType;
  Metaspace::MetadataType nct = Metaspace::NonClassType;

  out->print_cr(" Metaspace total "
@ -2629,12 +2633,15 @@ void MetaspaceAux::print_on(outputStream* out) {
                allocated_capacity_bytes(nct)/K,
                allocated_used_bytes(nct)/K,
                reserved_in_bytes(nct)/K);
-  out->print_cr("  class space    "
-                SIZE_FORMAT "K, used " SIZE_FORMAT "K,"
-                " reserved " SIZE_FORMAT "K",
-                allocated_capacity_bytes(ct)/K,
-                allocated_used_bytes(ct)/K,
-                reserved_in_bytes(ct)/K);
+  if (Metaspace::using_class_space()) {
+    Metaspace::MetadataType ct = Metaspace::ClassType;
+    out->print_cr("  class space    "
+                  SIZE_FORMAT "K, used " SIZE_FORMAT "K,"
+                  " reserved " SIZE_FORMAT "K",
+                  allocated_capacity_bytes(ct)/K,
+                  allocated_used_bytes(ct)/K,
+                  reserved_in_bytes(ct)/K);
+  }
 }

 // Print information for class space and data space separately.
@ -2659,13 +2666,37 @@ void MetaspaceAux::print_on(outputStream* out, Metaspace::MetadataType mdtype) {
  assert(!SafepointSynchronize::is_at_safepoint() || used_and_free == capacity_bytes, "Accounting is wrong");
 }

-// Print total fragmentation for class and data metaspaces separately
-void MetaspaceAux::print_waste(outputStream* out) {
-
-  size_t specialized_waste = 0, small_waste = 0, medium_waste = 0;
-  size_t specialized_count = 0, small_count = 0, medium_count = 0, humongous_count = 0;
+// Print total fragmentation for class metaspaces
+void MetaspaceAux::print_class_waste(outputStream* out) {
+  assert(Metaspace::using_class_space(), "class metaspace not used");
  size_t cls_specialized_waste = 0, cls_small_waste = 0, cls_medium_waste = 0;
  size_t cls_specialized_count = 0, cls_small_count = 0, cls_medium_count = 0, cls_humongous_count = 0;
+  ClassLoaderDataGraphMetaspaceIterator iter;
+  while (iter.repeat()) {
+    Metaspace* msp = iter.get_next();
+    if (msp != NULL) {
+      cls_specialized_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SpecializedIndex);
+      cls_specialized_count += msp->class_vsm()->sum_count_in_chunks_in_use(SpecializedIndex);
+      cls_small_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SmallIndex);
+      cls_small_count += msp->class_vsm()->sum_count_in_chunks_in_use(SmallIndex);
+      cls_medium_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(MediumIndex);
+      cls_medium_count += msp->class_vsm()->sum_count_in_chunks_in_use(MediumIndex);
+      cls_humongous_count += msp->class_vsm()->sum_count_in_chunks_in_use(HumongousIndex);
+    }
+  }
+  out->print_cr(" class: " SIZE_FORMAT " specialized(s) " SIZE_FORMAT ", "
+                SIZE_FORMAT " small(s) " SIZE_FORMAT ", "
+                SIZE_FORMAT " medium(s) " SIZE_FORMAT ", "
+                "large count " SIZE_FORMAT,
+                cls_specialized_count, cls_specialized_waste,
+                cls_small_count, cls_small_waste,
+                cls_medium_count, cls_medium_waste, cls_humongous_count);
+}
+
+// Print total fragmentation for data and class metaspaces separately
+void MetaspaceAux::print_waste(outputStream* out) {
+  size_t specialized_waste = 0, small_waste = 0, medium_waste = 0;
+  size_t specialized_count = 0, small_count = 0, medium_count = 0, humongous_count = 0;

  ClassLoaderDataGraphMetaspaceIterator iter;
  while (iter.repeat()) {
@ -2678,14 +2709,6 @@ void MetaspaceAux::print_waste(outputStream* out) {
      medium_waste += msp->vsm()->sum_waste_in_chunks_in_use(MediumIndex);
      medium_count += msp->vsm()->sum_count_in_chunks_in_use(MediumIndex);
      humongous_count += msp->vsm()->sum_count_in_chunks_in_use(HumongousIndex);
-
-      cls_specialized_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SpecializedIndex);
-      cls_specialized_count += msp->class_vsm()->sum_count_in_chunks_in_use(SpecializedIndex);
-      cls_small_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SmallIndex);
-      cls_small_count += msp->class_vsm()->sum_count_in_chunks_in_use(SmallIndex);
-      cls_medium_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(MediumIndex);
-      cls_medium_count += msp->class_vsm()->sum_count_in_chunks_in_use(MediumIndex);
-      cls_humongous_count += msp->class_vsm()->sum_count_in_chunks_in_use(HumongousIndex);
    }
  }
  out->print_cr("Total fragmentation waste (words) doesn't count free space");
@ -2695,13 +2718,9 @@ void MetaspaceAux::print_waste(outputStream* out) {
                        "large count " SIZE_FORMAT,
             specialized_count, specialized_waste, small_count,
             small_waste, medium_count, medium_waste, humongous_count);
-  out->print_cr(" class: " SIZE_FORMAT " specialized(s) " SIZE_FORMAT ", "
-                           SIZE_FORMAT " small(s) " SIZE_FORMAT ", "
-                           SIZE_FORMAT " medium(s) " SIZE_FORMAT ", "
-                           "large count " SIZE_FORMAT,
-             cls_specialized_count, cls_specialized_waste,
-             cls_small_count, cls_small_waste,
-             cls_medium_count, cls_medium_waste, cls_humongous_count);
+  if (Metaspace::using_class_space()) {
+    print_class_waste(out);
+  }
 }

 // Dump global metaspace things from the end of ClassLoaderDataGraph
@ -2714,7 +2733,9 @@ void MetaspaceAux::dump(outputStream* out) {

 void MetaspaceAux::verify_free_chunks() {
  Metaspace::space_list()->chunk_manager()->verify();
-  Metaspace::class_space_list()->chunk_manager()->verify();
+  if (Metaspace::using_class_space()) {
+    Metaspace::class_space_list()->chunk_manager()->verify();
+  }
 }

 void MetaspaceAux::verify_capacity() {
@ -2776,7 +2797,9 @@ Metaspace::Metaspace(Mutex* lock, MetaspaceType type) {

 Metaspace::~Metaspace() {
  delete _vsm;
-  delete _class_vsm;
+  if (using_class_space()) {
+    delete _class_vsm;
+  }
 }

 VirtualSpaceList* Metaspace::_space_list = NULL;
@ -2784,9 +2807,123 @@ VirtualSpaceList* Metaspace::_class_space_list = NULL;

 #define VIRTUALSPACEMULTIPLIER 2

+#ifdef _LP64
+void Metaspace::set_narrow_klass_base_and_shift(address metaspace_base, address cds_base) {
+  // Figure out the narrow_klass_base and the narrow_klass_shift.  The
+  // narrow_klass_base is the lower of the metaspace base and the cds base
+  // (if cds is enabled).  The narrow_klass_shift depends on the distance
+  // between the lower base and higher address.
+  address lower_base;
+  address higher_address;
+  if (UseSharedSpaces) {
+    higher_address = MAX2((address)(cds_base + FileMapInfo::shared_spaces_size()),
+                          (address)(metaspace_base + class_metaspace_size()));
+    lower_base = MIN2(metaspace_base, cds_base);
+  } else {
+    higher_address = metaspace_base + class_metaspace_size();
+    lower_base = metaspace_base;
+  }
+  Universe::set_narrow_klass_base(lower_base);
+  if ((uint64_t)(higher_address - lower_base) < (uint64_t)max_juint) {
+    Universe::set_narrow_klass_shift(0);
+  } else {
+    assert(!UseSharedSpaces, "Cannot shift with UseSharedSpaces");
+    Universe::set_narrow_klass_shift(LogKlassAlignmentInBytes);
+  }
+}
+
+// Return TRUE if the specified metaspace_base and cds_base are close enough
+// to work with compressed klass pointers.
+bool Metaspace::can_use_cds_with_metaspace_addr(char* metaspace_base, address cds_base) {
+  assert(cds_base != 0 && UseSharedSpaces, "Only use with CDS");
+  assert(UseCompressedKlassPointers, "Only use with CompressedKlassPtrs");
+  address lower_base = MIN2((address)metaspace_base, cds_base);
+  address higher_address = MAX2((address)(cds_base + FileMapInfo::shared_spaces_size()),
+                                (address)(metaspace_base + class_metaspace_size()));
+  return ((uint64_t)(higher_address - lower_base) < (uint64_t)max_juint);
+}
+
+// Try to allocate the metaspace at the requested addr.
+void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, address cds_base) {
+  assert(using_class_space(), "called improperly");
+  assert(UseCompressedKlassPointers, "Only use with CompressedKlassPtrs");
+  assert(class_metaspace_size() < KlassEncodingMetaspaceMax,
+         "Metaspace size is too big");
+
+  ReservedSpace metaspace_rs = ReservedSpace(class_metaspace_size(),
+                                             os::vm_allocation_granularity(),
+                                             false, requested_addr, 0);
+  if (!metaspace_rs.is_reserved()) {
+    if (UseSharedSpaces) {
+      // Keep trying to allocate the metaspace, increasing the requested_addr
+      // by 1GB each time, until we reach an address that will no longer allow
+      // use of CDS with compressed klass pointers.
+      char *addr = requested_addr;
+      while (!metaspace_rs.is_reserved() && (addr + 1*G > addr) &&
+             can_use_cds_with_metaspace_addr(addr + 1*G, cds_base)) {
+        addr = addr + 1*G;
+        metaspace_rs = ReservedSpace(class_metaspace_size(),
+                                     os::vm_allocation_granularity(), false, addr, 0);
+      }
+    }
+
+    // If no successful allocation then try to allocate the space anywhere.  If
+    // that fails then OOM doom.  At this point we cannot try allocating the
+    // metaspace as if UseCompressedKlassPointers is off because too much
+    // initialization has happened that depends on UseCompressedKlassPointers.
+    // So, UseCompressedKlassPointers cannot be turned off at this point.
+    if (!metaspace_rs.is_reserved()) {
+      metaspace_rs = ReservedSpace(class_metaspace_size(),
+                                   os::vm_allocation_granularity(), false);
+      if (!metaspace_rs.is_reserved()) {
+        vm_exit_during_initialization(err_msg("Could not allocate metaspace: %d bytes",
+                                              class_metaspace_size()));
+      }
+    }
+  }
+
+  // If we got here then the metaspace got allocated.
+  MemTracker::record_virtual_memory_type((address)metaspace_rs.base(), mtClass);
+
+  // Verify that we can use shared spaces.  Otherwise, turn off CDS.
+  if (UseSharedSpaces && !can_use_cds_with_metaspace_addr(metaspace_rs.base(), cds_base)) {
+    FileMapInfo::stop_sharing_and_unmap(
+        "Could not allocate metaspace at a compatible address");
+  }
+
+  set_narrow_klass_base_and_shift((address)metaspace_rs.base(),
+                                  UseSharedSpaces ? (address)cds_base : 0);
+
+  initialize_class_space(metaspace_rs);
+
+  if (PrintCompressedOopsMode || (PrintMiscellaneous && Verbose)) {
+    gclog_or_tty->print_cr("Narrow klass base: " PTR_FORMAT ", Narrow klass shift: " SIZE_FORMAT,
+                            Universe::narrow_klass_base(), Universe::narrow_klass_shift());
+    gclog_or_tty->print_cr("Metaspace Size: " SIZE_FORMAT " Address: " PTR_FORMAT " Req Addr: " PTR_FORMAT,
+                           class_metaspace_size(), metaspace_rs.base(), requested_addr);
+  }
+}
+
+// For UseCompressedKlassPointers the class space is reserved above the top of
+// the Java heap.  The argument passed in is at the base of the compressed space.
+void Metaspace::initialize_class_space(ReservedSpace rs) {
+  // The reserved space size may be bigger because of alignment, esp with UseLargePages
+  assert(rs.size() >= ClassMetaspaceSize,
+         err_msg(SIZE_FORMAT " != " UINTX_FORMAT, rs.size(), ClassMetaspaceSize));
+  assert(using_class_space(), "Must be using class space");
+  _class_space_list = new VirtualSpaceList(rs);
+}
+
+#endif
+
 void Metaspace::global_initialize() {
  // Initialize the alignment for shared spaces.
  int max_alignment = os::vm_page_size();
+  size_t cds_total = 0;
+
+  set_class_metaspace_size(align_size_up(ClassMetaspaceSize,
+                                         os::vm_allocation_granularity()));
+
  MetaspaceShared::set_max_alignment(max_alignment);

  if (DumpSharedSpaces) {
@ -2798,15 +2935,31 @@ void Metaspace::global_initialize() {
    // Initialize with the sum of the shared space sizes.  The read-only
    // and read write metaspace chunks will be allocated out of this and the
    // remainder is the misc code and data chunks.
-    size_t total = align_size_up(SharedReadOnlySize + SharedReadWriteSize +
-                                 SharedMiscDataSize + SharedMiscCodeSize,
-                                 os::vm_allocation_granularity());
-    size_t word_size = total/wordSize;
-    _space_list = new VirtualSpaceList(word_size);
+    cds_total = FileMapInfo::shared_spaces_size();
+    _space_list = new VirtualSpaceList(cds_total/wordSize);
+
+#ifdef _LP64
+    // Set the compressed klass pointer base so that decoding of these pointers works
+    // properly when creating the shared archive.
+    assert(UseCompressedOops && UseCompressedKlassPointers,
+      "UseCompressedOops and UseCompressedKlassPointers must be set");
+    Universe::set_narrow_klass_base((address)_space_list->current_virtual_space()->bottom());
+    if (TraceMetavirtualspaceAllocation && Verbose) {
+      gclog_or_tty->print_cr("Setting_narrow_klass_base to Address: " PTR_FORMAT,
+                             _space_list->current_virtual_space()->bottom());
+    }
+
+    // Set the shift to zero.
+    assert(class_metaspace_size() < (uint64_t)(max_juint) - cds_total,
+           "CDS region is too large");
+    Universe::set_narrow_klass_shift(0);
+#endif
+
  } else {
    // If using shared space, open the file that contains the shared space
    // and map in the memory before initializing the rest of metaspace (so
    // the addresses don't conflict)
+    address cds_address = NULL;
    if (UseSharedSpaces) {
      FileMapInfo* mapinfo = new FileMapInfo();
      memset(mapinfo, 0, sizeof(FileMapInfo));
@ -2821,8 +2974,22 @@ void Metaspace::global_initialize() {
        assert(!mapinfo->is_open() && !UseSharedSpaces,
               "archive file not closed or shared spaces not disabled.");
      }
+      cds_total = FileMapInfo::shared_spaces_size();
+      cds_address = (address)mapinfo->region_base(0);
    }

+#ifdef _LP64
+    // If UseCompressedKlassPointers is set then allocate the metaspace area
+    // above the heap and above the CDS area (if it exists).
+    if (using_class_space()) {
+      if (UseSharedSpaces) {
+        allocate_metaspace_compressed_klass_ptrs((char *)(cds_address + cds_total), cds_address);
+      } else {
+        allocate_metaspace_compressed_klass_ptrs((char *)CompressedKlassPointersBase, 0);
+      }
+    }
+#endif
+
    // Initialize these before initializing the VirtualSpaceList
    _first_chunk_word_size = InitialBootClassLoaderMetaspaceSize / BytesPerWord;
    _first_chunk_word_size = align_word_size_up(_first_chunk_word_size);
@ -2840,39 +3007,28 @@ void Metaspace::global_initialize() {
  }
 }

-// For UseCompressedKlassPointers the class space is reserved as a piece of the
-// Java heap because the compression algorithm is the same for each.  The
-// argument passed in is at the top of the compressed space
-void Metaspace::initialize_class_space(ReservedSpace rs) {
-  // The reserved space size may be bigger because of alignment, esp with UseLargePages
-  assert(rs.size() >= ClassMetaspaceSize,
-         err_msg(SIZE_FORMAT " != " UINTX_FORMAT, rs.size(), ClassMetaspaceSize));
-  _class_space_list = new VirtualSpaceList(rs);
-}
-
-void Metaspace::initialize(Mutex* lock,
-                           MetaspaceType type) {
+void Metaspace::initialize(Mutex* lock, MetaspaceType type) {

  assert(space_list() != NULL,
    "Metadata VirtualSpaceList has not been initialized");

-  _vsm = new SpaceManager(Metaspace::NonClassType, lock, space_list());
+  _vsm = new SpaceManager(NonClassType, lock, space_list());
  if (_vsm == NULL) {
    return;
  }
  size_t word_size;
  size_t class_word_size;
-  vsm()->get_initial_chunk_sizes(type,
-                                 &word_size,
-                                 &class_word_size);
+  vsm()->get_initial_chunk_sizes(type, &word_size, &class_word_size);

-  assert(class_space_list() != NULL,
-    "Class VirtualSpaceList has not been initialized");
+  if (using_class_space()) {
+    assert(class_space_list() != NULL,
+      "Class VirtualSpaceList has not been initialized");

-  // Allocate SpaceManager for classes.
-  _class_vsm = new SpaceManager(Metaspace::ClassType, lock, class_space_list());
-  if (_class_vsm == NULL) {
-    return;
+    // Allocate SpaceManager for classes.
+    _class_vsm = new SpaceManager(ClassType, lock, class_space_list());
+    if (_class_vsm == NULL) {
+      return;
+    }
  }

  MutexLockerEx cl(SpaceManager::expand_lock(), Mutex::_no_safepoint_check_flag);
@ -2888,11 +3044,13 @@ void Metaspace::initialize(Mutex* lock,
  }

  // Allocate chunk for class metadata objects
-  Metachunk* class_chunk =
-     class_space_list()->get_initialization_chunk(class_word_size,
-                                                  class_vsm()->medium_chunk_bunch());
-  if (class_chunk != NULL) {
-    class_vsm()->add_chunk(class_chunk, true);
+  if (using_class_space()) {
+    Metachunk* class_chunk =
+       class_space_list()->get_initialization_chunk(class_word_size,
+                                                    class_vsm()->medium_chunk_bunch());
+    if (class_chunk != NULL) {
+      class_vsm()->add_chunk(class_chunk, true);
+    }
  }

  _alloc_record_head = NULL;
@ -2906,7 +3064,8 @@ size_t Metaspace::align_word_size_up(size_t word_size) {

 MetaWord* Metaspace::allocate(size_t word_size, MetadataType mdtype) {
  // DumpSharedSpaces doesn't use class metadata area (yet)
-  if (mdtype == ClassType && !DumpSharedSpaces) {
+  // Also, don't use class_vsm() unless UseCompressedKlassPointers is true.
+  if (mdtype == ClassType && using_class_space()) {
    return  class_vsm()->allocate(word_size);
  } else {
    return  vsm()->allocate(word_size);
@ -2937,14 +3096,19 @@ char* Metaspace::bottom() const {
 }

 size_t Metaspace::used_words_slow(MetadataType mdtype) const {
-  // return vsm()->allocated_used_words();
-  return mdtype == ClassType ? class_vsm()->sum_used_in_chunks_in_use() :
-                               vsm()->sum_used_in_chunks_in_use();  // includes overhead!
+  if (mdtype == ClassType) {
+    return using_class_space() ? class_vsm()->sum_used_in_chunks_in_use() : 0;
+  } else {
+    return vsm()->sum_used_in_chunks_in_use();  // includes overhead!
+  }
 }

 size_t Metaspace::free_words(MetadataType mdtype) const {
-  return mdtype == ClassType ? class_vsm()->sum_free_in_chunks_in_use() :
-                               vsm()->sum_free_in_chunks_in_use();
+  if (mdtype == ClassType) {
+    return using_class_space() ? class_vsm()->sum_free_in_chunks_in_use() : 0;
+  } else {
+    return vsm()->sum_free_in_chunks_in_use();
+  }
 }

 // Space capacity in the Metaspace.  It includes
@ -2953,8 +3117,11 @@ size_t Metaspace::free_words(MetadataType mdtype) const {
 // in the space available in the dictionary which
 // is already counted in some chunk.
 size_t Metaspace::capacity_words_slow(MetadataType mdtype) const {
-  return mdtype == ClassType ? class_vsm()->sum_capacity_in_chunks_in_use() :
-                               vsm()->sum_capacity_in_chunks_in_use();
+  if (mdtype == ClassType) {
+    return using_class_space() ? class_vsm()->sum_capacity_in_chunks_in_use() : 0;
+  } else {
+    return vsm()->sum_capacity_in_chunks_in_use();
+  }
 }

 size_t Metaspace::used_bytes_slow(MetadataType mdtype) const {
@ -2977,8 +3144,8 @@ void Metaspace::deallocate(MetaWord* ptr, size_t word_size, bool is_class) {
 #endif
      return;
    }
-    if (is_class) {
-       class_vsm()->deallocate(ptr, word_size);
+    if (is_class && using_class_space()) {
+      class_vsm()->deallocate(ptr, word_size);
    } else {
      vsm()->deallocate(ptr, word_size);
    }
@ -2992,7 +3159,7 @@ void Metaspace::deallocate(MetaWord* ptr, size_t word_size, bool is_class) {
 #endif
      return;
    }
-    if (is_class) {
+    if (is_class && using_class_space()) {
      class_vsm()->deallocate(ptr, word_size);
    } else {
      vsm()->deallocate(ptr, word_size);
@ -3101,14 +3268,18 @@ void Metaspace::purge() {
  MutexLockerEx cl(SpaceManager::expand_lock(),
                   Mutex::_no_safepoint_check_flag);
  space_list()->purge();
-  class_space_list()->purge();
+  if (using_class_space()) {
+    class_space_list()->purge();
+  }
 }

 void Metaspace::print_on(outputStream* out) const {
  // Print both class virtual space counts and metaspace.
  if (Verbose) {
-      vsm()->print_on(out);
+    vsm()->print_on(out);
+    if (using_class_space()) {
      class_vsm()->print_on(out);
+    }
  }
 }

@ -3122,17 +3293,21 @@ bool Metaspace::contains(const void * ptr) {
  // be needed.  Note, locking this can cause inversion problems with the
  // caller in MetaspaceObj::is_metadata() function.
  return space_list()->contains(ptr) ||
-         class_space_list()->contains(ptr);
+         (using_class_space() && class_space_list()->contains(ptr));
 }

 void Metaspace::verify() {
  vsm()->verify();
-  class_vsm()->verify();
+  if (using_class_space()) {
+    class_vsm()->verify();
+  }
 }

 void Metaspace::dump(outputStream* const out) const {
  out->print_cr("\nVirtual space manager: " INTPTR_FORMAT, vsm());
  vsm()->dump(out);
-  out->print_cr("\nClass space manager: " INTPTR_FORMAT, class_vsm());
-  class_vsm()->dump(out);
+  if (using_class_space()) {
+    out->print_cr("\nClass space manager: " INTPTR_FORMAT, class_vsm());
+    class_vsm()->dump(out);
+  }
 }
--- a/hotspot/src/share/vm/memory/metaspace.hpp
+++ b/hotspot/src/share/vm/memory/metaspace.hpp
@ -105,6 +105,16 @@ class Metaspace : public CHeapObj<mtClass> {
  // Align up the word size to the allocation word size
  static size_t align_word_size_up(size_t);

+  // Aligned size of the metaspace.
+  static size_t _class_metaspace_size;
+
+  static size_t class_metaspace_size() {
+    return _class_metaspace_size;
+  }
+  static void set_class_metaspace_size(size_t metaspace_size) {
+    _class_metaspace_size = metaspace_size;
+  }
+
  static size_t _first_chunk_word_size;
  static size_t _first_class_chunk_word_size;

@ -126,11 +136,26 @@ class Metaspace : public CHeapObj<mtClass> {

  static VirtualSpaceList* space_list()       { return _space_list; }
  static VirtualSpaceList* class_space_list() { return _class_space_list; }
+  static VirtualSpaceList* get_space_list(MetadataType mdtype) {
+    assert(mdtype != MetadataTypeCount, "MetadaTypeCount can't be used as mdtype");
+    return mdtype == ClassType ? class_space_list() : space_list();
+  }

  // This is used by DumpSharedSpaces only, where only _vsm is used. So we will
  // maintain a single list for now.
  void record_allocation(void* ptr, MetaspaceObj::Type type, size_t word_size);

+#ifdef _LP64
+  static void set_narrow_klass_base_and_shift(address metaspace_base, address cds_base);
+
+  // Returns true if can use CDS with metaspace allocated as specified address.
+  static bool can_use_cds_with_metaspace_addr(char* metaspace_base, address cds_base);
+
+  static void allocate_metaspace_compressed_klass_ptrs(char* requested_addr, address cds_base);
+
+  static void initialize_class_space(ReservedSpace rs);
+#endif
+
  class AllocRecord : public CHeapObj<mtClass> {
  public:
    AllocRecord(address ptr, MetaspaceObj::Type type, int byte_size)
@ -151,7 +176,6 @@ class Metaspace : public CHeapObj<mtClass> {

  // Initialize globals for Metaspace
  static void global_initialize();
-  static void initialize_class_space(ReservedSpace rs);

  static size_t first_chunk_word_size() { return _first_chunk_word_size; }
  static size_t first_class_chunk_word_size() { return _first_class_chunk_word_size; }
@ -172,8 +196,6 @@ class Metaspace : public CHeapObj<mtClass> {
  MetaWord* expand_and_allocate(size_t size,
                                MetadataType mdtype);

-  static bool is_initialized() { return _class_space_list != NULL; }
-
  static bool contains(const void *ptr);
  void dump(outputStream* const out) const;

@ -190,11 +212,16 @@ class Metaspace : public CHeapObj<mtClass> {
  };

  void iterate(AllocRecordClosure *closure);
+
+  // Return TRUE only if UseCompressedKlassPointers is True and DumpSharedSpaces is False.
+  static bool using_class_space() {
+    return NOT_LP64(false) LP64_ONLY(UseCompressedKlassPointers && !DumpSharedSpaces);
+  }
+
 };

 class MetaspaceAux : AllStatic {
  static size_t free_chunks_total(Metaspace::MetadataType mdtype);
-  static size_t free_chunks_total_in_bytes(Metaspace::MetadataType mdtype);

 public:
  // Statistics for class space and data space in metaspace.
@ -238,13 +265,15 @@ class MetaspaceAux : AllStatic {
  // Used by MetaspaceCounters
  static size_t free_chunks_total();
  static size_t free_chunks_total_in_bytes();
+  static size_t free_chunks_total_in_bytes(Metaspace::MetadataType mdtype);

  static size_t allocated_capacity_words(Metaspace::MetadataType mdtype) {
    return _allocated_capacity_words[mdtype];
  }
  static size_t allocated_capacity_words() {
-    return _allocated_capacity_words[Metaspace::ClassType] +
-           _allocated_capacity_words[Metaspace::NonClassType];
+    return _allocated_capacity_words[Metaspace::NonClassType] +
+           (Metaspace::using_class_space() ?
+           _allocated_capacity_words[Metaspace::ClassType] : 0);
  }
  static size_t allocated_capacity_bytes(Metaspace::MetadataType mdtype) {
    return allocated_capacity_words(mdtype) * BytesPerWord;
@ -257,8 +286,9 @@ class MetaspaceAux : AllStatic {
    return _allocated_used_words[mdtype];
  }
  static size_t allocated_used_words() {
-    return _allocated_used_words[Metaspace::ClassType] +
-           _allocated_used_words[Metaspace::NonClassType];
+    return _allocated_used_words[Metaspace::NonClassType] +
+           (Metaspace::using_class_space() ?
+           _allocated_used_words[Metaspace::ClassType] : 0);
  }
  static size_t allocated_used_bytes(Metaspace::MetadataType mdtype) {
    return allocated_used_words(mdtype) * BytesPerWord;
@ -268,6 +298,7 @@ class MetaspaceAux : AllStatic {
  }

  static size_t free_bytes();
+  static size_t free_bytes(Metaspace::MetadataType mdtype);

  // Total capacity in all Metaspaces
  static size_t capacity_bytes_slow() {
@ -300,6 +331,7 @@ class MetaspaceAux : AllStatic {
  static void print_on(outputStream * out);
  static void print_on(outputStream * out, Metaspace::MetadataType mdtype);

+  static void print_class_waste(outputStream* out);
  static void print_waste(outputStream* out);
  static void dump(outputStream* out);
  static void verify_free_chunks();
--- a/hotspot/src/share/vm/memory/metaspaceCounters.cpp
+++ b/hotspot/src/share/vm/memory/metaspaceCounters.cpp
@ -25,11 +25,47 @@
 #include "precompiled.hpp"
 #include "memory/metaspaceCounters.hpp"
 #include "memory/resourceArea.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/perfData.hpp"
 #include "utilities/exceptions.hpp"

-MetaspaceCounters* MetaspaceCounters::_metaspace_counters = NULL;
+class MetaspacePerfCounters: public CHeapObj<mtInternal> {
+  friend class VMStructs;
+  PerfVariable*      _capacity;
+  PerfVariable*      _used;
+  PerfVariable*      _max_capacity;

-size_t MetaspaceCounters::calc_total_capacity() {
+  PerfVariable* create_variable(const char *ns, const char *name, size_t value, TRAPS) {
+    const char *path = PerfDataManager::counter_name(ns, name);
+    return PerfDataManager::create_variable(SUN_GC, path, PerfData::U_Bytes, value, THREAD);
+  }
+
+  void create_constant(const char *ns, const char *name, size_t value, TRAPS) {
+    const char *path = PerfDataManager::counter_name(ns, name);
+    PerfDataManager::create_constant(SUN_GC, path, PerfData::U_Bytes, value, THREAD);
+  }
+
+ public:
+  MetaspacePerfCounters(const char* ns, size_t min_capacity, size_t curr_capacity, size_t max_capacity, size_t used) {
+    EXCEPTION_MARK;
+    ResourceMark rm;
+
+    create_constant(ns, "minCapacity", min_capacity, THREAD);
+    _capacity = create_variable(ns, "capacity", curr_capacity, THREAD);
+    _max_capacity = create_variable(ns, "maxCapacity", max_capacity, THREAD);
+    _used = create_variable(ns, "used", used, THREAD);
+  }
+
+  void update(size_t capacity, size_t max_capacity, size_t used) {
+    _capacity->set_value(capacity);
+    _max_capacity->set_value(max_capacity);
+    _used->set_value(used);
+  }
+};
+
+MetaspacePerfCounters* MetaspaceCounters::_perf_counters = NULL;
+
+size_t MetaspaceCounters::calculate_capacity() {
  // The total capacity is the sum of
  //   1) capacity of Metachunks in use by all Metaspaces
  //   2) unused space at the end of each Metachunk
@ -39,95 +75,65 @@ size_t MetaspaceCounters::calc_total_capacity() {
  return total_capacity;
 }

-MetaspaceCounters::MetaspaceCounters() :
-    _capacity(NULL),
-    _used(NULL),
-    _max_capacity(NULL) {
-  if (UsePerfData) {
-    size_t min_capacity = MetaspaceAux::min_chunk_size();
-    size_t max_capacity = MetaspaceAux::reserved_in_bytes();
-    size_t curr_capacity = calc_total_capacity();
-    size_t used = MetaspaceAux::allocated_used_bytes();
-
-    initialize(min_capacity, max_capacity, curr_capacity, used);
-  }
-}
-
-static PerfVariable* create_ms_variable(const char *ns,
-                                        const char *name,
-                                        size_t value,
-                                        TRAPS) {
-  const char *path = PerfDataManager::counter_name(ns, name);
-  PerfVariable *result =
-      PerfDataManager::create_variable(SUN_GC, path, PerfData::U_Bytes, value,
-                                       CHECK_NULL);
-  return result;
-}
-
-static void create_ms_constant(const char *ns,
-                               const char *name,
-                               size_t value,
-                               TRAPS) {
-  const char *path = PerfDataManager::counter_name(ns, name);
-  PerfDataManager::create_constant(SUN_GC, path, PerfData::U_Bytes, value, CHECK);
-}
-
-void MetaspaceCounters::initialize(size_t min_capacity,
-                                   size_t max_capacity,
-                                   size_t curr_capacity,
-                                   size_t used) {
-
-  if (UsePerfData) {
-    EXCEPTION_MARK;
-    ResourceMark rm;
-
-    const char *ms = "metaspace";
-
-    create_ms_constant(ms, "minCapacity", min_capacity, CHECK);
-    _max_capacity = create_ms_variable(ms, "maxCapacity", max_capacity, CHECK);
-    _capacity = create_ms_variable(ms, "capacity", curr_capacity, CHECK);
-    _used = create_ms_variable(ms, "used", used, CHECK);
-  }
-}
-
-void MetaspaceCounters::update_capacity() {
-  assert(UsePerfData, "Should not be called unless being used");
-  size_t total_capacity = calc_total_capacity();
-  _capacity->set_value(total_capacity);
-}
-
-void MetaspaceCounters::update_used() {
-  assert(UsePerfData, "Should not be called unless being used");
-  size_t used_in_bytes = MetaspaceAux::allocated_used_bytes();
-  _used->set_value(used_in_bytes);
-}
-
-void MetaspaceCounters::update_max_capacity() {
-  assert(UsePerfData, "Should not be called unless being used");
-  assert(_max_capacity != NULL, "Should be initialized");
-  size_t reserved_in_bytes = MetaspaceAux::reserved_in_bytes();
-  _max_capacity->set_value(reserved_in_bytes);
-}
-
-void MetaspaceCounters::update_all() {
-  if (UsePerfData) {
-    update_used();
-    update_capacity();
-    update_max_capacity();
-  }
-}
-
 void MetaspaceCounters::initialize_performance_counters() {
  if (UsePerfData) {
-    assert(_metaspace_counters == NULL, "Should only be initialized once");
-    _metaspace_counters = new MetaspaceCounters();
+    assert(_perf_counters == NULL, "Should only be initialized once");
+
+    size_t min_capacity = MetaspaceAux::min_chunk_size();
+    size_t capacity = calculate_capacity();
+    size_t max_capacity = MetaspaceAux::reserved_in_bytes();
+    size_t used = MetaspaceAux::allocated_used_bytes();
+
+    _perf_counters = new MetaspacePerfCounters("metaspace", min_capacity, capacity, max_capacity, used);
  }
 }

 void MetaspaceCounters::update_performance_counters() {
  if (UsePerfData) {
-    assert(_metaspace_counters != NULL, "Should be initialized");
-    _metaspace_counters->update_all();
+    assert(_perf_counters != NULL, "Should be initialized");
+
+    size_t capacity = calculate_capacity();
+    size_t max_capacity = MetaspaceAux::reserved_in_bytes();
+    size_t used = MetaspaceAux::allocated_used_bytes();
+
+    _perf_counters->update(capacity, max_capacity, used);
  }
 }

+MetaspacePerfCounters* CompressedClassSpaceCounters::_perf_counters = NULL;
+
+size_t CompressedClassSpaceCounters::calculate_capacity() {
+    return MetaspaceAux::allocated_capacity_bytes(_class_type) +
+           MetaspaceAux::free_bytes(_class_type) +
+           MetaspaceAux::free_chunks_total_in_bytes(_class_type);
+}
+
+void CompressedClassSpaceCounters::update_performance_counters() {
+  if (UsePerfData && UseCompressedKlassPointers) {
+    assert(_perf_counters != NULL, "Should be initialized");
+
+    size_t capacity = calculate_capacity();
+    size_t max_capacity = MetaspaceAux::reserved_in_bytes(_class_type);
+    size_t used = MetaspaceAux::allocated_used_bytes(_class_type);
+
+    _perf_counters->update(capacity, max_capacity, used);
+  }
+}
+
+void CompressedClassSpaceCounters::initialize_performance_counters() {
+  if (UsePerfData) {
+    assert(_perf_counters == NULL, "Should only be initialized once");
+    const char* ns = "compressedclassspace";
+
+    if (UseCompressedKlassPointers) {
+      size_t min_capacity = MetaspaceAux::min_chunk_size();
+      size_t capacity = calculate_capacity();
+      size_t max_capacity = MetaspaceAux::reserved_in_bytes(_class_type);
+      size_t used = MetaspaceAux::allocated_used_bytes(_class_type);
+
+      _perf_counters = new MetaspacePerfCounters(ns, min_capacity, capacity, max_capacity, used);
+    } else {
+      _perf_counters = new MetaspacePerfCounters(ns, 0, 0, 0, 0);
+    }
+  }
+}
--- a/hotspot/src/share/vm/memory/metaspaceCounters.hpp
+++ b/hotspot/src/share/vm/memory/metaspaceCounters.hpp
@ -25,31 +25,27 @@
 #ifndef SHARE_VM_MEMORY_METASPACECOUNTERS_HPP
 #define SHARE_VM_MEMORY_METASPACECOUNTERS_HPP

-#include "runtime/perfData.hpp"
+#include "memory/metaspace.hpp"
+
+class MetaspacePerfCounters;
+
+class MetaspaceCounters: public AllStatic {
+  static MetaspacePerfCounters* _perf_counters;
+  static size_t calculate_capacity();

-class MetaspaceCounters: public CHeapObj<mtClass> {
-  friend class VMStructs;
-  PerfVariable*      _capacity;
-  PerfVariable*      _used;
-  PerfVariable*      _max_capacity;
-  static MetaspaceCounters* _metaspace_counters;
-  void initialize(size_t min_capacity,
-                  size_t max_capacity,
-                  size_t curr_capacity,
-                  size_t used);
-  size_t calc_total_capacity();
 public:
-  MetaspaceCounters();
-  ~MetaspaceCounters();
-
-  void update_capacity();
-  void update_used();
-  void update_max_capacity();
-
-  void update_all();
-
  static void initialize_performance_counters();
  static void update_performance_counters();
-
 };
+
+class CompressedClassSpaceCounters: public AllStatic {
+  static MetaspacePerfCounters* _perf_counters;
+  static size_t calculate_capacity();
+  static const Metaspace::MetadataType _class_type = Metaspace::ClassType;
+
+ public:
+  static void initialize_performance_counters();
+  static void update_performance_counters();
+};
+
 #endif // SHARE_VM_MEMORY_METASPACECOUNTERS_HPP
--- a/hotspot/src/share/vm/memory/metaspaceShared.cpp
+++ b/hotspot/src/share/vm/memory/metaspaceShared.cpp
@ -52,7 +52,6 @@ void MetaspaceShared::serialize(SerializeClosure* soc) {
  int tag = 0;
  soc->do_tag(--tag);

-  assert(!UseCompressedOops, "UseCompressedOops doesn't work with shared archive");
  // Verify the sizes of various metadata in the system.
  soc->do_tag(sizeof(Method));
  soc->do_tag(sizeof(ConstMethod));
--- a/hotspot/src/share/vm/memory/universe.cpp
+++ b/hotspot/src/share/vm/memory/universe.cpp
@ -145,8 +145,6 @@ NarrowPtrStruct Universe::_narrow_oop = { NULL, 0, true };
 NarrowPtrStruct Universe::_narrow_klass = { NULL, 0, true };
 address Universe::_narrow_ptrs_base;

-size_t          Universe::_class_metaspace_size;
-
 void Universe::basic_type_classes_do(void f(Klass*)) {
  f(boolArrayKlassObj());
  f(byteArrayKlassObj());
@ -641,6 +639,8 @@ jint universe_init() {
    return status;
  }

+  Metaspace::global_initialize();
+
  // Create memory for metadata.  Must be after initializing heap for
  // DumpSharedSpaces.
  ClassLoaderData::init_null_class_loader_data();
@ -693,13 +693,9 @@ char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) {
    if (!FLAG_IS_DEFAULT(HeapBaseMinAddress) && (mode == UnscaledNarrowOop)) {
      base = HeapBaseMinAddress;

-    // If the total size and the metaspace size are small enough to allow
-    // UnscaledNarrowOop then just use UnscaledNarrowOop.
-    } else if ((total_size <= OopEncodingHeapMax) && (mode != HeapBasedNarrowOop) &&
-        (!UseCompressedKlassPointers ||
-          (((OopEncodingHeapMax - heap_size) + Universe::class_metaspace_size()) <= KlassEncodingMetaspaceMax))) {
-      // We don't need to check the metaspace size here because it is always smaller
-      // than total_size.
+    // If the total size is small enough to allow UnscaledNarrowOop then
+    // just use UnscaledNarrowOop.
+    } else if ((total_size <= OopEncodingHeapMax) && (mode != HeapBasedNarrowOop)) {
      if ((total_size <= NarrowOopHeapMax) && (mode == UnscaledNarrowOop) &&
          (Universe::narrow_oop_shift() == 0)) {
        // Use 32-bits oops without encoding and
@ -716,13 +712,6 @@ char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) {
          base = (OopEncodingHeapMax - heap_size);
        }
      }
-
-    // See if ZeroBaseNarrowOop encoding will work for a heap based at
-    // (KlassEncodingMetaspaceMax - class_metaspace_size()).
-    } else if (UseCompressedKlassPointers && (mode != HeapBasedNarrowOop) &&
-        (Universe::class_metaspace_size() + HeapBaseMinAddress <= KlassEncodingMetaspaceMax) &&
-        (KlassEncodingMetaspaceMax + heap_size - Universe::class_metaspace_size() <= OopEncodingHeapMax)) {
-      base = (KlassEncodingMetaspaceMax - Universe::class_metaspace_size());
    } else {
      // UnscaledNarrowOop encoding didn't work, and no base was found for ZeroBasedOops or
      // HeapBasedNarrowOop encoding was requested.  So, can't reserve below 32Gb.
@ -732,8 +721,7 @@ char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) {
    // Set narrow_oop_base and narrow_oop_use_implicit_null_checks
    // used in ReservedHeapSpace() constructors.
    // The final values will be set in initialize_heap() below.
-    if ((base != 0) && ((base + heap_size) <= OopEncodingHeapMax) &&
-        (!UseCompressedKlassPointers || (base + Universe::class_metaspace_size()) <= KlassEncodingMetaspaceMax)) {
+    if ((base != 0) && ((base + heap_size) <= OopEncodingHeapMax)) {
      // Use zero based compressed oops
      Universe::set_narrow_oop_base(NULL);
      // Don't need guard page for implicit checks in indexed
@ -816,9 +804,7 @@ jint Universe::initialize_heap() {
      tty->print("heap address: " PTR_FORMAT ", size: " SIZE_FORMAT " MB",
                 Universe::heap()->base(), Universe::heap()->reserved_region().byte_size()/M);
    }
-    if (((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax) ||
-        (UseCompressedKlassPointers &&
-        ((uint64_t)Universe::heap()->base() + Universe::class_metaspace_size() > KlassEncodingMetaspaceMax))) {
+    if (((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax)) {
      // Can't reserve heap below 32Gb.
      // keep the Universe::narrow_oop_base() set in Universe::reserve_heap()
      Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
@ -849,20 +835,16 @@ jint Universe::initialize_heap() {
        }
      }
    }
+
    if (verbose) {
      tty->cr();
      tty->cr();
    }
-    if (UseCompressedKlassPointers) {
-      Universe::set_narrow_klass_base(Universe::narrow_oop_base());
-      Universe::set_narrow_klass_shift(MIN2(Universe::narrow_oop_shift(), LogKlassAlignmentInBytes));
-    }
    Universe::set_narrow_ptrs_base(Universe::narrow_oop_base());
  }
-  // Universe::narrow_oop_base() is one page below the metaspace
-  // base. The actual metaspace base depends on alignment constraints
-  // so we don't know its exact location here.
-  assert((intptr_t)Universe::narrow_oop_base() <= (intptr_t)(Universe::heap()->base() - os::vm_page_size() - ClassMetaspaceSize) ||
+  // Universe::narrow_oop_base() is one page below the heap.
+  assert((intptr_t)Universe::narrow_oop_base() <= (intptr_t)(Universe::heap()->base() -
+         os::vm_page_size()) ||
         Universe::narrow_oop_base() == NULL, "invalid value");
  assert(Universe::narrow_oop_shift() == LogMinObjAlignmentInBytes ||
         Universe::narrow_oop_shift() == 0, "invalid value");
@ -882,12 +864,7 @@ jint Universe::initialize_heap() {

 // Reserve the Java heap, which is now the same for all GCs.
 ReservedSpace Universe::reserve_heap(size_t heap_size, size_t alignment) {
-  // Add in the class metaspace area so the classes in the headers can
-  // be compressed the same as instances.
-  // Need to round class space size up because it's below the heap and
-  // the actual alignment depends on its size.
-  Universe::set_class_metaspace_size(align_size_up(ClassMetaspaceSize, alignment));
-  size_t total_reserved = align_size_up(heap_size + Universe::class_metaspace_size(), alignment);
+  size_t total_reserved = align_size_up(heap_size, alignment);
  assert(!UseCompressedOops || (total_reserved <= (OopEncodingHeapMax - os::vm_page_size())),
      "heap size is too big for compressed oops");
  char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
@ -923,28 +900,17 @@ ReservedSpace Universe::reserve_heap(size_t heap_size, size_t alignment) {
    return total_rs;
  }

-  // Split the reserved space into main Java heap and a space for
-  // classes so that they can be compressed using the same algorithm
-  // as compressed oops. If compress oops and compress klass ptrs are
-  // used we need the meta space first: if the alignment used for
-  // compressed oops is greater than the one used for compressed klass
-  // ptrs, a metadata space on top of the heap could become
-  // unreachable.
-  ReservedSpace class_rs = total_rs.first_part(Universe::class_metaspace_size());
-  ReservedSpace heap_rs = total_rs.last_part(Universe::class_metaspace_size(), alignment);
-  Metaspace::initialize_class_space(class_rs);
-
  if (UseCompressedOops) {
    // Universe::initialize_heap() will reset this to NULL if unscaled
    // or zero-based narrow oops are actually used.
    address base = (address)(total_rs.base() - os::vm_page_size());
    Universe::set_narrow_oop_base(base);
  }
-  return heap_rs;
+  return total_rs;
 }


-// It's the caller's repsonsibility to ensure glitch-freedom
+// It's the caller's responsibility to ensure glitch-freedom
 // (if required).
 void Universe::update_heap_info_at_gc() {
  _heap_capacity_at_last_gc = heap()->capacity();
@ -1135,6 +1101,8 @@ bool universe_post_init() {

  // Initialize performance counters for metaspaces
  MetaspaceCounters::initialize_performance_counters();
+  CompressedClassSpaceCounters::initialize_performance_counters();
+
  MemoryService::add_metaspace_memory_pools();

  GC_locker::unlock();  // allow gc after bootstrapping
--- a/hotspot/src/share/vm/memory/universe.hpp
+++ b/hotspot/src/share/vm/memory/universe.hpp
@ -75,10 +75,10 @@ class LatestMethodCache : public CHeapObj<mtClass> {
 };


-// For UseCompressedOops and UseCompressedKlassPointers.
+// For UseCompressedOops.
 struct NarrowPtrStruct {
-  // Base address for oop/klass-within-java-object materialization.
-  // NULL if using wide oops/klasses or zero based narrow oops/klasses.
+  // Base address for oop-within-java-object materialization.
+  // NULL if using wide oops or zero based narrow oops.
  address _base;
  // Number of shift bits for encoding/decoding narrow ptrs.
  // 0 if using wide ptrs or zero based unscaled narrow ptrs,
@ -106,6 +106,7 @@ class Universe: AllStatic {
  friend class SystemDictionary;
  friend class VMStructs;
  friend class VM_PopulateDumpSharedSpace;
+  friend class Metaspace;

  friend jint  universe_init();
  friend void  universe2_init();
@ -184,9 +185,6 @@ class Universe: AllStatic {
  static struct NarrowPtrStruct _narrow_klass;
  static address _narrow_ptrs_base;

-  // Aligned size of the metaspace.
-  static size_t _class_metaspace_size;
-
  // array of dummy objects used with +FullGCAlot
  debug_only(static objArrayOop _fullgc_alot_dummy_array;)
  // index of next entry to clear
@ -238,15 +236,6 @@ class Universe: AllStatic {
    assert(UseCompressedOops, "no compressed ptrs?");
    _narrow_oop._use_implicit_null_checks   = use;
  }
-  static bool     reserve_metaspace_helper(bool with_base = false);
-  static ReservedHeapSpace reserve_heap_metaspace(size_t heap_size, size_t alignment, bool& contiguous);
-
-  static size_t  class_metaspace_size() {
-    return _class_metaspace_size;
-  }
-  static void    set_class_metaspace_size(size_t metaspace_size) {
-    _class_metaspace_size = metaspace_size;
-  }

  // Debugging
  static int _verify_count;                           // number of verifies done
--- a/hotspot/src/share/vm/oops/instanceKlass.cpp
+++ b/hotspot/src/share/vm/oops/instanceKlass.cpp
@ -269,7 +269,7 @@ InstanceKlass::InstanceKlass(int vtable_len,
  set_fields(NULL, 0);
  set_constants(NULL);
  set_class_loader_data(NULL);
-  set_source_file_name(NULL);
+  set_source_file_name_index(0);
  set_source_debug_extension(NULL, 0);
  set_array_name(NULL);
  set_inner_classes(NULL);
@ -284,7 +284,7 @@ InstanceKlass::InstanceKlass(int vtable_len,
  set_osr_nmethods_head(NULL);
  set_breakpoints(NULL);
  init_previous_versions();
-  set_generic_signature(NULL);
+  set_generic_signature_index(0);
  release_set_methods_jmethod_ids(NULL);
  release_set_methods_cached_itable_indices(NULL);
  set_annotations(NULL);
@ -2368,18 +2368,12 @@ void InstanceKlass::release_C_heap_structures() {
  // unreference array name derived from this class name (arrays of an unloaded
  // class can't be referenced anymore).
  if (_array_name != NULL)  _array_name->decrement_refcount();
-  if (_source_file_name != NULL) _source_file_name->decrement_refcount();
  if (_source_debug_extension != NULL) FREE_C_HEAP_ARRAY(char, _source_debug_extension, mtClass);

  assert(_total_instanceKlass_count >= 1, "Sanity check");
  Atomic::dec(&_total_instanceKlass_count);
 }

-void InstanceKlass::set_source_file_name(Symbol* n) {
-  _source_file_name = n;
-  if (_source_file_name != NULL) _source_file_name->increment_refcount();
-}
-
 void InstanceKlass::set_source_debug_extension(char* array, int length) {
  if (array == NULL) {
    _source_debug_extension = NULL;
--- a/hotspot/src/share/vm/oops/instanceKlass.hpp
+++ b/hotspot/src/share/vm/oops/instanceKlass.hpp
@ -201,14 +201,10 @@ class InstanceKlass: public Klass {
  // number_of_inner_classes * 4 + enclosing_method_attribute_size.
  Array<jushort>* _inner_classes;

-  // Name of source file containing this klass, NULL if not specified.
-  Symbol*         _source_file_name;
  // the source debug extension for this klass, NULL if not specified.
  // Specified as UTF-8 string without terminating zero byte in the classfile,
  // it is stored in the instanceklass as a NULL-terminated UTF-8 string
  char*           _source_debug_extension;
-  // Generic signature, or null if none.
-  Symbol*         _generic_signature;
  // Array name derived from this class which needs unreferencing
  // if this class is unloaded.
  Symbol*         _array_name;
@ -217,6 +213,12 @@ class InstanceKlass: public Klass {
  // (including inherited fields but after header_size()).
  int             _nonstatic_field_size;
  int             _static_field_size;    // number words used by static fields (oop and non-oop) in this klass
+  // Constant pool index to the utf8 entry of the Generic signature,
+  // or 0 if none.
+  u2              _generic_signature_index;
+  // Constant pool index to the utf8 entry for the name of source file
+  // containing this klass, 0 if not specified.
+  u2              _source_file_name_index;
  u2              _static_oop_field_count;// number of static oop fields in this klass
  u2              _java_fields_count;    // The number of declared Java fields
  int             _nonstatic_oop_map_size;// size in words of nonstatic oop map blocks
@ -570,8 +572,16 @@ class InstanceKlass: public Klass {
  }

  // source file name
-  Symbol* source_file_name() const         { return _source_file_name; }
-  void set_source_file_name(Symbol* n);
+  Symbol* source_file_name() const               {
+    return (_source_file_name_index == 0) ?
+      (Symbol*)NULL : _constants->symbol_at(_source_file_name_index);
+  }
+  u2 source_file_name_index() const              {
+    return _source_file_name_index;
+  }
+  void set_source_file_name_index(u2 sourcefile_index) {
+    _source_file_name_index = sourcefile_index;
+  }

  // minor and major version numbers of class file
  u2 minor_version() const                 { return _minor_version; }
@ -648,8 +658,16 @@ class InstanceKlass: public Klass {
  void set_initial_method_idnum(u2 value)             { _idnum_allocated_count = value; }

  // generics support
-  Symbol* generic_signature() const                   { return _generic_signature; }
-  void set_generic_signature(Symbol* sig)             { _generic_signature = sig; }
+  Symbol* generic_signature() const                   {
+    return (_generic_signature_index == 0) ?
+      (Symbol*)NULL : _constants->symbol_at(_generic_signature_index);
+  }
+  u2 generic_signature_index() const                  {
+    return _generic_signature_index;
+  }
+  void set_generic_signature_index(u2 sig_index)      {
+    _generic_signature_index = sig_index;
+  }

  u2 enclosing_method_data(int offset);
  u2 enclosing_method_class_index() {
--- a/hotspot/src/share/vm/oops/klass.hpp
+++ b/hotspot/src/share/vm/oops/klass.hpp
@ -352,7 +352,8 @@ class Klass : public Metadata {
  static int layout_helper_log2_element_size(jint lh) {
    assert(lh < (jint)_lh_neutral_value, "must be array");
    int l2esz = (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask;
-    assert(l2esz <= LogBitsPerLong, "sanity");
+    assert(l2esz <= LogBitsPerLong,
+        err_msg("sanity. l2esz: 0x%x for lh: 0x%x", (uint)l2esz, (uint)lh));
    return l2esz;
  }
  static jint array_layout_helper(jint tag, int hsize, BasicType etype, int log2_esize) {
@ -703,6 +704,16 @@ class Klass : public Metadata {

  virtual void oop_verify_on(oop obj, outputStream* st);

+  static bool is_null(narrowKlass obj);
+  static bool is_null(Klass* obj);
+
+  // klass encoding for klass pointer in objects.
+  static narrowKlass encode_klass_not_null(Klass* v);
+  static narrowKlass encode_klass(Klass* v);
+
+  static Klass* decode_klass_not_null(narrowKlass v);
+  static Klass* decode_klass(narrowKlass v);
+
 private:
  // barriers used by klass_oop_store
  void klass_update_barrier_set(oop v);
--- a/hotspot/src/share/vm/oops/klass.inline.hpp
+++ b/hotspot/src/share/vm/oops/klass.inline.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -25,6 +25,7 @@
 #ifndef SHARE_VM_OOPS_KLASS_INLINE_HPP
 #define SHARE_VM_OOPS_KLASS_INLINE_HPP

+#include "memory/universe.hpp"
 #include "oops/klass.hpp"
 #include "oops/markOop.hpp"

@ -33,4 +34,41 @@ inline void Klass::set_prototype_header(markOop header) {
  _prototype_header = header;
 }

+inline bool Klass::is_null(Klass* obj)  { return obj == NULL; }
+inline bool Klass::is_null(narrowKlass obj) { return obj == 0; }
+
+// Encoding and decoding for klass field.
+
+inline bool check_klass_alignment(Klass* obj) {
+  return (intptr_t)obj % KlassAlignmentInBytes == 0;
+}
+
+inline narrowKlass Klass::encode_klass_not_null(Klass* v) {
+  assert(!is_null(v), "klass value can never be zero");
+  assert(check_klass_alignment(v), "Address not aligned");
+  int    shift = Universe::narrow_klass_shift();
+  uint64_t pd = (uint64_t)(pointer_delta((void*)v, Universe::narrow_klass_base(), 1));
+  assert(KlassEncodingMetaspaceMax > pd, "change encoding max if new encoding");
+  uint64_t result = pd >> shift;
+  assert((result & CONST64(0xffffffff00000000)) == 0, "narrow klass pointer overflow");
+  assert(decode_klass(result) == v, "reversibility");
+  return (narrowKlass)result;
+}
+
+inline narrowKlass Klass::encode_klass(Klass* v) {
+  return is_null(v) ? (narrowKlass)0 : encode_klass_not_null(v);
+}
+
+inline Klass* Klass::decode_klass_not_null(narrowKlass v) {
+  assert(!is_null(v), "narrow klass value can never be zero");
+  int    shift = Universe::narrow_klass_shift();
+  Klass* result = (Klass*)(void*)((uintptr_t)Universe::narrow_klass_base() + ((uintptr_t)v << shift));
+  assert(check_klass_alignment(result), err_msg("address not aligned: " PTR_FORMAT, (void*) result));
+  return result;
+}
+
+inline Klass* Klass::decode_klass(narrowKlass v) {
+  return is_null(v) ? (Klass*)NULL : decode_klass_not_null(v);
+}
+
 #endif // SHARE_VM_OOPS_KLASS_INLINE_HPP
--- a/hotspot/src/share/vm/oops/method.cpp
+++ b/hotspot/src/share/vm/oops/method.cpp
@ -747,6 +747,7 @@ void Method::set_not_compilable(int comp_level, bool report, const char* reason)
      set_not_c2_compilable();
  }
  CompilationPolicy::policy()->disable_compilation(this);
+  assert(!CompilationPolicy::can_be_compiled(this, comp_level), "sanity check");
 }

 bool Method::is_not_osr_compilable(int comp_level) const {
@ -773,6 +774,7 @@ void Method::set_not_osr_compilable(int comp_level, bool report, const char* rea
      set_not_c2_osr_compilable();
  }
  CompilationPolicy::policy()->disable_compilation(this);
+  assert(!CompilationPolicy::can_be_osr_compiled(this, comp_level), "sanity check");
 }

 // Revert to using the interpreter and clear out the nmethod
--- a/hotspot/src/share/vm/oops/oop.hpp
+++ b/hotspot/src/share/vm/oops/oop.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -62,7 +62,7 @@ class oopDesc {
  volatile markOop  _mark;
  union _metadata {
    Klass*      _klass;
-    narrowOop       _compressed_klass;
+    narrowKlass _compressed_klass;
  } _metadata;

  // Fast access to barrier set.  Must be initialized.
@ -84,7 +84,7 @@ class oopDesc {
  Klass* klass() const;
  Klass* klass_or_null() const volatile;
  Klass** klass_addr();
-  narrowOop* compressed_klass_addr();
+  narrowKlass* compressed_klass_addr();

  void set_klass(Klass* k);

@ -189,13 +189,6 @@ class oopDesc {
                                         oop compare_value,
                                         bool prebarrier = false);

-  // klass encoding for klass pointer in objects.
-  static narrowOop encode_klass_not_null(Klass* v);
-  static narrowOop encode_klass(Klass* v);
-
-  static Klass* decode_klass_not_null(narrowOop v);
-  static Klass* decode_klass(narrowOop v);
-
  // Access to fields in a instanceOop through these methods.
  oop obj_field(int offset) const;
  volatile oop obj_field_volatile(int offset) const;
--- a/hotspot/src/share/vm/oops/oop.inline.hpp
+++ b/hotspot/src/share/vm/oops/oop.inline.hpp
@ -35,7 +35,7 @@
 #include "memory/specialized_oop_closures.hpp"
 #include "oops/arrayKlass.hpp"
 #include "oops/arrayOop.hpp"
-#include "oops/klass.hpp"
+#include "oops/klass.inline.hpp"
 #include "oops/markOop.inline.hpp"
 #include "oops/oop.hpp"
 #include "runtime/atomic.hpp"
@ -70,7 +70,7 @@ inline markOop oopDesc::cas_set_mark(markOop new_mark, markOop old_mark) {

 inline Klass* oopDesc::klass() const {
  if (UseCompressedKlassPointers) {
-    return decode_klass_not_null(_metadata._compressed_klass);
+    return Klass::decode_klass_not_null(_metadata._compressed_klass);
  } else {
    return _metadata._klass;
  }
@ -79,7 +79,7 @@ inline Klass* oopDesc::klass() const {
 inline Klass* oopDesc::klass_or_null() const volatile {
  // can be NULL in CMS
  if (UseCompressedKlassPointers) {
-    return decode_klass(_metadata._compressed_klass);
+    return Klass::decode_klass(_metadata._compressed_klass);
  } else {
    return _metadata._klass;
  }
@ -87,7 +87,7 @@ inline Klass* oopDesc::klass_or_null() const volatile {

 inline int oopDesc::klass_gap_offset_in_bytes() {
  assert(UseCompressedKlassPointers, "only applicable to compressed klass pointers");
-  return oopDesc::klass_offset_in_bytes() + sizeof(narrowOop);
+  return oopDesc::klass_offset_in_bytes() + sizeof(narrowKlass);
 }

 inline Klass** oopDesc::klass_addr() {
@ -97,9 +97,9 @@ inline Klass** oopDesc::klass_addr() {
  return (Klass**) &_metadata._klass;
 }

-inline narrowOop* oopDesc::compressed_klass_addr() {
+inline narrowKlass* oopDesc::compressed_klass_addr() {
  assert(UseCompressedKlassPointers, "only called by compressed klass pointers");
-  return (narrowOop*) &_metadata._compressed_klass;
+  return &_metadata._compressed_klass;
 }

 inline void oopDesc::set_klass(Klass* k) {
@ -107,7 +107,7 @@ inline void oopDesc::set_klass(Klass* k) {
  assert(Universe::is_bootstrapping() || k != NULL, "must be a real Klass*");
  assert(Universe::is_bootstrapping() || k->is_klass(), "not a Klass*");
  if (UseCompressedKlassPointers) {
-    *compressed_klass_addr() = encode_klass_not_null(k);
+    *compressed_klass_addr() = Klass::encode_klass_not_null(k);
  } else {
    *klass_addr() = k;
  }
@ -127,7 +127,7 @@ inline void oopDesc::set_klass_to_list_ptr(oop k) {
  // This is only to be used during GC, for from-space objects, so no
  // barrier is needed.
  if (UseCompressedKlassPointers) {
-    _metadata._compressed_klass = encode_heap_oop(k);  // may be null (parnew overflow handling)
+    _metadata._compressed_klass = (narrowKlass)encode_heap_oop(k);  // may be null (parnew overflow handling)
  } else {
    _metadata._klass = (Klass*)(address)k;
  }
@ -136,7 +136,7 @@ inline void oopDesc::set_klass_to_list_ptr(oop k) {
 inline oop oopDesc::list_ptr_from_klass() {
  // This is only to be used during GC, for from-space objects.
  if (UseCompressedKlassPointers) {
-    return decode_heap_oop(_metadata._compressed_klass);
+    return decode_heap_oop((narrowOop)_metadata._compressed_klass);
  } else {
    // Special case for GC
    return (oop)(address)_metadata._klass;
@ -176,7 +176,6 @@ inline address*  oopDesc::address_field_addr(int offset) const { return (address
 // the right type and inlines the appopriate code).

 inline bool oopDesc::is_null(oop obj)       { return obj == NULL; }
-inline bool oopDesc::is_null(Klass* obj)  { return obj == NULL; }
 inline bool oopDesc::is_null(narrowOop obj) { return obj == 0; }

 // Algorithm for encoding and decoding oops from 64 bit pointers to 32 bit
@ -186,9 +185,6 @@ inline bool oopDesc::is_null(narrowOop obj) { return obj == 0; }
 inline bool check_obj_alignment(oop obj) {
  return (intptr_t)obj % MinObjAlignmentInBytes == 0;
 }
-inline bool check_klass_alignment(Klass* obj) {
-  return (intptr_t)obj % KlassAlignmentInBytes == 0;
-}

 inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
  assert(!is_null(v), "oop value can never be zero");
@ -224,39 +220,6 @@ inline oop oopDesc::decode_heap_oop(narrowOop v) {
 inline oop oopDesc::decode_heap_oop_not_null(oop v) { return v; }
 inline oop oopDesc::decode_heap_oop(oop v)  { return v; }

-// Encoding and decoding for klass field.  It is copied code, but someday
-// might not be the same as oop.
-
-inline narrowOop oopDesc::encode_klass_not_null(Klass* v) {
-  assert(!is_null(v), "klass value can never be zero");
-  assert(check_klass_alignment(v), "Address not aligned");
-  address base = Universe::narrow_klass_base();
-  int    shift = Universe::narrow_klass_shift();
-  uint64_t  pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1));
-  assert(KlassEncodingMetaspaceMax > pd, "change encoding max if new encoding");
-  uint64_t result = pd >> shift;
-  assert((result & CONST64(0xffffffff00000000)) == 0, "narrow klass pointer overflow");
-  assert(decode_klass(result) == v, "reversibility");
-  return (narrowOop)result;
-}
-
-inline narrowOop oopDesc::encode_klass(Klass* v) {
-  return (is_null(v)) ? (narrowOop)0 : encode_klass_not_null(v);
-}
-
-inline Klass* oopDesc::decode_klass_not_null(narrowOop v) {
-  assert(!is_null(v), "narrow oop value can never be zero");
-  address base = Universe::narrow_klass_base();
-  int    shift = Universe::narrow_klass_shift();
-  Klass* result = (Klass*)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
-  assert(check_klass_alignment(result), err_msg("address not aligned: " PTR_FORMAT, (void*) result));
-  return result;
-}
-
-inline Klass* oopDesc::decode_klass(narrowOop v) {
-  return is_null(v) ? (Klass*)NULL : decode_klass_not_null(v);
-}
-
 // Load an oop out of the Java heap as is without decoding.
 // Called by GC to check for null before decoding.
 inline oop       oopDesc::load_heap_oop(oop* p)          { return *p; }
--- a/hotspot/src/share/vm/oops/oopsHierarchy.hpp
+++ b/hotspot/src/share/vm/oops/oopsHierarchy.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -33,6 +33,10 @@
 // of B, A's representation is a prefix of B's representation.

 typedef juint narrowOop; // Offset instead of address for an oop within a java object
+
+// If compressed klass pointers then use narrowKlass.
+typedef juint  narrowKlass;
+
 typedef void* OopOrNarrowOopStar;
 typedef class   markOopDesc*                markOop;

--- a/hotspot/src/share/vm/opto/block.cpp
+++ b/hotspot/src/share/vm/opto/block.cpp
@ -35,10 +35,6 @@
 #include "opto/rootnode.hpp"
 #include "utilities/copy.hpp"

-// Optimization - Graph Style
-
-
-//-----------------------------------------------------------------------------
 void Block_Array::grow( uint i ) {
  assert(i >= Max(), "must be an overflow");
  debug_only(_limit = i+1);
@ -54,7 +50,6 @@ void Block_Array::grow( uint i ) {
  Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) );
 }

-//=============================================================================
 void Block_List::remove(uint i) {
  assert(i < _cnt, "index out of bounds");
  Copy::conjoint_words_to_lower((HeapWord*)&_blocks[i+1], (HeapWord*)&_blocks[i], ((_cnt-i-1)*sizeof(Block*)));
@ -76,8 +71,6 @@ void Block_List::print() {
 }
 #endif

-//=============================================================================
-
 uint Block::code_alignment() {
  // Check for Root block
  if (_pre_order == 0) return CodeEntryAlignment;
@ -113,7 +106,6 @@ uint Block::compute_loop_alignment() {
  return unit_sz; // no particular alignment
 }

-//-----------------------------------------------------------------------------
 // Compute the size of first 'inst_cnt' instructions in this block.
 // Return the number of instructions left to compute if the block has
 // less then 'inst_cnt' instructions. Stop, and return 0 if sum_size
@ -138,7 +130,6 @@ uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt,
  return inst_cnt;
 }

-//-----------------------------------------------------------------------------
 uint Block::find_node( const Node *n ) const {
  for( uint i = 0; i < _nodes.size(); i++ ) {
    if( _nodes[i] == n )
@ -153,7 +144,6 @@ void Block::find_remove( const Node *n ) {
  _nodes.remove(find_node(n));
 }

-//------------------------------is_Empty---------------------------------------
 // Return empty status of a block.  Empty blocks contain only the head, other
 // ideal nodes, and an optional trailing goto.
 int Block::is_Empty() const {
@ -192,7 +182,6 @@ int Block::is_Empty() const {
  return not_empty;
 }

-//------------------------------has_uncommon_code------------------------------
 // Return true if the block's code implies that it is likely to be
 // executed infrequently.  Check to see if the block ends in a Halt or
 // a low probability call.
@ -218,7 +207,6 @@ bool Block::has_uncommon_code() const {
  return op == Op_Halt;
 }

-//------------------------------is_uncommon------------------------------------
 // True if block is low enough frequency or guarded by a test which
 // mostly does not go here.
 bool Block::is_uncommon(PhaseCFG* cfg) const {
@ -271,7 +259,6 @@ bool Block::is_uncommon(PhaseCFG* cfg) const {
  return false;
 }

-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void Block::dump_bidx(const Block* orig, outputStream* st) const {
  if (_pre_order) st->print("B%d",_pre_order);
@ -364,13 +351,12 @@ void Block::dump(const PhaseCFG* cfg) const {
 }
 #endif

-//=============================================================================
-//------------------------------PhaseCFG---------------------------------------
 PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher)
 : Phase(CFG)
 , _block_arena(arena)
-, _node_to_block_mapping(arena)
 , _root(root)
+, _matcher(matcher)
+, _node_to_block_mapping(arena)
 , _node_latency(NULL)
 #ifndef PRODUCT
 , _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining"))
@ -390,11 +376,10 @@ PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher)
  _goto->set_req(0,_goto);

  // Build the CFG in Reverse Post Order
-  _num_blocks = build_cfg();
-  _broot = get_block_for_node(_root);
+  _number_of_blocks = build_cfg();
+  _root_block = get_block_for_node(_root);
 }

-//------------------------------build_cfg--------------------------------------
 // Build a proper looking CFG.  Make every block begin with either a StartNode
 // or a RegionNode.  Make every block end with either a Goto, If or Return.
 // The RootNode both starts and ends it's own block.  Do this with a recursive
@ -496,13 +481,12 @@ uint PhaseCFG::build_cfg() {
  return sum;
 }

-//------------------------------insert_goto_at---------------------------------
 // Inserts a goto & corresponding basic block between
 // block[block_no] and its succ_no'th successor block
 void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) {
  // get block with block_no
-  assert(block_no < _num_blocks, "illegal block number");
-  Block* in  = _blocks[block_no];
+  assert(block_no < number_of_blocks(), "illegal block number");
+  Block* in  = get_block(block_no);
  // get successor block succ_no
  assert(succ_no < in->_num_succs, "illegal successor number");
  Block* out = in->_succs[succ_no];
@ -537,11 +521,9 @@ void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) {
  // Set the frequency of the new block
  block->_freq = freq;
  // add new basic block to basic block list
-  _blocks.insert(block_no + 1, block);
-  _num_blocks++;
+  add_block_at(block_no + 1, block);
 }

-//------------------------------no_flip_branch---------------------------------
 // Does this block end in a multiway branch that cannot have the default case
 // flipped for another case?
 static bool no_flip_branch( Block *b ) {
@ -560,7 +542,6 @@ static bool no_flip_branch( Block *b ) {
  return false;
 }

-//------------------------------convert_NeverBranch_to_Goto--------------------
 // Check for NeverBranch at block end.  This needs to become a GOTO to the
 // true target.  NeverBranch are treated as a conditional branch that always
 // goes the same direction for most of the optimizer and are used to give a
@ -598,7 +579,6 @@ void PhaseCFG::convert_NeverBranch_to_Goto(Block *b) {
    dead->_nodes[k]->del_req(j);
 }

-//------------------------------move_to_next-----------------------------------
 // Helper function to move block bx to the slot following b_index. Return
 // true if the move is successful, otherwise false
 bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
@ -606,20 +586,22 @@ bool PhaseCFG::move_to_next(Block* bx, uint b_index) {

  // Return false if bx is already scheduled.
  uint bx_index = bx->_pre_order;
-  if ((bx_index <= b_index) && (_blocks[bx_index] == bx)) {
+  if ((bx_index <= b_index) && (get_block(bx_index) == bx)) {
    return false;
  }

  // Find the current index of block bx on the block list
  bx_index = b_index + 1;
-  while( bx_index < _num_blocks && _blocks[bx_index] != bx ) bx_index++;
-  assert(_blocks[bx_index] == bx, "block not found");
+  while (bx_index < number_of_blocks() && get_block(bx_index) != bx) {
+    bx_index++;
+  }
+  assert(get_block(bx_index) == bx, "block not found");

  // If the previous block conditionally falls into bx, return false,
  // because moving bx will create an extra jump.
  for(uint k = 1; k < bx->num_preds(); k++ ) {
    Block* pred = get_block_for_node(bx->pred(k));
-    if (pred == _blocks[bx_index-1]) {
+    if (pred == get_block(bx_index - 1)) {
      if (pred->_num_succs != 1) {
        return false;
      }
@ -632,7 +614,6 @@ bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
  return true;
 }

-//------------------------------move_to_end------------------------------------
 // Move empty and uncommon blocks to the end.
 void PhaseCFG::move_to_end(Block *b, uint i) {
  int e = b->is_Empty();
@ -650,31 +631,31 @@ void PhaseCFG::move_to_end(Block *b, uint i) {
  _blocks.push(b);
 }

-//---------------------------set_loop_alignment--------------------------------
 // Set loop alignment for every block
 void PhaseCFG::set_loop_alignment() {
-  uint last = _num_blocks;
-  assert( _blocks[0] == _broot, "" );
+  uint last = number_of_blocks();
+  assert(get_block(0) == get_root_block(), "");

-  for (uint i = 1; i < last; i++ ) {
-    Block *b = _blocks[i];
-    if (b->head()->is_Loop()) {
-      b->set_loop_alignment(b);
+  for (uint i = 1; i < last; i++) {
+    Block* block = get_block(i);
+    if (block->head()->is_Loop()) {
+      block->set_loop_alignment(block);
    }
  }
 }

-//-----------------------------remove_empty------------------------------------
 // Make empty basic blocks to be "connector" blocks, Move uncommon blocks
 // to the end.
-void PhaseCFG::remove_empty() {
+void PhaseCFG::remove_empty_blocks() {
  // Move uncommon blocks to the end
-  uint last = _num_blocks;
-  assert( _blocks[0] == _broot, "" );
+  uint last = number_of_blocks();
+  assert(get_block(0) == get_root_block(), "");

  for (uint i = 1; i < last; i++) {
-    Block *b = _blocks[i];
-    if (b->is_connector()) break;
+    Block* block = get_block(i);
+    if (block->is_connector()) {
+      break;
+    }

    // Check for NeverBranch at block end.  This needs to become a GOTO to the
    // true target.  NeverBranch are treated as a conditional branch that
@ -682,124 +663,127 @@ void PhaseCFG::remove_empty() {
    // to give a fake exit path to infinite loops.  At this late stage they
    // need to turn into Goto's so that when you enter the infinite loop you
    // indeed hang.
-    if( b->_nodes[b->end_idx()]->Opcode() == Op_NeverBranch )
-      convert_NeverBranch_to_Goto(b);
+    if (block->_nodes[block->end_idx()]->Opcode() == Op_NeverBranch) {
+      convert_NeverBranch_to_Goto(block);
+    }

    // Look for uncommon blocks and move to end.
    if (!C->do_freq_based_layout()) {
-      if (b->is_uncommon(this)) {
-        move_to_end(b, i);
+      if (block->is_uncommon(this)) {
+        move_to_end(block, i);
        last--;                   // No longer check for being uncommon!
-        if( no_flip_branch(b) ) { // Fall-thru case must follow?
-          b = _blocks[i];         // Find the fall-thru block
-          move_to_end(b, i);
+        if (no_flip_branch(block)) { // Fall-thru case must follow?
+          // Find the fall-thru block
+          block = get_block(i);
+          move_to_end(block, i);
          last--;
        }
-        i--;                      // backup block counter post-increment
+        // backup block counter post-increment
+        i--;
      }
    }
  }

  // Move empty blocks to the end
-  last = _num_blocks;
+  last = number_of_blocks();
  for (uint i = 1; i < last; i++) {
-    Block *b = _blocks[i];
-    if (b->is_Empty() != Block::not_empty) {
-      move_to_end(b, i);
+    Block* block = get_block(i);
+    if (block->is_Empty() != Block::not_empty) {
+      move_to_end(block, i);
      last--;
      i--;
    }
  } // End of for all blocks
 }

-//-----------------------------fixup_flow--------------------------------------
 // Fix up the final control flow for basic blocks.
 void PhaseCFG::fixup_flow() {
  // Fixup final control flow for the blocks.  Remove jump-to-next
  // block.  If neither arm of a IF follows the conditional branch, we
  // have to add a second jump after the conditional.  We place the
  // TRUE branch target in succs[0] for both GOTOs and IFs.
-  for (uint i=0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    b->_pre_order = i;          // turn pre-order into block-index
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    block->_pre_order = i;          // turn pre-order into block-index

    // Connector blocks need no further processing.
-    if (b->is_connector()) {
-      assert((i+1) == _num_blocks || _blocks[i+1]->is_connector(),
-             "All connector blocks should sink to the end");
+    if (block->is_connector()) {
+      assert((i+1) == number_of_blocks() || get_block(i + 1)->is_connector(), "All connector blocks should sink to the end");
      continue;
    }
-    assert(b->is_Empty() != Block::completely_empty,
-           "Empty blocks should be connectors");
+    assert(block->is_Empty() != Block::completely_empty, "Empty blocks should be connectors");

-    Block *bnext = (i < _num_blocks-1) ? _blocks[i+1] : NULL;
-    Block *bs0 = b->non_connector_successor(0);
+    Block* bnext = (i < number_of_blocks() - 1) ? get_block(i + 1) : NULL;
+    Block* bs0 = block->non_connector_successor(0);

    // Check for multi-way branches where I cannot negate the test to
    // exchange the true and false targets.
-    if( no_flip_branch( b ) ) {
+    if (no_flip_branch(block)) {
      // Find fall through case - if must fall into its target
-      int branch_idx = b->_nodes.size() - b->_num_succs;
-      for (uint j2 = 0; j2 < b->_num_succs; j2++) {
-        const ProjNode* p = b->_nodes[branch_idx + j2]->as_Proj();
+      int branch_idx = block->_nodes.size() - block->_num_succs;
+      for (uint j2 = 0; j2 < block->_num_succs; j2++) {
+        const ProjNode* p = block->_nodes[branch_idx + j2]->as_Proj();
        if (p->_con == 0) {
          // successor j2 is fall through case
-          if (b->non_connector_successor(j2) != bnext) {
+          if (block->non_connector_successor(j2) != bnext) {
            // but it is not the next block => insert a goto
            insert_goto_at(i, j2);
          }
          // Put taken branch in slot 0
-          if( j2 == 0 && b->_num_succs == 2) {
+          if (j2 == 0 && block->_num_succs == 2) {
            // Flip targets in succs map
-            Block *tbs0 = b->_succs[0];
-            Block *tbs1 = b->_succs[1];
-            b->_succs.map( 0, tbs1 );
-            b->_succs.map( 1, tbs0 );
+            Block *tbs0 = block->_succs[0];
+            Block *tbs1 = block->_succs[1];
+            block->_succs.map(0, tbs1);
+            block->_succs.map(1, tbs0);
          }
          break;
        }
      }
-      // Remove all CatchProjs
-      for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();

-    } else if (b->_num_succs == 1) {
+      // Remove all CatchProjs
+      for (uint j = 0; j < block->_num_succs; j++) {
+        block->_nodes.pop();
+      }
+
+    } else if (block->_num_succs == 1) {
      // Block ends in a Goto?
      if (bnext == bs0) {
        // We fall into next block; remove the Goto
-        b->_nodes.pop();
+        block->_nodes.pop();
      }

-    } else if( b->_num_succs == 2 ) { // Block ends in a If?
+    } else if(block->_num_succs == 2) { // Block ends in a If?
      // Get opcode of 1st projection (matches _succs[0])
      // Note: Since this basic block has 2 exits, the last 2 nodes must
      //       be projections (in any order), the 3rd last node must be
      //       the IfNode (we have excluded other 2-way exits such as
      //       CatchNodes already).
-      MachNode *iff   = b->_nodes[b->_nodes.size()-3]->as_Mach();
-      ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj();
-      ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj();
+      MachNode* iff   = block->_nodes[block->_nodes.size() - 3]->as_Mach();
+      ProjNode* proj0 = block->_nodes[block->_nodes.size() - 2]->as_Proj();
+      ProjNode* proj1 = block->_nodes[block->_nodes.size() - 1]->as_Proj();

      // Assert that proj0 and succs[0] match up. Similarly for proj1 and succs[1].
-      assert(proj0->raw_out(0) == b->_succs[0]->head(), "Mismatch successor 0");
-      assert(proj1->raw_out(0) == b->_succs[1]->head(), "Mismatch successor 1");
+      assert(proj0->raw_out(0) == block->_succs[0]->head(), "Mismatch successor 0");
+      assert(proj1->raw_out(0) == block->_succs[1]->head(), "Mismatch successor 1");

-      Block *bs1 = b->non_connector_successor(1);
+      Block* bs1 = block->non_connector_successor(1);

      // Check for neither successor block following the current
      // block ending in a conditional. If so, move one of the
      // successors after the current one, provided that the
      // successor was previously unscheduled, but moveable
      // (i.e., all paths to it involve a branch).
-      if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) {
+      if (!C->do_freq_based_layout() && bnext != bs0 && bnext != bs1) {
        // Choose the more common successor based on the probability
        // of the conditional branch.
-        Block *bx = bs0;
-        Block *by = bs1;
+        Block* bx = bs0;
+        Block* by = bs1;

        // _prob is the probability of taking the true path. Make
        // p the probability of taking successor #1.
        float p = iff->as_MachIf()->_prob;
-        if( proj0->Opcode() == Op_IfTrue ) {
+        if (proj0->Opcode() == Op_IfTrue) {
          p = 1.0 - p;
        }

@ -826,14 +810,16 @@ void PhaseCFG::fixup_flow() {
      // succs[1].
      if (bnext == bs0) {
        // Fall-thru case in succs[0], so flip targets in succs map
-        Block *tbs0 = b->_succs[0];
-        Block *tbs1 = b->_succs[1];
-        b->_succs.map( 0, tbs1 );
-        b->_succs.map( 1, tbs0 );
+        Block* tbs0 = block->_succs[0];
+        Block* tbs1 = block->_succs[1];
+        block->_succs.map(0, tbs1);
+        block->_succs.map(1, tbs0);
        // Flip projection for each target
-        { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; }
+        ProjNode* tmp = proj0;
+        proj0 = proj1;
+        proj1 = tmp;

-      } else if( bnext != bs1 ) {
+      } else if(bnext != bs1) {
        // Need a double-branch
        // The existing conditional branch need not change.
        // Add a unconditional branch to the false target.
@ -843,12 +829,12 @@ void PhaseCFG::fixup_flow() {
      }

      // Make sure we TRUE branch to the target
-      if( proj0->Opcode() == Op_IfFalse ) {
+      if (proj0->Opcode() == Op_IfFalse) {
        iff->as_MachIf()->negate();
      }

-      b->_nodes.pop();          // Remove IfFalse & IfTrue projections
-      b->_nodes.pop();
+      block->_nodes.pop();          // Remove IfFalse & IfTrue projections
+      block->_nodes.pop();

    } else {
      // Multi-exit block, e.g. a switch statement
@ -858,7 +844,6 @@ void PhaseCFG::fixup_flow() {
 }


-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited  ) const {
  const Node *x = end->is_block_proj();
@ -884,10 +869,11 @@ void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited  ) const {
 }

 void PhaseCFG::dump( ) const {
-  tty->print("\n--- CFG --- %d BBs\n",_num_blocks);
+  tty->print("\n--- CFG --- %d BBs\n", number_of_blocks());
  if (_blocks.size()) {        // Did we do basic-block layout?
-    for (uint i = 0; i < _num_blocks; i++) {
-      _blocks[i]->dump(this);
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      const Block* block = get_block(i);
+      block->dump(this);
    }
  } else {                      // Else do it with a DFS
    VectorSet visited(_block_arena);
@ -896,27 +882,26 @@ void PhaseCFG::dump( ) const {
 }

 void PhaseCFG::dump_headers() {
-  for( uint i = 0; i < _num_blocks; i++ ) {
-    if (_blocks[i]) {
-      _blocks[i]->dump_head(this);
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    if (block != NULL) {
+      block->dump_head(this);
    }
  }
 }

-void PhaseCFG::verify( ) const {
+void PhaseCFG::verify() const {
 #ifdef ASSERT
  // Verify sane CFG
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    uint cnt = b->_nodes.size();
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    uint cnt = block->_nodes.size();
    uint j;
    for (j = 0; j < cnt; j++)  {
-      Node *n = b->_nodes[j];
-      assert(get_block_for_node(n) == b, "");
-      if (j >= 1 && n->is_Mach() &&
-          n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
-        assert(j == 1 || b->_nodes[j-1]->is_Phi(),
-               "CreateEx must be first instruction in block");
+      Node *n = block->_nodes[j];
+      assert(get_block_for_node(n) == block, "");
+      if (j >= 1 && n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
+        assert(j == 1 || block->_nodes[j-1]->is_Phi(), "CreateEx must be first instruction in block");
      }
      for (uint k = 0; k < n->req(); k++) {
        Node *def = n->in(k);
@ -926,8 +911,7 @@ void PhaseCFG::verify( ) const {
          // Uses must follow their definition if they are at the same block.
          // Mostly done to check that MachSpillCopy nodes are placed correctly
          // when CreateEx node is moved in build_ifg_physical().
-          if (get_block_for_node(def) == b &&
-              !(b->head()->is_Loop() && n->is_Phi()) &&
+          if (get_block_for_node(def) == block && !(block->head()->is_Loop() && n->is_Phi()) &&
              // See (+++) comment in reg_split.cpp
              !(n->jvms() != NULL && n->jvms()->is_monitor_use(k))) {
            bool is_loop = false;
@ -939,29 +923,29 @@ void PhaseCFG::verify( ) const {
                }
              }
            }
-            assert(is_loop || b->find_node(def) < j, "uses must follow definitions");
+            assert(is_loop || block->find_node(def) < j, "uses must follow definitions");
          }
        }
      }
    }

-    j = b->end_idx();
-    Node *bp = (Node*)b->_nodes[b->_nodes.size()-1]->is_block_proj();
-    assert( bp, "last instruction must be a block proj" );
-    assert( bp == b->_nodes[j], "wrong number of successors for this block" );
+    j = block->end_idx();
+    Node* bp = (Node*)block->_nodes[block->_nodes.size() - 1]->is_block_proj();
+    assert(bp, "last instruction must be a block proj");
+    assert(bp == block->_nodes[j], "wrong number of successors for this block");
    if (bp->is_Catch()) {
-      while (b->_nodes[--j]->is_MachProj()) ;
-      assert(b->_nodes[j]->is_MachCall(), "CatchProj must follow call");
+      while (block->_nodes[--j]->is_MachProj()) {
+        ;
+      }
+      assert(block->_nodes[j]->is_MachCall(), "CatchProj must follow call");
    } else if (bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If) {
-      assert(b->_num_succs == 2, "Conditional branch must have two targets");
+      assert(block->_num_succs == 2, "Conditional branch must have two targets");
    }
  }
 #endif
 }
 #endif

-//=============================================================================
-//------------------------------UnionFind--------------------------------------
 UnionFind::UnionFind( uint max ) : _cnt(max), _max(max), _indices(NEW_RESOURCE_ARRAY(uint,max)) {
  Copy::zero_to_bytes( _indices, sizeof(uint)*max );
 }
@ -986,7 +970,6 @@ void UnionFind::reset( uint max ) {
  for( uint i=0; i<max; i++ ) map(i,i);
 }

-//------------------------------Find_compress----------------------------------
 // Straight out of Tarjan's union-find algorithm
 uint UnionFind::Find_compress( uint idx ) {
  uint cur  = idx;
@ -1006,7 +989,6 @@ uint UnionFind::Find_compress( uint idx ) {
  return idx;
 }

-//------------------------------Find_const-------------------------------------
 // Like Find above, but no path compress, so bad asymptotic behavior
 uint UnionFind::Find_const( uint idx ) const {
  if( idx == 0 ) return idx;    // Ignore the zero idx
@ -1021,7 +1003,6 @@ uint UnionFind::Find_const( uint idx ) const {
  return next;
 }

-//------------------------------Union------------------------------------------
 // union 2 sets together.
 void UnionFind::Union( uint idx1, uint idx2 ) {
  uint src = Find(idx1);
@ -1070,9 +1051,6 @@ void CFGEdge::dump( ) const {
 }
 #endif

-//=============================================================================
-
-//------------------------------edge_order-------------------------------------
 // Comparison function for edges
 static int edge_order(CFGEdge **e0, CFGEdge **e1) {
  float freq0 = (*e0)->freq();
@ -1087,7 +1065,6 @@ static int edge_order(CFGEdge **e0, CFGEdge **e1) {
  return dist1 - dist0;
 }

-//------------------------------trace_frequency_order--------------------------
 // Comparison function for edges
 extern "C" int trace_frequency_order(const void *p0, const void *p1) {
  Trace *tr0 = *(Trace **) p0;
@ -1113,17 +1090,15 @@ extern "C" int trace_frequency_order(const void *p0, const void *p1) {
  return diff;
 }

-//------------------------------find_edges-------------------------------------
 // Find edges of interest, i.e, those which can fall through. Presumes that
 // edges which don't fall through are of low frequency and can be generally
 // ignored.  Initialize the list of traces.
-void PhaseBlockLayout::find_edges()
-{
+void PhaseBlockLayout::find_edges() {
  // Walk the blocks, creating edges and Traces
  uint i;
  Trace *tr = NULL;
-  for (i = 0; i < _cfg._num_blocks; i++) {
-    Block *b = _cfg._blocks[i];
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* b = _cfg.get_block(i);
    tr = new Trace(b, next, prev);
    traces[tr->id()] = tr;

@ -1147,7 +1122,7 @@ void PhaseBlockLayout::find_edges()
      if (n->num_preds() != 1) break;

      i++;
-      assert(n = _cfg._blocks[i], "expecting next block");
+      assert(n = _cfg.get_block(i), "expecting next block");
      tr->append(n);
      uf->map(n->_pre_order, tr->id());
      traces[n->_pre_order] = NULL;
@ -1171,8 +1146,8 @@ void PhaseBlockLayout::find_edges()
  }

  // Group connector blocks into one trace
-  for (i++; i < _cfg._num_blocks; i++) {
-    Block *b = _cfg._blocks[i];
+  for (i++; i < _cfg.number_of_blocks(); i++) {
+    Block *b = _cfg.get_block(i);
    assert(b->is_connector(), "connector blocks at the end");
    tr->append(b);
    uf->map(b->_pre_order, tr->id());
@ -1180,10 +1155,8 @@ void PhaseBlockLayout::find_edges()
  }
 }

-//------------------------------union_traces----------------------------------
 // Union two traces together in uf, and null out the trace in the list
-void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace)
-{
+void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) {
  uint old_id = old_trace->id();
  uint updated_id = updated_trace->id();

@ -1207,10 +1180,8 @@ void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace)
  traces[hi_id] = NULL;
 }

-//------------------------------grow_traces-------------------------------------
 // Append traces together via the most frequently executed edges
-void PhaseBlockLayout::grow_traces()
-{
+void PhaseBlockLayout::grow_traces() {
  // Order the edges, and drive the growth of Traces via the most
  // frequently executed edges.
  edges->sort(edge_order);
@ -1252,11 +1223,9 @@ void PhaseBlockLayout::grow_traces()
  }
 }

-//------------------------------merge_traces-----------------------------------
 // Embed one trace into another, if the fork or join points are sufficiently
 // balanced.
-void PhaseBlockLayout::merge_traces(bool fall_thru_only)
-{
+void PhaseBlockLayout::merge_traces(bool fall_thru_only) {
  // Walk the edge list a another time, looking at unprocessed edges.
  // Fold in diamonds
  for (int i = 0; i < edges->length(); i++) {
@ -1310,7 +1279,7 @@ void PhaseBlockLayout::merge_traces(bool fall_thru_only)
        src_trace->insert_after(src_block, targ_trace);
        union_traces(src_trace, targ_trace);
      } else if (src_at_tail) {
-        if (src_trace != trace(_cfg._broot)) {
+        if (src_trace != trace(_cfg.get_root_block())) {
          e->set_state(CFGEdge::connected);
          targ_trace->insert_before(targ_block, src_trace);
          union_traces(targ_trace, src_trace);
@ -1319,7 +1288,7 @@ void PhaseBlockLayout::merge_traces(bool fall_thru_only)
    } else if (e->state() == CFGEdge::open) {
      // Append traces, even without a fall-thru connection.
      // But leave root entry at the beginning of the block list.
-      if (targ_trace != trace(_cfg._broot)) {
+      if (targ_trace != trace(_cfg.get_root_block())) {
        e->set_state(CFGEdge::connected);
        src_trace->append(targ_trace);
        union_traces(src_trace, targ_trace);
@ -1328,11 +1297,9 @@ void PhaseBlockLayout::merge_traces(bool fall_thru_only)
  }
 }

-//----------------------------reorder_traces-----------------------------------
 // Order the sequence of the traces in some desirable way, and fixup the
 // jumps at the end of each block.
-void PhaseBlockLayout::reorder_traces(int count)
-{
+void PhaseBlockLayout::reorder_traces(int count) {
  ResourceArea *area = Thread::current()->resource_area();
  Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count);
  Block_List worklist;
@ -1347,15 +1314,14 @@ void PhaseBlockLayout::reorder_traces(int count)
  }

  // The entry block should be first on the new trace list.
-  Trace *tr = trace(_cfg._broot);
+  Trace *tr = trace(_cfg.get_root_block());
  assert(tr == new_traces[0], "entry trace misplaced");

  // Sort the new trace list by frequency
  qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order);

  // Patch up the successor blocks
-  _cfg._blocks.reset();
-  _cfg._num_blocks = 0;
+  _cfg.clear_blocks();
  for (int i = 0; i < new_count; i++) {
    Trace *tr = new_traces[i];
    if (tr != NULL) {
@ -1364,17 +1330,15 @@ void PhaseBlockLayout::reorder_traces(int count)
  }
 }

-//------------------------------PhaseBlockLayout-------------------------------
 // Order basic blocks based on frequency
-PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) :
-  Phase(BlockLayout),
-  _cfg(cfg)
-{
+PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg)
+: Phase(BlockLayout)
+, _cfg(cfg) {
  ResourceMark rm;
  ResourceArea *area = Thread::current()->resource_area();

  // List of traces
-  int size = _cfg._num_blocks + 1;
+  int size = _cfg.number_of_blocks() + 1;
  traces = NEW_ARENA_ARRAY(area, Trace *, size);
  memset(traces, 0, size*sizeof(Trace*));
  next = NEW_ARENA_ARRAY(area, Block *, size);
@ -1407,11 +1371,10 @@ PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) :
  // Re-order all the remaining traces by frequency
  reorder_traces(size);

-  assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink");
+  assert(_cfg.number_of_blocks() >= (uint) (size - 1), "number of blocks can not shrink");
 }


-//------------------------------backedge---------------------------------------
 // Edge e completes a loop in a trace. If the target block is head of the
 // loop, rotate the loop block so that the loop ends in a conditional branch.
 bool Trace::backedge(CFGEdge *e) {
@ -1463,14 +1426,12 @@ bool Trace::backedge(CFGEdge *e) {
  return loop_rotated;
 }

-//------------------------------fixup_blocks-----------------------------------
 // push blocks onto the CFG list
 // ensure that blocks have the correct two-way branch sense
 void Trace::fixup_blocks(PhaseCFG &cfg) {
  Block *last = last_block();
  for (Block *b = first_block(); b != NULL; b = next(b)) {
-    cfg._blocks.push(b);
-    cfg._num_blocks++;
+    cfg.add_block(b);
    if (!b->is_connector()) {
      int nfallthru = b->num_fall_throughs();
      if (b != last) {
--- a/hotspot/src/share/vm/opto/block.hpp
+++ b/hotspot/src/share/vm/opto/block.hpp
@ -348,20 +348,77 @@ class Block : public CFGElement {
 class PhaseCFG : public Phase {
  friend class VMStructs;
 private:
+
+  // Root of whole program
+  RootNode* _root;
+
+  // The block containing the root node
+  Block* _root_block;
+
+  // List of basic blocks that are created during CFG creation
+  Block_List _blocks;
+
+  // Count of basic blocks
+  uint _number_of_blocks;
+
  // Arena for the blocks to be stored in
  Arena* _block_arena;

+  // The matcher for this compilation
+  Matcher& _matcher;
+
  // Map nodes to owning basic block
  Block_Array _node_to_block_mapping;

+  // Loop from the root
+  CFGLoop* _root_loop;
+
+  // Outmost loop frequency
+  float _outer_loop_frequency;
+
+  // Per node latency estimation, valid only during GCM
+  GrowableArray<uint>* _node_latency;
+
  // Build a proper looking cfg.  Return count of basic blocks
  uint build_cfg();

-  // Perform DFS search.
+  // Build the dominator tree so that we know where we can move instructions
+  void build_dominator_tree();
+
+  // Estimate block frequencies based on IfNode probabilities, so that we know where we want to move instructions
+  void estimate_block_frequency();
+
+  // Global Code Motion.  See Click's PLDI95 paper.  Place Nodes in specific
+  // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
+  // Move nodes to ensure correctness from GVN and also try to move nodes out of loops.
+  void global_code_motion();
+
+  // Schedule Nodes early in their basic blocks.
+  bool schedule_early(VectorSet &visited, Node_List &roots);
+
+  // For each node, find the latest block it can be scheduled into
+  // and then select the cheapest block between the latest and earliest
+  // block to place the node.
+  void schedule_late(VectorSet &visited, Node_List &stack);
+
+  // Compute the (backwards) latency of a node from a single use
+  int latency_from_use(Node *n, const Node *def, Node *use);
+
+  // Compute the (backwards) latency of a node from the uses of this instruction
+  void partial_latency_of_defs(Node *n);
+
+  // Compute the instruction global latency with a backwards walk
+  void compute_latencies_backwards(VectorSet &visited, Node_List &stack);
+
+  // Pick a block between early and late that is a cheaper alternative
+  // to late. Helper for schedule_late.
+  Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
+
+  // Perform a Depth First Search (DFS).
  // Setup 'vertex' as DFS to vertex mapping.
  // Setup 'semi' as vertex to DFS mapping.
  // Set 'parent' to DFS parent.
-  uint DFS( Tarjan *tarjan );
+  uint do_DFS(Tarjan* tarjan, uint rpo_counter);

  // Helper function to insert a node into a block
  void schedule_node_into_block( Node *n, Block *b );
@ -372,7 +429,8 @@ class PhaseCFG : public Phase {
  void schedule_pinned_nodes( VectorSet &visited );

  // I'll need a few machine-specific GotoNodes.  Clone from this one.
-  MachNode *_goto;
+  // Used when building the CFG and creating end nodes for blocks.
+  MachNode* _goto;

  Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
  void verify_anti_dependences(Block* LCA, Node* load) {
@ -380,17 +438,77 @@ class PhaseCFG : public Phase {
    insert_anti_dependences(LCA, load, true);
  }

+  bool move_to_next(Block* bx, uint b_index);
+  void move_to_end(Block* bx, uint b_index);
+
+  void insert_goto_at(uint block_no, uint succ_no);
+
+  // Check for NeverBranch at block end.  This needs to become a GOTO to the
+  // true target.  NeverBranch are treated as a conditional branch that always
+  // goes the same direction for most of the optimizer and are used to give a
+  // fake exit path to infinite loops.  At this late stage they need to turn
+  // into Goto's so that when you enter the infinite loop you indeed hang.
+  void convert_NeverBranch_to_Goto(Block *b);
+
+  CFGLoop* create_loop_tree();
+
+  #ifndef PRODUCT
+  bool _trace_opto_pipelining;  // tracing flag
+  #endif
+
 public:
  PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher);

-  uint _num_blocks;             // Count of basic blocks
-  Block_List _blocks;           // List of basic blocks
-  RootNode *_root;              // Root of whole program
-  Block *_broot;                // Basic block of root
-  uint _rpo_ctr;
-  CFGLoop* _root_loop;
-  float _outer_loop_freq;       // Outmost loop frequency
+  void set_latency_for_node(Node* node, int latency) {
+    _node_latency->at_put_grow(node->_idx, latency);
+  }

+  uint get_latency_for_node(Node* node) {
+    return _node_latency->at_grow(node->_idx);
+  }
+
+  // Get the outer most frequency
+  float get_outer_loop_frequency() const {
+    return _outer_loop_frequency;
+  }
+
+  // Get the root node of the CFG
+  RootNode* get_root_node() const {
+    return _root;
+  }
+
+  // Get the block of the root node
+  Block* get_root_block() const {
+    return _root_block;
+  }
+
+  // Add a block at a position and moves the later ones one step
+  void add_block_at(uint pos, Block* block) {
+    _blocks.insert(pos, block);
+    _number_of_blocks++;
+  }
+
+  // Adds a block to the top of the block list
+  void add_block(Block* block) {
+    _blocks.push(block);
+    _number_of_blocks++;
+  }
+
+  // Clear the list of blocks
+  void clear_blocks() {
+    _blocks.reset();
+    _number_of_blocks = 0;
+  }
+
+  // Get the block at position pos in _blocks
+  Block* get_block(uint pos) const {
+    return _blocks[pos];
+  }
+
+  // Number of blocks
+  uint number_of_blocks() const {
+    return _number_of_blocks;
+  }

  // set which block this node should reside in
  void map_node_to_block(const Node* node, Block* block) {
@ -412,72 +530,26 @@ class PhaseCFG : public Phase {
    return (_node_to_block_mapping.lookup(node->_idx) != NULL);
  }

-  // Per node latency estimation, valid only during GCM
-  GrowableArray<uint> *_node_latency;
-
-#ifndef PRODUCT
-  bool _trace_opto_pipelining;  // tracing flag
-#endif
-
 #ifdef ASSERT
  Unique_Node_List _raw_oops;
 #endif

-  // Build dominators
-  void Dominators();
-
-  // Estimate block frequencies based on IfNode probabilities
-  void Estimate_Block_Frequency();
-
-  // Global Code Motion.  See Click's PLDI95 paper.  Place Nodes in specific
-  // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
-  void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list );
+  // Do global code motion by first building dominator tree and estimate block frequency
+  // Returns true on success
+  bool do_global_code_motion();

  // Compute the (backwards) latency of a node from the uses
  void latency_from_uses(Node *n);

-  // Compute the (backwards) latency of a node from a single use
-  int latency_from_use(Node *n, const Node *def, Node *use);
-
-  // Compute the (backwards) latency of a node from the uses of this instruction
-  void partial_latency_of_defs(Node *n);
-
-  // Schedule Nodes early in their basic blocks.
-  bool schedule_early(VectorSet &visited, Node_List &roots);
-
-  // For each node, find the latest block it can be scheduled into
-  // and then select the cheapest block between the latest and earliest
-  // block to place the node.
-  void schedule_late(VectorSet &visited, Node_List &stack);
-
-  // Pick a block between early and late that is a cheaper alternative
-  // to late. Helper for schedule_late.
-  Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
-
-  // Compute the instruction global latency with a backwards walk
-  void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack);
-
  // Set loop alignment
  void set_loop_alignment();

  // Remove empty basic blocks
-  void remove_empty();
+  void remove_empty_blocks();
  void fixup_flow();
-  bool move_to_next(Block* bx, uint b_index);
-  void move_to_end(Block* bx, uint b_index);
-  void insert_goto_at(uint block_no, uint succ_no);

-  // Check for NeverBranch at block end.  This needs to become a GOTO to the
-  // true target.  NeverBranch are treated as a conditional branch that always
-  // goes the same direction for most of the optimizer and are used to give a
-  // fake exit path to infinite loops.  At this late stage they need to turn
-  // into Goto's so that when you enter the infinite loop you indeed hang.
-  void convert_NeverBranch_to_Goto(Block *b);
-
-  CFGLoop* create_loop_tree();
-
-  // Insert a node into a block, and update the _bbs
-  void insert( Block *b, uint idx, Node *n ) {
+  // Insert a node into a block at index and map the node to the block
+  void insert(Block *b, uint idx, Node *n) {
    b->_nodes.insert( idx, n );
    map_node_to_block(n, b);
  }
--- a/hotspot/src/share/vm/opto/buildOopMap.cpp
+++ b/hotspot/src/share/vm/opto/buildOopMap.cpp
@ -87,7 +87,6 @@
 // OptoReg::Bad for not-callee-saved.


-//------------------------------OopFlow----------------------------------------
 // Structure to pass around
 struct OopFlow : public ResourceObj {
  short *_callees;              // Array mapping register to callee-saved
@ -119,7 +118,6 @@ struct OopFlow : public ResourceObj {
  OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live );
 };

-//------------------------------compute_reach----------------------------------
 // Given reaching-defs for this block start, compute it for this block end
 void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ) {

@ -177,7 +175,6 @@ void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehas
  }
 }

-//------------------------------merge------------------------------------------
 // Merge the given flow into the 'this' flow
 void OopFlow::merge( OopFlow *flow, int max_reg ) {
  assert( _b == NULL, "merging into a happy flow" );
@ -197,14 +194,12 @@ void OopFlow::merge( OopFlow *flow, int max_reg ) {

 }

-//------------------------------clone------------------------------------------
 void OopFlow::clone( OopFlow *flow, int max_size ) {
  _b = flow->_b;
  memcpy( _callees, flow->_callees, sizeof(short)*max_size);
  memcpy( _defs   , flow->_defs   , sizeof(Node*)*max_size);
 }

-//------------------------------make-------------------------------------------
 OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) {
  short *callees = NEW_ARENA_ARRAY(A,short,max_size+1);
  Node **defs    = NEW_ARENA_ARRAY(A,Node*,max_size+1);
@ -215,7 +210,6 @@ OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) {
  return flow;
 }

-//------------------------------bit twiddlers----------------------------------
 static int get_live_bit( int *live, int reg ) {
  return live[reg>>LogBitsPerInt] &   (1<<(reg&(BitsPerInt-1))); }
 static void set_live_bit( int *live, int reg ) {
@ -223,7 +217,6 @@ static void set_live_bit( int *live, int reg ) {
 static void clr_live_bit( int *live, int reg ) {
         live[reg>>LogBitsPerInt] &= ~(1<<(reg&(BitsPerInt-1))); }

-//------------------------------build_oop_map----------------------------------
 // Build an oopmap from the current flow info
 OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ) {
  int framesize = regalloc->_framesize;
@ -412,19 +405,18 @@ OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, i
  return omap;
 }

-//------------------------------do_liveness------------------------------------
 // Compute backwards liveness on registers
-static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *worklist, int max_reg_ints, Arena *A, Dict *safehash ) {
-  int *live = NEW_ARENA_ARRAY(A, int, (cfg->_num_blocks+1) * max_reg_ints);
-  int *tmp_live = &live[cfg->_num_blocks * max_reg_ints];
-  Node *root = cfg->C->root();
+static void do_liveness(PhaseRegAlloc* regalloc, PhaseCFG* cfg, Block_List* worklist, int max_reg_ints, Arena* A, Dict* safehash) {
+  int* live = NEW_ARENA_ARRAY(A, int, (cfg->number_of_blocks() + 1) * max_reg_ints);
+  int* tmp_live = &live[cfg->number_of_blocks() * max_reg_ints];
+  Node* root = cfg->get_root_node();
  // On CISC platforms, get the node representing the stack pointer  that regalloc
  // used for spills
  Node *fp = NodeSentinel;
  if (UseCISCSpill && root->req() > 1) {
    fp = root->in(1)->in(TypeFunc::FramePtr);
  }
-  memset( live, 0, cfg->_num_blocks * (max_reg_ints<<LogBytesPerInt) );
+  memset(live, 0, cfg->number_of_blocks() * (max_reg_ints << LogBytesPerInt));
  // Push preds onto worklist
  for (uint i = 1; i < root->req(); i++) {
    Block* block = cfg->get_block_for_node(root->in(i));
@ -549,29 +541,32 @@ static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *wor
    // Scan for any missing safepoints.  Happens to infinite loops
    // ala ZKM.jar
    uint i;
-    for( i=1; i<cfg->_num_blocks; i++ ) {
-      Block *b = cfg->_blocks[i];
+    for (i = 1; i < cfg->number_of_blocks(); i++) {
+      Block* block = cfg->get_block(i);
      uint j;
-      for( j=1; j<b->_nodes.size(); j++ )
-        if( b->_nodes[j]->jvms() &&
-            (*safehash)[b->_nodes[j]] == NULL )
+      for (j = 1; j < block->_nodes.size(); j++) {
+        if (block->_nodes[j]->jvms() && (*safehash)[block->_nodes[j]] == NULL) {
           break;
-      if( j<b->_nodes.size() ) break;
+        }
+      }
+      if (j < block->_nodes.size()) {
+        break;
+      }
    }
-    if( i == cfg->_num_blocks )
+    if (i == cfg->number_of_blocks()) {
      break;                    // Got 'em all
+    }
 #ifndef PRODUCT
    if( PrintOpto && Verbose )
      tty->print_cr("retripping live calc");
 #endif
    // Force the issue (expensively): recheck everybody
-    for( i=1; i<cfg->_num_blocks; i++ )
-      worklist->push(cfg->_blocks[i]);
+    for (i = 1; i < cfg->number_of_blocks(); i++) {
+      worklist->push(cfg->get_block(i));
+    }
  }
-
 }

-//------------------------------BuildOopMaps-----------------------------------
 // Collect GC mask info - where are all the OOPs?
 void Compile::BuildOopMaps() {
  NOT_PRODUCT( TracePhase t3("bldOopMaps", &_t_buildOopMaps, TimeCompiler); )
@ -592,12 +587,12 @@ void Compile::BuildOopMaps() {
  OopFlow *free_list = NULL;    // Free, unused

  // Array mapping blocks to completed oopflows
-  OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->_num_blocks);
-  memset( flows, 0, _cfg->_num_blocks*sizeof(OopFlow*) );
+  OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->number_of_blocks());
+  memset( flows, 0, _cfg->number_of_blocks() * sizeof(OopFlow*) );


  // Do the first block 'by hand' to prime the worklist
-  Block *entry = _cfg->_blocks[1];
+  Block *entry = _cfg->get_block(1);
  OopFlow *rootflow = OopFlow::make(A,max_reg,this);
  // Initialize to 'bottom' (not 'top')
  memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) );
@ -623,7 +618,9 @@ void Compile::BuildOopMaps() {

    Block *b = worklist.pop();
    // Ignore root block
-    if( b == _cfg->_broot ) continue;
+    if (b == _cfg->get_root_block()) {
+      continue;
+    }
    // Block is already done?  Happens if block has several predecessors,
    // he can get on the worklist more than once.
    if( flows[b->_pre_order] ) continue;
--- a/hotspot/src/share/vm/opto/chaitin.cpp
+++ b/hotspot/src/share/vm/opto/chaitin.cpp
@ -40,10 +40,8 @@
 #include "opto/opcodes.hpp"
 #include "opto/rootnode.hpp"

-//=============================================================================
-
 #ifndef PRODUCT
-void LRG::dump( ) const {
+void LRG::dump() const {
  ttyLocker ttyl;
  tty->print("%d ",num_regs());
  _mask.dump();
@ -94,7 +92,6 @@ void LRG::dump( ) const {
 }
 #endif

-//------------------------------score------------------------------------------
 // Compute score from cost and area.  Low score is best to spill.
 static double raw_score( double cost, double area ) {
  return cost - (area*RegisterCostAreaRatio) * 1.52588e-5;
@ -125,7 +122,6 @@ double LRG::score() const {
  return score;
 }

-//------------------------------LRG_List---------------------------------------
 LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) {
  memset( _lidxs, 0, sizeof(uint)*max );
 }
@ -211,7 +207,6 @@ uint LiveRangeMap::find_const(uint lrg) const {
  return next;
 }

-//------------------------------Chaitin----------------------------------------
 PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
  : PhaseRegAlloc(unique, cfg, matcher,
 #ifndef PRODUCT
@ -232,31 +227,31 @@ PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
 {
  NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )

-  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq);
+  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());

  // Build a list of basic blocks, sorted by frequency
-  _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
+  _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
  // Experiment with sorting strategies to speed compilation
  double  cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
  Block **buckets[NUMBUCKS];             // Array of buckets
  uint    buckcnt[NUMBUCKS];             // Array of bucket counters
  double  buckval[NUMBUCKS];             // Array of bucket value cutoffs
  for (uint i = 0; i < NUMBUCKS; i++) {
-    buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg._num_blocks);
+    buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
    buckcnt[i] = 0;
    // Bump by three orders of magnitude each time
    cutoff *= 0.001;
    buckval[i] = cutoff;
-    for (uint j = 0; j < _cfg._num_blocks; j++) {
+    for (uint j = 0; j < _cfg.number_of_blocks(); j++) {
      buckets[i][j] = NULL;
    }
  }
  // Sort blocks into buckets
-  for (uint i = 0; i < _cfg._num_blocks; i++) {
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
    for (uint j = 0; j < NUMBUCKS; j++) {
-      if ((j == NUMBUCKS - 1) || (_cfg._blocks[i]->_freq > buckval[j])) {
+      if ((j == NUMBUCKS - 1) || (_cfg.get_block(i)->_freq > buckval[j])) {
        // Assign block to end of list for appropriate bucket
-        buckets[j][buckcnt[j]++] = _cfg._blocks[i];
+        buckets[j][buckcnt[j]++] = _cfg.get_block(i);
        break; // kick out of inner loop
      }
    }
@ -269,10 +264,9 @@ PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
    }
  }

-  assert(blkcnt == _cfg._num_blocks, "Block array not totally filled");
+  assert(blkcnt == _cfg.number_of_blocks(), "Block array not totally filled");
 }

-//------------------------------Union------------------------------------------
 // union 2 sets together.
 void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
  uint src = _lrg_map.find(src_n);
@ -285,7 +279,6 @@ void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
  _lrg_map.uf_map(dst, src);
 }

-//------------------------------new_lrg----------------------------------------
 void PhaseChaitin::new_lrg(const Node *x, uint lrg) {
  // Make the Node->LRG mapping
  _lrg_map.extend(x->_idx,lrg);
@ -294,24 +287,28 @@ void PhaseChaitin::new_lrg(const Node *x, uint lrg) {
 }


-bool PhaseChaitin::clone_projs_shared(Block *b, uint idx, Node *con, Node *copy, uint max_lrg_id) {
-  Block* bcon = _cfg.get_block_for_node(con);
-  uint cindex = bcon->find_node(con);
-  Node *con_next = bcon->_nodes[cindex+1];
-  if (con_next->in(0) != con || !con_next->is_MachProj()) {
-    return false;               // No MachProj's follow
+int PhaseChaitin::clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id) {
+  assert(b->find_node(copy) == (idx - 1), "incorrect insert index for copy kill projections");
+  DEBUG_ONLY( Block* borig = _cfg.get_block_for_node(orig); )
+  int found_projs = 0;
+  uint cnt = orig->outcnt();
+  for (uint i = 0; i < cnt; i++) {
+    Node* proj = orig->raw_out(i);
+    if (proj->is_MachProj()) {
+      assert(proj->outcnt() == 0, "only kill projections are expected here");
+      assert(_cfg.get_block_for_node(proj) == borig, "incorrect block for kill projections");
+      found_projs++;
+      // Copy kill projections after the cloned node
+      Node* kills = proj->clone();
+      kills->set_req(0, copy);
+      b->_nodes.insert(idx++, kills);
+      _cfg.map_node_to_block(kills, b);
+      new_lrg(kills, max_lrg_id++);
+    }
  }
-
-  // Copy kills after the cloned constant
-  Node *kills = con_next->clone();
-  kills->set_req(0, copy);
-  b->_nodes.insert(idx, kills);
-  _cfg.map_node_to_block(kills, b);
-  new_lrg(kills, max_lrg_id);
-  return true;
+  return found_projs;
 }

-//------------------------------compact----------------------------------------
 // Renumber the live ranges to compact them.  Makes the IFG smaller.
 void PhaseChaitin::compact() {
  // Current the _uf_map contains a series of short chains which are headed
@ -677,20 +674,19 @@ void PhaseChaitin::Register_Allocate() {
  C->set_indexSet_arena(NULL);  // ResourceArea is at end of scope
 }

-//------------------------------de_ssa-----------------------------------------
 void PhaseChaitin::de_ssa() {
  // Set initial Names for all Nodes.  Most Nodes get the virtual register
  // number.  A few get the ZERO live range number.  These do not
  // get allocated, but instead rely on correct scheduling to ensure that
  // only one instance is simultaneously live at a time.
  uint lr_counter = 1;
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    uint cnt = b->_nodes.size();
+  for( uint i = 0; i < _cfg.number_of_blocks(); i++ ) {
+    Block* block = _cfg.get_block(i);
+    uint cnt = block->_nodes.size();

    // Handle all the normal Nodes in the block
    for( uint j = 0; j < cnt; j++ ) {
-      Node *n = b->_nodes[j];
+      Node *n = block->_nodes[j];
      // Pre-color to the zero live range, or pick virtual register
      const RegMask &rm = n->out_RegMask();
      _lrg_map.map(n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0);
@ -701,52 +697,55 @@ void PhaseChaitin::de_ssa() {
 }


-//------------------------------gather_lrg_masks-------------------------------
 // Gather LiveRanGe information, including register masks.  Modification of
 // cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
 void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {

  // Nail down the frame pointer live range
-  uint fp_lrg = _lrg_map.live_range_id(_cfg._root->in(1)->in(TypeFunc::FramePtr));
+  uint fp_lrg = _lrg_map.live_range_id(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr));
  lrgs(fp_lrg)._cost += 1e12;   // Cost is infinite

  // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);

    // For all instructions
-    for( uint j = 1; j < b->_nodes.size(); j++ ) {
-      Node *n = b->_nodes[j];
+    for (uint j = 1; j < block->_nodes.size(); j++) {
+      Node* n = block->_nodes[j];
      uint input_edge_start =1; // Skip control most nodes
-      if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base();
+      if (n->is_Mach()) {
+        input_edge_start = n->as_Mach()->oper_input_base();
+      }
      uint idx = n->is_Copy();

      // Get virtual register number, same as LiveRanGe index
      uint vreg = _lrg_map.live_range_id(n);
-      LRG &lrg = lrgs(vreg);
-      if( vreg ) {              // No vreg means un-allocable (e.g. memory)
+      LRG& lrg = lrgs(vreg);
+      if (vreg) {              // No vreg means un-allocable (e.g. memory)

        // Collect has-copy bit
-        if( idx ) {
+        if (idx) {
          lrg._has_copy = 1;
          uint clidx = _lrg_map.live_range_id(n->in(idx));
-          LRG &copy_src = lrgs(clidx);
+          LRG& copy_src = lrgs(clidx);
          copy_src._has_copy = 1;
        }

        // Check for float-vs-int live range (used in register-pressure
        // calculations)
        const Type *n_type = n->bottom_type();
-        if (n_type->is_floatingpoint())
+        if (n_type->is_floatingpoint()) {
          lrg._is_float = 1;
+        }

        // Check for twice prior spilling.  Once prior spilling might have
        // spilled 'soft', 2nd prior spill should have spilled 'hard' and
        // further spilling is unlikely to make progress.
-        if( _spilled_once.test(n->_idx) ) {
+        if (_spilled_once.test(n->_idx)) {
          lrg._was_spilled1 = 1;
-          if( _spilled_twice.test(n->_idx) )
+          if (_spilled_twice.test(n->_idx)) {
            lrg._was_spilled2 = 1;
+          }
        }

 #ifndef PRODUCT
@ -783,16 +782,18 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {

        // Check for bound register masks
        const RegMask &lrgmask = lrg.mask();
-        if (lrgmask.is_bound(ireg))
+        if (lrgmask.is_bound(ireg)) {
          lrg._is_bound = 1;
+        }

        // Check for maximum frequency value
-        if (lrg._maxfreq < b->_freq)
-          lrg._maxfreq = b->_freq;
+        if (lrg._maxfreq < block->_freq) {
+          lrg._maxfreq = block->_freq;
+        }

        // Check for oop-iness, or long/double
        // Check for multi-kill projection
-        switch( ireg ) {
+        switch (ireg) {
        case MachProjNode::fat_proj:
          // Fat projections have size equal to number of registers killed
          lrg.set_num_regs(rm.Size());
@ -962,7 +963,7 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
        // AggressiveCoalesce.  This effectively pre-virtual-splits
        // around uncommon uses of common defs.
        const RegMask &rm = n->in_RegMask(k);
-        if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * b->_freq) {
+        if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) {
          // Since we are BEFORE aggressive coalesce, leave the register
          // mask untrimmed by the call.  This encourages more coalescing.
          // Later, AFTER aggressive, this live range will have to spill
@ -1006,8 +1007,9 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
        }

        // Check for maximum frequency value
-        if( lrg._maxfreq < b->_freq )
-          lrg._maxfreq = b->_freq;
+        if (lrg._maxfreq < block->_freq) {
+          lrg._maxfreq = block->_freq;
+        }

      } // End for all allocated inputs
    } // end for all instructions
@ -1029,7 +1031,6 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
  }
 }

-//------------------------------set_was_low------------------------------------
 // Set the was-lo-degree bit.  Conservative coalescing should not change the
 // colorability of the graph.  If any live range was of low-degree before
 // coalescing, it should Simplify.  This call sets the was-lo-degree bit.
@ -1066,7 +1067,6 @@ void PhaseChaitin::set_was_low() {

 #define REGISTER_CONSTRAINED 16

-//------------------------------cache_lrg_info---------------------------------
 // Compute cost/area ratio, in case we spill.  Build the lo-degree list.
 void PhaseChaitin::cache_lrg_info( ) {

@ -1100,7 +1100,6 @@ void PhaseChaitin::cache_lrg_info( ) {
  }
 }

-//------------------------------Pre-Simplify-----------------------------------
 // Simplify the IFG by removing LRGs of low degree that have NO copies
 void PhaseChaitin::Pre_Simplify( ) {

@ -1151,7 +1150,6 @@ void PhaseChaitin::Pre_Simplify( ) {
  // No more lo-degree no-copy live ranges to simplify
 }

-//------------------------------Simplify---------------------------------------
 // Simplify the IFG by removing LRGs of low degree.
 void PhaseChaitin::Simplify( ) {

@ -1288,7 +1286,6 @@ void PhaseChaitin::Simplify( ) {

 }

-//------------------------------is_legal_reg-----------------------------------
 // Is 'reg' register legal for 'lrg'?
 static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
  if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) &&
@ -1315,7 +1312,6 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
  return false;
 }

-//------------------------------bias_color-------------------------------------
 // Choose a color using the biasing heuristic
 OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {

@ -1377,7 +1373,6 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
  return OptoReg::add( reg, chunk );
 }

-//------------------------------choose_color-----------------------------------
 // Choose a color in the current chunk
 OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
  assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)");
@ -1399,7 +1394,6 @@ OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
  return lrg.mask().find_last_elem();
 }

-//------------------------------Select-----------------------------------------
 // Select colors by re-inserting LRGs back into the IFG.  LRGs are re-inserted
 // in reverse order of removal.  As long as nothing of hi-degree was yanked,
 // everything going back is guaranteed a color.  Select that color.  If some
@ -1574,8 +1568,6 @@ uint PhaseChaitin::Select( ) {
  return spill_reg-LRG::SPILL_REG;      // Return number of spills
 }

-
-//------------------------------copy_was_spilled-------------------------------
 // Copy 'was_spilled'-edness from the source Node to the dst Node.
 void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {
  if( _spilled_once.test(src->_idx) ) {
@ -1588,14 +1580,12 @@ void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {
  }
 }

-//------------------------------set_was_spilled--------------------------------
 // Set the 'spilled_once' or 'spilled_twice' flag on a node.
 void PhaseChaitin::set_was_spilled( Node *n ) {
  if( _spilled_once.test_set(n->_idx) )
    _spilled_twice.set(n->_idx);
 }

-//------------------------------fixup_spills-----------------------------------
 // Convert Ideal spill instructions into proper FramePtr + offset Loads and
 // Stores.  Use-def chains are NOT preserved, but Node->LRG->reg maps are.
 void PhaseChaitin::fixup_spills() {
@ -1605,16 +1595,16 @@ void PhaseChaitin::fixup_spills() {
  NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )

  // Grab the Frame Pointer
-  Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr);
+  Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr);

  // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);

    // For all instructions in block
-    uint last_inst = b->end_idx();
-    for( uint j = 1; j <= last_inst; j++ ) {
-      Node *n = b->_nodes[j];
+    uint last_inst = block->end_idx();
+    for (uint j = 1; j <= last_inst; j++) {
+      Node* n = block->_nodes[j];

      // Dead instruction???
      assert( n->outcnt() != 0 ||// Nothing dead after post alloc
@ -1651,7 +1641,7 @@ void PhaseChaitin::fixup_spills() {
            assert( cisc->oper_input_base() == 2, "Only adding one edge");
            cisc->ins_req(1,src);         // Requires a memory edge
          }
-          b->_nodes.map(j,cisc);          // Insert into basic block
+          block->_nodes.map(j,cisc);          // Insert into basic block
          n->subsume_by(cisc, C); // Correct graph
          //
          ++_used_cisc_instructions;
@ -1677,7 +1667,6 @@ void PhaseChaitin::fixup_spills() {
  } // End of for all blocks
 }

-//------------------------------find_base_for_derived--------------------------
 // Helper to stretch above; recursively discover the base Node for a
 // given derived Node.  Easy for AddP-related machine nodes, but needs
 // to be recursive for derived Phis.
@ -1707,7 +1696,7 @@ Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derive
      // Initialize it once and make it shared:
      // set control to _root and place it into Start block
      // (where top() node is placed).
-      base->init_req(0, _cfg._root);
+      base->init_req(0, _cfg.get_root_node());
      Block *startb = _cfg.get_block_for_node(C->top());
      startb->_nodes.insert(startb->find_node(C->top()), base );
      _cfg.map_node_to_block(base, startb);
@ -1716,7 +1705,7 @@ Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derive
    if (_lrg_map.live_range_id(base) == 0) {
      new_lrg(base, maxlrg++);
    }
-    assert(base->in(0) == _cfg._root && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared");
+    assert(base->in(0) == _cfg.get_root_node() && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared");
    derived_base_map[derived->_idx] = base;
    return base;
  }
@ -1779,8 +1768,6 @@ Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derive
  return base;
 }

-
-//------------------------------stretch_base_pointer_live_ranges---------------
 // At each Safepoint, insert extra debug edges for each pair of derived value/
 // base pointer that is live across the Safepoint for oopmap building.  The
 // edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the
@ -1792,14 +1779,14 @@ bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
  memset( derived_base_map, 0, sizeof(Node*)*C->unique() );

  // For all blocks in RPO do...
-  for( uint i=0; i<_cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
    // Note use of deep-copy constructor.  I cannot hammer the original
    // liveout bits, because they are needed by the following coalesce pass.
-    IndexSet liveout(_live->live(b));
+    IndexSet liveout(_live->live(block));

-    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j-1];
+    for (uint j = block->end_idx() + 1; j > 1; j--) {
+      Node* n = block->_nodes[j - 1];

      // Pre-split compares of loop-phis.  Loop-phis form a cycle we would
      // like to see in the same register.  Compare uses the loop-phi and so
@ -1814,7 +1801,7 @@ bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
        Node *phi = n->in(1);
        if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
          Block *phi_block = _cfg.get_block_for_node(phi);
-          if (_cfg.get_block_for_node(phi_block->pred(2)) == b) {
+          if (_cfg.get_block_for_node(phi_block->pred(2)) == block) {
            const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
            Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
            insert_proj( phi_block, 1, spill, maxlrg++ );
@ -1868,7 +1855,7 @@ bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
            if ((_lrg_map.live_range_id(base) >= _lrg_map.max_lrg_id() || // (Brand new base (hence not live) or
                 !liveout.member(_lrg_map.live_range_id(base))) && // not live) AND
                 (_lrg_map.live_range_id(base) > 0) && // not a constant
-                 _cfg.get_block_for_node(base) != b) { // base not def'd in blk)
+                 _cfg.get_block_for_node(base) != block) { // base not def'd in blk)
              // Base pointer is not currently live.  Since I stretched
              // the base pointer to here and it crosses basic-block
              // boundaries, the global live info is now incorrect.
@ -1903,15 +1890,12 @@ bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) {
  return must_recompute_live != 0;
 }

-
-//------------------------------add_reference----------------------------------
 // Extend the node to LRG mapping

 void PhaseChaitin::add_reference(const Node *node, const Node *old_node) {
  _lrg_map.extend(node->_idx, _lrg_map.live_range_id(old_node));
 }

-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void PhaseChaitin::dump(const Node *n) const {
  uint r = (n->_idx < _lrg_map.size()) ? _lrg_map.find_const(n) : 0;
@ -2017,8 +2001,9 @@ void PhaseChaitin::dump() const {
              _matcher._new_SP, _framesize );

  // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ )
-    dump(_cfg._blocks[i]);
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    dump(_cfg.get_block(i));
+  }
  // End of per-block dump
  tty->print("\n");

@ -2059,7 +2044,6 @@ void PhaseChaitin::dump() const {
  tty->print_cr("");
 }

-//------------------------------dump_degree_lists------------------------------
 void PhaseChaitin::dump_degree_lists() const {
  // Dump lo-degree list
  tty->print("Lo degree: ");
@ -2080,7 +2064,6 @@ void PhaseChaitin::dump_degree_lists() const {
  tty->print_cr("");
 }

-//------------------------------dump_simplified--------------------------------
 void PhaseChaitin::dump_simplified() const {
  tty->print("Simplified: ");
  for( uint i = _simplified; i; i = lrgs(i)._next )
@ -2099,7 +2082,6 @@ static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) {
  return buf+strlen(buf);
 }

-//------------------------------dump_register----------------------------------
 // Dump a register name into a buffer.  Be intelligent if we get called
 // before allocation is complete.
 char *PhaseChaitin::dump_register( const Node *n, char *buf  ) const {
@ -2133,7 +2115,6 @@ char *PhaseChaitin::dump_register( const Node *n, char *buf  ) const {
  return buf+strlen(buf);
 }

-//----------------------dump_for_spill_split_recycle--------------------------
 void PhaseChaitin::dump_for_spill_split_recycle() const {
  if( WizardMode && (PrintCompilation || PrintOpto) ) {
    // Display which live ranges need to be split and the allocator's state
@ -2149,7 +2130,6 @@ void PhaseChaitin::dump_for_spill_split_recycle() const {
  }
 }

-//------------------------------dump_frame------------------------------------
 void PhaseChaitin::dump_frame() const {
  const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);
  const TypeTuple *domain = C->tf()->domain();
@ -2255,17 +2235,16 @@ void PhaseChaitin::dump_frame() const {
  tty->print_cr("#");
 }

-//------------------------------dump_bb----------------------------------------
 void PhaseChaitin::dump_bb( uint pre_order ) const {
  tty->print_cr("---dump of B%d---",pre_order);
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    if( b->_pre_order == pre_order )
-      dump(b);
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    if (block->_pre_order == pre_order) {
+      dump(block);
+    }
  }
 }

-//------------------------------dump_lrg---------------------------------------
 void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
  tty->print_cr("---dump of L%d---",lidx);

@ -2287,17 +2266,17 @@ void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
    tty->cr();
  }
  // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
    int dump_once = 0;

    // For all instructions
-    for( uint j = 0; j < b->_nodes.size(); j++ ) {
-      Node *n = b->_nodes[j];
+    for( uint j = 0; j < block->_nodes.size(); j++ ) {
+      Node *n = block->_nodes[j];
      if (_lrg_map.find_const(n) == lidx) {
        if (!dump_once++) {
          tty->cr();
-          b->dump_head(&_cfg);
+          block->dump_head(&_cfg);
        }
        dump(n);
        continue;
@ -2312,7 +2291,7 @@ void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
          if (_lrg_map.find_const(m) == lidx) {
            if (!dump_once++) {
              tty->cr();
-              b->dump_head(&_cfg);
+              block->dump_head(&_cfg);
            }
            dump(n);
          }
@ -2324,7 +2303,6 @@ void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
 }
 #endif // not PRODUCT

-//------------------------------print_chaitin_statistics-------------------------------
 int PhaseChaitin::_final_loads  = 0;
 int PhaseChaitin::_final_stores = 0;
 int PhaseChaitin::_final_memoves= 0;
--- a/hotspot/src/share/vm/opto/chaitin.hpp
+++ b/hotspot/src/share/vm/opto/chaitin.hpp
@ -412,33 +412,22 @@ class PhaseChaitin : public PhaseRegAlloc {
  uint split_DEF( Node *def, Block *b, int loc, uint max, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx );
  uint split_USE( Node *def, Block *b, Node *use, uint useidx, uint max, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx );

-  bool clone_projs(Block *b, uint idx, Node *con, Node *copy, LiveRangeMap &lrg_map) {
-    bool found_projs = clone_projs_shared(b, idx, con, copy, lrg_map.max_lrg_id());
-
-    if(found_projs) {
-      uint max_lrg_id = lrg_map.max_lrg_id();
-      lrg_map.set_max_lrg_id(max_lrg_id + 1);
-    }
-
-    return found_projs;
-  }
-
  //------------------------------clone_projs------------------------------------
  // After cloning some rematerialized instruction, clone any MachProj's that
  // follow it.  Example: Intel zero is XOR, kills flags.  Sparc FP constants
  // use G3 as an address temp.
-  bool clone_projs(Block *b, uint idx, Node *con, Node *copy, uint &max_lrg_id) {
-    bool found_projs = clone_projs_shared(b, idx, con, copy, max_lrg_id);
+  int clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id);

-    if(found_projs) {
-      max_lrg_id++;
+  int clone_projs(Block* b, uint idx, Node* orig, Node* copy, LiveRangeMap& lrg_map) {
+    uint max_lrg_id = lrg_map.max_lrg_id();
+    int found_projs = clone_projs(b, idx, orig, copy, max_lrg_id);
+    if (found_projs > 0) {
+      // max_lrg_id is updated during call above
+      lrg_map.set_max_lrg_id(max_lrg_id);
    }
-
    return found_projs;
  }

-  bool clone_projs_shared(Block *b, uint idx, Node *con, Node *copy, uint max_lrg_id);
-
  Node *split_Rematerialize(Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits,
                            int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru);
  // True if lidx is used before any real register is def'd in the block
--- a/hotspot/src/share/vm/opto/coalesce.cpp
+++ b/hotspot/src/share/vm/opto/coalesce.cpp
@ -34,8 +34,6 @@
 #include "opto/matcher.hpp"
 #include "opto/regmask.hpp"

-//=============================================================================
-//------------------------------Dump-------------------------------------------
 #ifndef PRODUCT
 void PhaseCoalesce::dump(Node *n) const {
  // Being a const function means I cannot use 'Find'
@ -43,12 +41,11 @@ void PhaseCoalesce::dump(Node *n) const {
  tty->print("L%d/N%d ",r,n->_idx);
 }

-//------------------------------dump-------------------------------------------
 void PhaseCoalesce::dump() const {
  // I know I have a block layout now, so I can print blocks in a loop
-  for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
+  for( uint i=0; i<_phc._cfg.number_of_blocks(); i++ ) {
    uint j;
-    Block *b = _phc._cfg._blocks[i];
+    Block* b = _phc._cfg.get_block(i);
    // Print a nice block header
    tty->print("B%d: ",b->_pre_order);
    for( j=1; j<b->num_preds(); j++ )
@ -85,7 +82,6 @@ void PhaseCoalesce::dump() const {
 }
 #endif

-//------------------------------combine_these_two------------------------------
 // Combine the live ranges def'd by these 2 Nodes.  N2 is an input to N1.
 void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
  uint lr1 = _phc._lrg_map.find(n1);
@ -127,18 +123,15 @@ void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
  }
 }

-//------------------------------coalesce_driver--------------------------------
 // Copy coalescing
-void PhaseCoalesce::coalesce_driver( ) {
-
+void PhaseCoalesce::coalesce_driver() {
  verify();
  // Coalesce from high frequency to low
-  for( uint i=0; i<_phc._cfg._num_blocks; i++ )
-    coalesce( _phc._blks[i] );
-
+  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
+    coalesce(_phc._blks[i]);
+  }
 }

-//------------------------------insert_copy_with_overlap-----------------------
 // I am inserting copies to come out of SSA form.  In the general case, I am
 // doing a parallel renaming.  I'm in the Named world now, so I can't do a
 // general parallel renaming.  All the copies now use  "names" (live-ranges)
@ -216,7 +209,6 @@ void PhaseAggressiveCoalesce::insert_copy_with_overlap( Block *b, Node *copy, ui
  b->_nodes.insert(last_use_idx+1,copy);
 }

-//------------------------------insert_copies----------------------------------
 void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
  // We do LRGs compressing and fix a liveout data only here since the other
  // place in Split() is guarded by the assert which we never hit.
@ -225,8 +217,8 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
  for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) {
    uint compressed_lrg = _phc._lrg_map.find(lrg);
    if (lrg != compressed_lrg) {
-      for (uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++) {
-        IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]);
+      for (uint bidx = 0; bidx < _phc._cfg.number_of_blocks(); bidx++) {
+        IndexSet *liveout = _phc._live->live(_phc._cfg.get_block(bidx));
        if (liveout->member(lrg)) {
          liveout->remove(lrg);
          liveout->insert(compressed_lrg);
@ -239,10 +231,10 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
  // Nodes with index less than '_unique' are original, non-virtual Nodes.
  _unique = C->unique();

-  for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
+  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
    C->check_node_count(NodeLimitFudgeFactor, "out of nodes in coalesce");
    if (C->failing()) return;
-    Block *b = _phc._cfg._blocks[i];
+    Block *b = _phc._cfg.get_block(i);
    uint cnt = b->num_preds();  // Number of inputs to the Phi

    for( uint l = 1; l<b->_nodes.size(); l++ ) {
@ -330,9 +322,7 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
              copy = m->clone();
              // Insert the copy in the basic block, just before us
              b->_nodes.insert(l++, copy);
-              if(_phc.clone_projs(b, l, m, copy, _phc._lrg_map)) {
-                l++;
-              }
+              l += _phc.clone_projs(b, l, m, copy, _phc._lrg_map);
            } else {
              const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
              copy = new (C) MachSpillCopyNode(m, *rm, *rm);
@ -403,8 +393,7 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
  } // End of for all blocks
 }

-//=============================================================================
-//------------------------------coalesce---------------------------------------
+
 // Aggressive (but pessimistic) copy coalescing of a single block

 // The following coalesce pass represents a single round of aggressive
@ -464,20 +453,16 @@ void PhaseAggressiveCoalesce::coalesce( Block *b ) {
  } // End of for all instructions in block
 }

-//=============================================================================
-//------------------------------PhaseConservativeCoalesce----------------------
 PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin) : PhaseCoalesce(chaitin) {
  _ulr.initialize(_phc._lrg_map.max_lrg_id());
 }

-//------------------------------verify-----------------------------------------
 void PhaseConservativeCoalesce::verify() {
 #ifdef ASSERT
  _phc.set_was_low();
 #endif
 }

-//------------------------------union_helper-----------------------------------
 void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
  // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
  // union-find tree
@ -520,7 +505,6 @@ void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, ui
  }
 }

-//------------------------------compute_separating_interferences---------------
 // Factored code from copy_copy that computes extra interferences from
 // lengthening a live range by double-coalescing.
 uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {
@ -586,7 +570,6 @@ uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy,
  return reg_degree;
 }

-//------------------------------update_ifg-------------------------------------
 void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
  // Some original neighbors of lr1 might have gone away
  // because the constrained register mask prevented them.
@ -616,7 +599,6 @@ void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1,
      lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
 }

-//------------------------------record_bias------------------------------------
 static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
  // Tag copy bias here
  if( !ifg->lrgs(lr1)._copy_bias )
@ -625,7 +607,6 @@ static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
    ifg->lrgs(lr2)._copy_bias = lr1;
 }

-//------------------------------copy_copy--------------------------------------
 // See if I can coalesce a series of multiple copies together.  I need the
 // final dest copy and the original src copy.  They can be the same Node.
 // Compute the compatible register masks.
@ -785,7 +766,6 @@ bool PhaseConservativeCoalesce::copy_copy(Node *dst_copy, Node *src_copy, Block
  return true;
 }

-//------------------------------coalesce---------------------------------------
 // Conservative (but pessimistic) copy coalescing of a single block
 void PhaseConservativeCoalesce::coalesce( Block *b ) {
  // Bail out on infrequent blocks
--- a/hotspot/src/share/vm/opto/compile.cpp
+++ b/hotspot/src/share/vm/opto/compile.cpp
@ -2136,7 +2136,9 @@ void Compile::Optimize() {
 //------------------------------Code_Gen---------------------------------------
 // Given a graph, generate code for it
 void Compile::Code_Gen() {
-  if (failing())  return;
+  if (failing()) {
+    return;
+  }

  // Perform instruction selection.  You might think we could reclaim Matcher
  // memory PDQ, but actually the Matcher is used in generating spill code.
@ -2148,12 +2150,11 @@ void Compile::Code_Gen() {
  // nodes.  Mapping is only valid at the root of each matched subtree.
  NOT_PRODUCT( verify_graph_edges(); )

-  Node_List proj_list;
-  Matcher m(proj_list);
-  _matcher = &m;
+  Matcher matcher;
+  _matcher = &matcher;
  {
    TracePhase t2("matcher", &_t_matcher, true);
-    m.match();
+    matcher.match();
  }
  // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
  // nodes.  Mapping is only valid at the root of each matched subtree.
@ -2161,31 +2162,26 @@ void Compile::Code_Gen() {

  // If you have too many nodes, or if matching has failed, bail out
  check_node_count(0, "out of nodes matching instructions");
-  if (failing())  return;
+  if (failing()) {
+    return;
+  }

  // Build a proper-looking CFG
-  PhaseCFG cfg(node_arena(), root(), m);
+  PhaseCFG cfg(node_arena(), root(), matcher);
  _cfg = &cfg;
  {
    NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
-    cfg.Dominators();
-    if (failing())  return;
-
-    NOT_PRODUCT( verify_graph_edges(); )
-
-    cfg.Estimate_Block_Frequency();
-    cfg.GlobalCodeMotion(m,unique(),proj_list);
-    if (failing())  return;
+    bool success = cfg.do_global_code_motion();
+    if (!success) {
+      return;
+    }

    print_method(PHASE_GLOBAL_CODE_MOTION, 2);
-
    NOT_PRODUCT( verify_graph_edges(); )
-
    debug_only( cfg.verify(); )
  }
-  NOT_PRODUCT( verify_graph_edges(); )

-  PhaseChaitin regalloc(unique(), cfg, m);
+  PhaseChaitin regalloc(unique(), cfg, matcher);
  _regalloc = &regalloc;
  {
    TracePhase t2("regalloc", &_t_registerAllocation, true);
@ -2206,7 +2202,7 @@ void Compile::Code_Gen() {
  // can now safely remove it.
  {
    NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
-    cfg.remove_empty();
+    cfg.remove_empty_blocks();
    if (do_freq_based_layout()) {
      PhaseBlockLayout layout(cfg);
    } else {
@ -2253,38 +2249,50 @@ void Compile::dump_asm(int *pcs, uint pc_limit) {
  _regalloc->dump_frame();

  Node *n = NULL;
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    if (VMThread::should_terminate()) { cut_short = true; break; }
-    Block *b = _cfg->_blocks[i];
-    if (b->is_connector() && !Verbose) continue;
-    n = b->_nodes[0];
-    if (pcs && n->_idx < pc_limit)
+  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+    if (VMThread::should_terminate()) {
+      cut_short = true;
+      break;
+    }
+    Block* block = _cfg->get_block(i);
+    if (block->is_connector() && !Verbose) {
+      continue;
+    }
+    n = block->_nodes[0];
+    if (pcs && n->_idx < pc_limit) {
      tty->print("%3.3x   ", pcs[n->_idx]);
-    else
+    } else {
      tty->print("      ");
-    b->dump_head(_cfg);
-    if (b->is_connector()) {
+    }
+    block->dump_head(_cfg);
+    if (block->is_connector()) {
      tty->print_cr("        # Empty connector block");
-    } else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
+    } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
      tty->print_cr("        # Block is sole successor of call");
    }

    // For all instructions
    Node *delay = NULL;
-    for( uint j = 0; j<b->_nodes.size(); j++ ) {
-      if (VMThread::should_terminate()) { cut_short = true; break; }
-      n = b->_nodes[j];
+    for (uint j = 0; j < block->_nodes.size(); j++) {
+      if (VMThread::should_terminate()) {
+        cut_short = true;
+        break;
+      }
+      n = block->_nodes[j];
      if (valid_bundle_info(n)) {
-        Bundle *bundle = node_bundling(n);
+        Bundle* bundle = node_bundling(n);
        if (bundle->used_in_unconditional_delay()) {
          delay = n;
          continue;
        }
-        if (bundle->starts_bundle())
+        if (bundle->starts_bundle()) {
          starts_bundle = '+';
+        }
      }

-      if (WizardMode) n->dump();
+      if (WizardMode) {
+        n->dump();
+      }

      if( !n->is_Region() &&    // Dont print in the Assembly
          !n->is_Phi() &&       // a few noisely useless nodes
--- a/hotspot/src/share/vm/opto/domgraph.cpp
+++ b/hotspot/src/share/vm/opto/domgraph.cpp
@ -32,9 +32,6 @@

 // Portions of code courtesy of Clifford Click

-// Optimization - Graph Style
-
-//------------------------------Tarjan-----------------------------------------
 // A data structure that holds all the information needed to find dominators.
 struct Tarjan {
  Block *_block;                // Basic block for this info
@ -60,23 +57,21 @@ struct Tarjan {

 };

-//------------------------------Dominator--------------------------------------
 // Compute the dominator tree of the CFG.  The CFG must already have been
 // constructed.  This is the Lengauer & Tarjan O(E-alpha(E,V)) algorithm.
-void PhaseCFG::Dominators( ) {
+void PhaseCFG::build_dominator_tree() {
  // Pre-grow the blocks array, prior to the ResourceMark kicking in
-  _blocks.map(_num_blocks,0);
+  _blocks.map(number_of_blocks(), 0);

  ResourceMark rm;
  // Setup mappings from my Graph to Tarjan's stuff and back
  // Note: Tarjan uses 1-based arrays
-  Tarjan *tarjan = NEW_RESOURCE_ARRAY(Tarjan,_num_blocks+1);
+  Tarjan* tarjan = NEW_RESOURCE_ARRAY(Tarjan, number_of_blocks() + 1);

  // Tarjan's algorithm, almost verbatim:
  // Step 1:
-  _rpo_ctr = _num_blocks;
-  uint dfsnum = DFS( tarjan );
-  if( dfsnum-1 != _num_blocks ) {// Check for unreachable loops!
+  uint dfsnum = do_DFS(tarjan, number_of_blocks());
+  if (dfsnum - 1 != number_of_blocks()) { // Check for unreachable loops!
    // If the returned dfsnum does not match the number of blocks, then we
    // must have some unreachable loops.  These can be made at any time by
    // IterGVN.  They are cleaned up by CCP or the loop opts, but the last
@ -93,14 +88,13 @@ void PhaseCFG::Dominators( ) {
    C->record_method_not_compilable("unreachable loop");
    return;
  }
-  _blocks._cnt = _num_blocks;
+  _blocks._cnt = number_of_blocks();

  // Tarjan is using 1-based arrays, so these are some initialize flags
  tarjan[0]._size = tarjan[0]._semi = 0;
  tarjan[0]._label = &tarjan[0];

-  uint i;
-  for( i=_num_blocks; i>=2; i-- ) { // For all vertices in DFS order
+  for (uint i = number_of_blocks(); i >= 2; i--) { // For all vertices in DFS order
    Tarjan *w = &tarjan[i];     // Get vertex from DFS

    // Step 2:
@ -130,19 +124,19 @@ void PhaseCFG::Dominators( ) {
  }

  // Step 4:
-  for( i=2; i <= _num_blocks; i++ ) {
+  for (uint i = 2; i <= number_of_blocks(); i++) {
    Tarjan *w = &tarjan[i];
    if( w->_dom != &tarjan[w->_semi] )
      w->_dom = w->_dom->_dom;
    w->_dom_next = w->_dom_child = NULL;  // Initialize for building tree later
  }
  // No immediate dominator for the root
-  Tarjan *w = &tarjan[_broot->_pre_order];
+  Tarjan *w = &tarjan[get_root_block()->_pre_order];
  w->_dom = NULL;
  w->_dom_next = w->_dom_child = NULL;  // Initialize for building tree later

  // Convert the dominator tree array into my kind of graph
-  for( i=1; i<=_num_blocks;i++){// For all Tarjan vertices
+  for(uint i = 1; i <= number_of_blocks(); i++){ // For all Tarjan vertices
    Tarjan *t = &tarjan[i];     // Handy access
    Tarjan *tdom = t->_dom;     // Handy access to immediate dominator
    if( tdom )  {               // Root has no immediate dominator
@ -152,11 +146,10 @@ void PhaseCFG::Dominators( ) {
    } else
      t->_block->_idom = NULL;  // Root
  }
-  w->setdepth( _num_blocks+1 ); // Set depth in dominator tree
+  w->setdepth(number_of_blocks() + 1); // Set depth in dominator tree

 }

-//----------------------------Block_Stack--------------------------------------
 class Block_Stack {
  private:
    struct Block_Descr {
@ -214,7 +207,6 @@ class Block_Stack {
    }
 };

-//-------------------------most_frequent_successor-----------------------------
 // Find the index into the b->succs[] array of the most frequent successor.
 uint Block_Stack::most_frequent_successor( Block *b ) {
  uint freq_idx = 0;
@ -258,40 +250,38 @@ uint Block_Stack::most_frequent_successor( Block *b ) {
  return freq_idx;
 }

-//------------------------------DFS--------------------------------------------
 // Perform DFS search.  Setup 'vertex' as DFS to vertex mapping.  Setup
 // 'semi' as vertex to DFS mapping.  Set 'parent' to DFS parent.
-uint PhaseCFG::DFS( Tarjan *tarjan ) {
-  Block *b = _broot;
+uint PhaseCFG::do_DFS(Tarjan *tarjan, uint rpo_counter) {
+  Block* root_block = get_root_block();
  uint pre_order = 1;
-  // Allocate stack of size _num_blocks+1 to avoid frequent realloc
-  Block_Stack bstack(tarjan, _num_blocks+1);
+  // Allocate stack of size number_of_blocks() + 1 to avoid frequent realloc
+  Block_Stack bstack(tarjan, number_of_blocks() + 1);

  // Push on stack the state for the first block
-  bstack.push(pre_order, b);
+  bstack.push(pre_order, root_block);
  ++pre_order;

  while (bstack.is_nonempty()) {
    if (!bstack.last_successor()) {
      // Walk over all successors in pre-order (DFS).
-      Block *s = bstack.next_successor();
-      if (s->_pre_order == 0) { // Check for no-pre-order, not-visited
+      Block* next_block = bstack.next_successor();
+      if (next_block->_pre_order == 0) { // Check for no-pre-order, not-visited
        // Push on stack the state of successor
-        bstack.push(pre_order, s);
+        bstack.push(pre_order, next_block);
        ++pre_order;
      }
    }
    else {
      // Build a reverse post-order in the CFG _blocks array
      Block *stack_top = bstack.pop();
-      stack_top->_rpo = --_rpo_ctr;
+      stack_top->_rpo = --rpo_counter;
      _blocks.map(stack_top->_rpo, stack_top);
    }
  }
  return pre_order;
 }

-//------------------------------COMPRESS---------------------------------------
 void Tarjan::COMPRESS()
 {
  assert( _ancestor != 0, "" );
@ -303,14 +293,12 @@ void Tarjan::COMPRESS()
  }
 }

-//------------------------------EVAL-------------------------------------------
 Tarjan *Tarjan::EVAL() {
  if( !_ancestor ) return _label;
  COMPRESS();
  return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
 }

-//------------------------------LINK-------------------------------------------
 void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
  Tarjan *s = w;
  while( w->_label->_semi < s->_child->_label->_semi ) {
@ -333,7 +321,6 @@ void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
  }
 }

-//------------------------------setdepth---------------------------------------
 void Tarjan::setdepth( uint stack_size ) {
  Tarjan **top  = NEW_RESOURCE_ARRAY(Tarjan*, stack_size);
  Tarjan **next = top;
@ -362,8 +349,7 @@ void Tarjan::setdepth( uint stack_size ) {
  } while (last < top);
 }

-//*********************** DOMINATORS ON THE SEA OF NODES***********************
-//------------------------------NTarjan----------------------------------------
+// Compute dominators on the Sea of Nodes form
 // A data structure that holds all the information needed to find dominators.
 struct NTarjan {
  Node *_control;               // Control node associated with this info
@ -396,7 +382,6 @@ struct NTarjan {
 #endif
 };

-//------------------------------Dominator--------------------------------------
 // Compute the dominator tree of the sea of nodes.  This version walks all CFG
 // nodes (using the is_CFG() call) and places them in a dominator tree.  Thus,
 // it needs a count of the CFG nodes for the mapping table. This is the
@ -517,7 +502,6 @@ void PhaseIdealLoop::Dominators() {
  }
 }

-//------------------------------DFS--------------------------------------------
 // Perform DFS search.  Setup 'vertex' as DFS to vertex mapping.  Setup
 // 'semi' as vertex to DFS mapping.  Set 'parent' to DFS parent.
 int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) {
@ -560,7 +544,6 @@ int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uin
  return dfsnum;
 }

-//------------------------------COMPRESS---------------------------------------
 void NTarjan::COMPRESS()
 {
  assert( _ancestor != 0, "" );
@ -572,14 +555,12 @@ void NTarjan::COMPRESS()
  }
 }

-//------------------------------EVAL-------------------------------------------
 NTarjan *NTarjan::EVAL() {
  if( !_ancestor ) return _label;
  COMPRESS();
  return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
 }

-//------------------------------LINK-------------------------------------------
 void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
  NTarjan *s = w;
  while( w->_label->_semi < s->_child->_label->_semi ) {
@ -602,7 +583,6 @@ void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
  }
 }

-//------------------------------setdepth---------------------------------------
 void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
  NTarjan **top  = NEW_RESOURCE_ARRAY(NTarjan*, stack_size);
  NTarjan **next = top;
@ -631,7 +611,6 @@ void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
  } while (last < top);
 }

-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void NTarjan::dump(int offset) const {
  // Dump the data from this node
--- a/hotspot/src/share/vm/opto/gcm.cpp
+++ b/hotspot/src/share/vm/opto/gcm.cpp
@ -121,27 +121,30 @@ void PhaseCFG::replace_block_proj_ctrl( Node *n ) {

 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
-void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
+void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
  // Allocate node stack of size C->unique()+8 to avoid frequent realloc
-  GrowableArray <Node *> spstack(C->unique()+8);
+  GrowableArray <Node *> spstack(C->unique() + 8);
  spstack.push(_root);
-  while ( spstack.is_nonempty() ) {
-    Node *n = spstack.pop();
-    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
-      if( n->pinned() && !has_block(n)) {  // Pinned?  Nail it down!
-        assert( n->in(0), "pinned Node must have Control" );
+  while (spstack.is_nonempty()) {
+    Node* node = spstack.pop();
+    if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
+      if (node->pinned() && !has_block(node)) {  // Pinned?  Nail it down!
+        assert(node->in(0), "pinned Node must have Control");
        // Before setting block replace block_proj control edge
-        replace_block_proj_ctrl(n);
-        Node *input = n->in(0);
+        replace_block_proj_ctrl(node);
+        Node* input = node->in(0);
        while (!input->is_block_start()) {
          input = input->in(0);
        }
-        Block *b = get_block_for_node(input); // Basic block of controlling input
-        schedule_node_into_block(n, b);
+        Block* block = get_block_for_node(input); // Basic block of controlling input
+        schedule_node_into_block(node, block);
      }
-      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs
-        if( n->in(i) != NULL )
-          spstack.push(n->in(i));
+
+      // process all inputs that are non NULL
+      for (int i = node->req() - 1; i >= 0; --i) {
+        if (node->in(i) != NULL) {
+          spstack.push(node->in(i));
+        }
      }
    }
  }
@ -205,32 +208,29 @@ static Block* find_deepest_input(Node* n, const PhaseCFG* cfg) {
 // which all their inputs occur.
 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
  // Allocate stack with enough space to avoid frequent realloc
-  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
-  // roots.push(_root); _root will be processed among C->top() inputs
+  Node_Stack nstack(roots.Size() + 8);
+  // _root will be processed among C->top() inputs
  roots.push(C->top());
  visited.set(C->top()->_idx);

  while (roots.size() != 0) {
    // Use local variables nstack_top_n & nstack_top_i to cache values
    // on stack's top.
-    Node *nstack_top_n = roots.pop();
-    uint  nstack_top_i = 0;
-//while_nstack_nonempty:
-    while (true) {
-      // Get parent node and next input's index from stack's top.
-      Node *n = nstack_top_n;
-      uint  i = nstack_top_i;
+    Node* parent_node = roots.pop();
+    uint  input_index = 0;

-      if (i == 0) {
+    while (true) {
+      if (input_index == 0) {
        // Fixup some control.  Constants without control get attached
        // to root and nodes that use is_block_proj() nodes should be attached
        // to the region that starts their block.
-        const Node *in0 = n->in(0);
-        if (in0 != NULL) {              // Control-dependent?
-          replace_block_proj_ctrl(n);
-        } else {               // n->in(0) == NULL
-          if (n->req() == 1) { // This guy is a constant with NO inputs?
-            n->set_req(0, _root);
+        const Node* control_input = parent_node->in(0);
+        if (control_input != NULL) {
+          replace_block_proj_ctrl(parent_node);
+        } else {
+          // Is a constant with NO inputs?
+          if (parent_node->req() == 1) {
+            parent_node->set_req(0, _root);
          }
        }
      }
@ -239,37 +239,47 @@ bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
      // input is already in a block we quit following inputs (to avoid
      // cycles). Instead we put that Node on a worklist to be handled
      // later (since IT'S inputs may not have a block yet).
-      bool done = true;              // Assume all n's inputs will be processed
-      while (i < n->len()) {         // For all inputs
-        Node *in = n->in(i);         // Get input
-        ++i;
-        if (in == NULL) continue;    // Ignore NULL, missing inputs
+
+      // Assume all n's inputs will be processed
+      bool done = true;
+
+      while (input_index < parent_node->len()) {
+        Node* in = parent_node->in(input_index++);
+        if (in == NULL) {
+          continue;
+        }
+
        int is_visited = visited.test_set(in->_idx);
-        if (!has_block(in)) { // Missing block selection?
+        if (!has_block(in)) {
          if (is_visited) {
-            // assert( !visited.test(in->_idx), "did not schedule early" );
            return false;
          }
-          nstack.push(n, i);         // Save parent node and next input's index.
-          nstack_top_n = in;         // Process current input now.
-          nstack_top_i = 0;
-          done = false;              // Not all n's inputs processed.
-          break; // continue while_nstack_nonempty;
-        } else if (!is_visited) {    // Input not yet visited?
-          roots.push(in);            // Visit this guy later, using worklist
+          // Save parent node and next input's index.
+          nstack.push(parent_node, input_index);
+          // Process current input now.
+          parent_node = in;
+          input_index = 0;
+          // Not all n's inputs processed.
+          done = false;
+          break;
+        } else if (!is_visited) {
+          // Visit this guy later, using worklist
+          roots.push(in);
        }
      }
+
      if (done) {
        // All of n's inputs have been processed, complete post-processing.

        // Some instructions are pinned into a block.  These include Region,
        // Phi, Start, Return, and other control-dependent instructions and
        // any projections which depend on them.
-        if (!n->pinned()) {
+        if (!parent_node->pinned()) {
          // Set earliest legal block.
-          map_node_to_block(n, find_deepest_input(n, this));
+          Block* earliest_block = find_deepest_input(parent_node, this);
+          map_node_to_block(parent_node, earliest_block);
        } else {
-          assert(get_block_for_node(n) == get_block_for_node(n->in(0)), "Pinned Node should be at the same block as its control edge");
+          assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
        }

        if (nstack.is_empty()) {
@ -278,12 +288,12 @@ bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
          break;
        }
        // Get saved parent node and next input's index.
-        nstack_top_n = nstack.node();
-        nstack_top_i = nstack.index();
+        parent_node = nstack.node();
+        input_index = nstack.index();
        nstack.pop();
-      } //    if (done)
-    }   // while (true)
-  }     // while (roots.size() != 0)
+      }
+    }
+  }
  return true;
 }

@ -847,7 +857,7 @@ Node *Node_Backward_Iterator::next() {

 //------------------------------ComputeLatenciesBackwards----------------------
 // Compute the latency of all the instructions.
-void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
+void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) {
 #ifndef PRODUCT
  if (trace_opto_pipelining())
    tty->print("\n#---- ComputeLatenciesBackwards ----\n");
@ -870,31 +880,34 @@ void PhaseCFG::partial_latency_of_defs(Node *n) {
  // Set the latency for this instruction
 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
-    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
    dump();
  }
 #endif

-  if (n->is_Proj())
+  if (n->is_Proj()) {
    n = n->in(0);
+  }

-  if (n->is_Root())
+  if (n->is_Root()) {
    return;
+  }

  uint nlen = n->len();
-  uint use_latency = _node_latency->at_grow(n->_idx);
+  uint use_latency = get_latency_for_node(n);
  uint use_pre_order = get_block_for_node(n)->_pre_order;

-  for ( uint j=0; j<nlen; j++ ) {
+  for (uint j = 0; j < nlen; j++) {
    Node *def = n->in(j);

-    if (!def || def == n)
+    if (!def || def == n) {
      continue;
+    }

    // Walk backwards thru projections
-    if (def->is_Proj())
+    if (def->is_Proj()) {
      def = def->in(0);
+    }

 #ifndef PRODUCT
    if (trace_opto_pipelining()) {
@ -907,22 +920,20 @@ void PhaseCFG::partial_latency_of_defs(Node *n) {
    Block *def_block = get_block_for_node(def);
    uint def_pre_order = def_block ? def_block->_pre_order : 0;

-    if ( (use_pre_order <  def_pre_order) ||
-         (use_pre_order == def_pre_order && n->is_Phi()) )
+    if ((use_pre_order <  def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
      continue;
+    }

    uint delta_latency = n->latency(j);
    uint current_latency = delta_latency + use_latency;

-    if (_node_latency->at_grow(def->_idx) < current_latency) {
-      _node_latency->at_put_grow(def->_idx, current_latency);
+    if (get_latency_for_node(def) < current_latency) {
+      set_latency_for_node(def, current_latency);
    }

 #ifndef PRODUCT
    if (trace_opto_pipelining()) {
-      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
-                    use_latency, j, delta_latency, current_latency, def->_idx,
-                    _node_latency->at_grow(def->_idx));
+      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
    }
 #endif
  }
@ -957,7 +968,7 @@ int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
      return 0;

    uint nlen = use->len();
-    uint nl = _node_latency->at_grow(use->_idx);
+    uint nl = get_latency_for_node(use);

    for ( uint j=0; j<nlen; j++ ) {
      if (use->in(j) == n) {
@ -992,8 +1003,7 @@ void PhaseCFG::latency_from_uses(Node *n) {
  // Set the latency for this instruction
 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
-    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
    dump();
  }
 #endif
@ -1006,7 +1016,7 @@ void PhaseCFG::latency_from_uses(Node *n) {
    if (latency < l) latency = l;
  }

-  _node_latency->at_put_grow(n->_idx, latency);
+  set_latency_for_node(n, latency);
 }

 //------------------------------hoist_to_cheaper_block-------------------------
@ -1016,9 +1026,9 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
  const double delta = 1+PROB_UNLIKELY_MAG(4);
  Block* least       = LCA;
  double least_freq  = least->_freq;
-  uint target        = _node_latency->at_grow(self->_idx);
-  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
-  uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
+  uint target        = get_latency_for_node(self);
+  uint start_latency = get_latency_for_node(LCA->_nodes[0]);
+  uint end_latency   = get_latency_for_node(LCA->_nodes[LCA->end_idx()]);
  bool in_latency    = (target <= start_latency);
  const Block* root_block = get_block_for_node(_root);

@ -1035,8 +1045,7 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {

 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
-    tty->print("# Find cheaper block for latency %d: ",
-      _node_latency->at_grow(self->_idx));
+    tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self));
    self->dump();
    tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
      LCA->_pre_order,
@ -1065,9 +1074,9 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
    if (mach && LCA == root_block)
      break;

-    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
+    uint start_lat = get_latency_for_node(LCA->_nodes[0]);
    uint end_idx   = LCA->end_idx();
-    uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
+    uint end_lat   = get_latency_for_node(LCA->_nodes[end_idx]);
    double LCA_freq = LCA->_freq;
 #ifndef PRODUCT
    if (trace_opto_pipelining()) {
@ -1109,7 +1118,7 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
      tty->print_cr("#  Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
    }
 #endif
-    _node_latency->at_put_grow(self->_idx, end_latency);
+    set_latency_for_node(self, end_latency);
    partial_latency_of_defs(self);
  }

@ -1255,7 +1264,7 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) {
 } // end ScheduleLate

 //------------------------------GlobalCodeMotion-------------------------------
-void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
+void PhaseCFG::global_code_motion() {
  ResourceMark rm;

 #ifndef PRODUCT
@ -1265,21 +1274,22 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
 #endif

  // Initialize the node to block mapping for things on the proj_list
-  for (uint i = 0; i < proj_list.size(); i++) {
-    unmap_node_from_block(proj_list[i]);
+  for (uint i = 0; i < _matcher.number_of_projections(); i++) {
+    unmap_node_from_block(_matcher.get_projection(i));
  }

  // Set the basic block for Nodes pinned into blocks
-  Arena *a = Thread::current()->resource_area();
-  VectorSet visited(a);
-  schedule_pinned_nodes( visited );
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet visited(arena);
+  schedule_pinned_nodes(visited);

  // Find the earliest Block any instruction can be placed in.  Some
  // instructions are pinned into Blocks.  Unpinned instructions can
  // appear in last block in which all their inputs occur.
  visited.Clear();
-  Node_List stack(a);
-  stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
+  Node_List stack(arena);
+  // Pre-grow the list
+  stack.map((C->unique() >> 1) + 16, NULL);
  if (!schedule_early(visited, stack)) {
    // Bailout without retry
    C->record_method_not_compilable("early schedule failed");
@ -1287,29 +1297,25 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
  }

  // Build Def-Use edges.
-  proj_list.push(_root);        // Add real root as another root
-  proj_list.pop();
-
  // Compute the latency information (via backwards walk) for all the
  // instructions in the graph
  _node_latency = new GrowableArray<uint>(); // resource_area allocation

-  if( C->do_scheduling() )
-    ComputeLatenciesBackwards(visited, stack);
+  if (C->do_scheduling()) {
+    compute_latencies_backwards(visited, stack);
+  }

  // Now schedule all codes as LATE as possible.  This is the LCA in the
  // dominator tree of all USES of a value.  Pick the block with the least
  // loop nesting depth that is lowest in the dominator tree.
  // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
  schedule_late(visited, stack);
-  if( C->failing() ) {
+  if (C->failing()) {
    // schedule_late fails only when graph is incorrect.
    assert(!VerifyGraphEdges, "verification should have failed");
    return;
  }

-  unique = C->unique();
-
 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("\n---- Detect implicit null checks ----\n");
@ -1332,10 +1338,11 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
    // By reversing the loop direction we get a very minor gain on mpegaudio.
    // Feel free to revert to a forward loop for clarity.
    // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
-    for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
-      Node *proj = matcher._null_check_tests[i  ];
-      Node *val  = matcher._null_check_tests[i+1];
-      get_block_for_node(proj)->implicit_null_check(this, proj, val, allowed_reasons);
+    for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) {
+      Node* proj = _matcher._null_check_tests[i];
+      Node* val  = _matcher._null_check_tests[i + 1];
+      Block* block = get_block_for_node(proj);
+      block->implicit_null_check(this, proj, val, allowed_reasons);
      // The implicit_null_check will only perform the transformation
      // if the null branch is truly uncommon, *and* it leads to an
      // uncommon trap.  Combined with the too_many_traps guards
@ -1352,11 +1359,11 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_

  // Schedule locally.  Right now a simple topological sort.
  // Later, do a real latency aware scheduler.
-  uint max_idx = C->unique();
-  GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
+  GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1);
  visited.Clear();
-  for (uint i = 0; i < _num_blocks; i++) {
-    if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    if (!block->schedule_local(this, _matcher, ready_cnt, visited)) {
      if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
        C->record_method_not_compilable("local schedule failed");
      }
@ -1366,15 +1373,17 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_

  // If we inserted any instructions between a Call and his CatchNode,
  // clone the instructions on all paths below the Catch.
-  for (uint i = 0; i < _num_blocks; i++) {
-    _blocks[i]->call_catch_cleanup(this, C);
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    block->call_catch_cleanup(this, C);
  }

 #ifndef PRODUCT
  if (trace_opto_pipelining()) {
    tty->print("\n---- After GlobalCodeMotion ----\n");
-    for (uint i = 0; i < _num_blocks; i++) {
-      _blocks[i]->dump();
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      Block* block = get_block(i);
+      block->dump();
    }
  }
 #endif
@ -1382,10 +1391,29 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_
  _node_latency = (GrowableArray<uint> *)0xdeadbeef;
 }

+bool PhaseCFG::do_global_code_motion() {
+
+  build_dominator_tree();
+  if (C->failing()) {
+    return false;
+  }
+
+  NOT_PRODUCT( C->verify_graph_edges(); )
+
+  estimate_block_frequency();
+
+  global_code_motion();
+
+  if (C->failing()) {
+    return false;
+  }
+
+  return true;
+}

 //------------------------------Estimate_Block_Frequency-----------------------
 // Estimate block frequencies based on IfNode probabilities.
-void PhaseCFG::Estimate_Block_Frequency() {
+void PhaseCFG::estimate_block_frequency() {

  // Force conditional branches leading to uncommon traps to be unlikely,
  // not because we get to the uncommon_trap with less relative frequency,
@ -1393,7 +1421,7 @@ void PhaseCFG::Estimate_Block_Frequency() {
  // there once.
  if (C->do_freq_based_layout()) {
    Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
    for (uint i = 1; i < root_blk->num_preds(); i++) {
      Block *pb = get_block_for_node(root_blk->pred(i));
      if (pb->has_uncommon_code()) {
@ -1402,7 +1430,9 @@ void PhaseCFG::Estimate_Block_Frequency() {
    }
    while (worklist.size() > 0) {
      Block* uct = worklist.pop();
-      if (uct == _broot) continue;
+      if (uct == get_root_block()) {
+        continue;
+      }
      for (uint i = 1; i < uct->num_preds(); i++) {
        Block *pb = get_block_for_node(uct->pred(i));
        if (pb->_num_succs == 1) {
@ -1426,12 +1456,12 @@ void PhaseCFG::Estimate_Block_Frequency() {
  _root_loop->scale_freq();

  // Save outmost loop frequency for LRG frequency threshold
-  _outer_loop_freq = _root_loop->outer_loop_freq();
+  _outer_loop_frequency = _root_loop->outer_loop_freq();

  // force paths ending at uncommon traps to be infrequent
  if (!C->do_freq_based_layout()) {
    Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
    for (uint i = 1; i < root_blk->num_preds(); i++) {
      Block *pb = get_block_for_node(root_blk->pred(i));
      if (pb->has_uncommon_code()) {
@ -1451,8 +1481,8 @@ void PhaseCFG::Estimate_Block_Frequency() {
  }

 #ifdef ASSERT
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* b = get_block(i);
    assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
  }
 #endif
@ -1476,16 +1506,16 @@ void PhaseCFG::Estimate_Block_Frequency() {
 CFGLoop* PhaseCFG::create_loop_tree() {

 #ifdef ASSERT
-  assert( _blocks[0] == _broot, "" );
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  assert(get_block(0) == get_root_block(), "first block should be root block");
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
    // Check that _loop field are clear...we could clear them if not.
-    assert(b->_loop == NULL, "clear _loop expected");
+    assert(block->_loop == NULL, "clear _loop expected");
    // Sanity check that the RPO numbering is reflected in the _blocks array.
    // It doesn't have to be for the loop tree to be built, but if it is not,
    // then the blocks have been reordered since dom graph building...which
    // may question the RPO numbering
-    assert(b->_rpo == i, "unexpected reverse post order number");
+    assert(block->_rpo == i, "unexpected reverse post order number");
  }
 #endif

@ -1495,11 +1525,11 @@ CFGLoop* PhaseCFG::create_loop_tree() {
  Block_List worklist;

  // Assign blocks to loops
-  for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
-    Block *b = _blocks[i];
+  for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block
+    Block* block = get_block(i);

-    if (b->head()->is_Loop()) {
-      Block* loop_head = b;
+    if (block->head()->is_Loop()) {
+      Block* loop_head = block;
      assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
      Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
      Block* tail = get_block_for_node(tail_n);
@ -1533,23 +1563,23 @@ CFGLoop* PhaseCFG::create_loop_tree() {

  // Create a member list for each loop consisting
  // of both blocks and (immediate child) loops.
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    CFGLoop* lp = b->_loop;
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    CFGLoop* lp = block->_loop;
    if (lp == NULL) {
      // Not assigned to a loop. Add it to the method's pseudo loop.
-      b->_loop = root_loop;
+      block->_loop = root_loop;
      lp = root_loop;
    }
-    if (lp == root_loop || b != lp->head()) { // loop heads are already members
-      lp->add_member(b);
+    if (lp == root_loop || block != lp->head()) { // loop heads are already members
+      lp->add_member(block);
    }
    if (lp != root_loop) {
      if (lp->parent() == NULL) {
        // Not a nested loop. Make it a child of the method's pseudo loop.
        root_loop->add_nested_loop(lp);
      }
-      if (b == lp->head()) {
+      if (block == lp->head()) {
        // Add nested loop to member list of parent loop.
        lp->parent()->add_member(lp);
      }
--- a/hotspot/src/share/vm/opto/idealGraphPrinter.cpp
+++ b/hotspot/src/share/vm/opto/idealGraphPrinter.cpp
@ -416,7 +416,7 @@ void IdealGraphPrinter::visit_node(Node *n, bool edges, VectorSet* temp_set) {
    if (C->cfg() != NULL) {
      Block* block = C->cfg()->get_block_for_node(node);
      if (block == NULL) {
-        print_prop("block", C->cfg()->_blocks[0]->_pre_order);
+        print_prop("block", C->cfg()->get_block(0)->_pre_order);
      } else {
        print_prop("block", block->_pre_order);
      }
@ -637,10 +637,10 @@ void IdealGraphPrinter::walk_nodes(Node *start, bool edges, VectorSet* temp_set)
  if (C->cfg() != NULL) {
    // once we have a CFG there are some nodes that aren't really
    // reachable but are in the CFG so add them here.
-    for (uint i = 0; i < C->cfg()->_blocks.size(); i++) {
-      Block *b = C->cfg()->_blocks[i];
-      for (uint s = 0; s < b->_nodes.size(); s++) {
-        nodeStack.push(b->_nodes[s]);
+    for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) {
+      Block* block = C->cfg()->get_block(i);
+      for (uint s = 0; s < block->_nodes.size(); s++) {
+        nodeStack.push(block->_nodes[s]);
      }
    }
  }
@ -698,24 +698,24 @@ void IdealGraphPrinter::print(Compile* compile, const char *name, Node *node, in
  tail(EDGES_ELEMENT);
  if (C->cfg() != NULL) {
    head(CONTROL_FLOW_ELEMENT);
-    for (uint i = 0; i < C->cfg()->_blocks.size(); i++) {
-      Block *b = C->cfg()->_blocks[i];
+    for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) {
+      Block* block = C->cfg()->get_block(i);
      begin_head(BLOCK_ELEMENT);
-      print_attr(BLOCK_NAME_PROPERTY, b->_pre_order);
+      print_attr(BLOCK_NAME_PROPERTY, block->_pre_order);
      end_head();

      head(SUCCESSORS_ELEMENT);
-      for (uint s = 0; s < b->_num_succs; s++) {
+      for (uint s = 0; s < block->_num_succs; s++) {
        begin_elem(SUCCESSOR_ELEMENT);
-        print_attr(BLOCK_NAME_PROPERTY, b->_succs[s]->_pre_order);
+        print_attr(BLOCK_NAME_PROPERTY, block->_succs[s]->_pre_order);
        end_elem();
      }
      tail(SUCCESSORS_ELEMENT);

      head(NODES_ELEMENT);
-      for (uint s = 0; s < b->_nodes.size(); s++) {
+      for (uint s = 0; s < block->_nodes.size(); s++) {
        begin_elem(NODE_ELEMENT);
-        print_attr(NODE_ID_PROPERTY, get_node_id(b->_nodes[s]));
+        print_attr(NODE_ID_PROPERTY, get_node_id(block->_nodes[s]));
        end_elem();
      }
      tail(NODES_ELEMENT);
--- a/hotspot/src/share/vm/opto/ifg.cpp
+++ b/hotspot/src/share/vm/opto/ifg.cpp
@ -37,12 +37,9 @@
 #include "opto/memnode.hpp"
 #include "opto/opcodes.hpp"

-//=============================================================================
-//------------------------------IFG--------------------------------------------
 PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
 }

-//------------------------------init-------------------------------------------
 void PhaseIFG::init( uint maxlrg ) {
  _maxlrg = maxlrg;
  _yanked = new (_arena) VectorSet(_arena);
@ -59,7 +56,6 @@ void PhaseIFG::init( uint maxlrg ) {
  }
 }

-//------------------------------add--------------------------------------------
 // Add edge between vertices a & b.  These are sorted (triangular matrix),
 // then the smaller number is inserted in the larger numbered array.
 int PhaseIFG::add_edge( uint a, uint b ) {
@ -71,7 +67,6 @@ int PhaseIFG::add_edge( uint a, uint b ) {
  return _adjs[a].insert( b );
 }

-//------------------------------add_vector-------------------------------------
 // Add an edge between 'a' and everything in the vector.
 void PhaseIFG::add_vector( uint a, IndexSet *vec ) {
  // IFG is triangular, so do the inserts where 'a' < 'b'.
@ -86,7 +81,6 @@ void PhaseIFG::add_vector( uint a, IndexSet *vec ) {
  }
 }

-//------------------------------test-------------------------------------------
 // Is there an edge between a and b?
 int PhaseIFG::test_edge( uint a, uint b ) const {
  // Sort a and b, so that a is larger
@ -95,7 +89,6 @@ int PhaseIFG::test_edge( uint a, uint b ) const {
  return _adjs[a].member(b);
 }

-//------------------------------SquareUp---------------------------------------
 // Convert triangular matrix to square matrix
 void PhaseIFG::SquareUp() {
  assert( !_is_square, "only on triangular" );
@ -111,7 +104,6 @@ void PhaseIFG::SquareUp() {
  _is_square = true;
 }

-//------------------------------Compute_Effective_Degree-----------------------
 // Compute effective degree in bulk
 void PhaseIFG::Compute_Effective_Degree() {
  assert( _is_square, "only on square" );
@ -120,7 +112,6 @@ void PhaseIFG::Compute_Effective_Degree() {
    lrgs(i).set_degree(effective_degree(i));
 }

-//------------------------------test_edge_sq-----------------------------------
 int PhaseIFG::test_edge_sq( uint a, uint b ) const {
  assert( _is_square, "only on square" );
  // Swap, so that 'a' has the lesser count.  Then binary search is on
@ -130,7 +121,6 @@ int PhaseIFG::test_edge_sq( uint a, uint b ) const {
  return _adjs[a].member(b);
 }

-//------------------------------Union------------------------------------------
 // Union edges of B into A
 void PhaseIFG::Union( uint a, uint b ) {
  assert( _is_square, "only on square" );
@ -146,7 +136,6 @@ void PhaseIFG::Union( uint a, uint b ) {
  }
 }

-//------------------------------remove_node------------------------------------
 // Yank a Node and all connected edges from the IFG.  Return a
 // list of neighbors (edges) yanked.
 IndexSet *PhaseIFG::remove_node( uint a ) {
@ -165,7 +154,6 @@ IndexSet *PhaseIFG::remove_node( uint a ) {
  return neighbors(a);
 }

-//------------------------------re_insert--------------------------------------
 // Re-insert a yanked Node.
 void PhaseIFG::re_insert( uint a ) {
  assert( _is_square, "only on square" );
@ -180,7 +168,6 @@ void PhaseIFG::re_insert( uint a ) {
  }
 }

-//------------------------------compute_degree---------------------------------
 // Compute the degree between 2 live ranges.  If both live ranges are
 // aligned-adjacent powers-of-2 then we use the MAX size.  If either is
 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
@ -196,7 +183,6 @@ int LRG::compute_degree( LRG &l ) const {
  return tmp;
 }

-//------------------------------effective_degree-------------------------------
 // Compute effective degree for this live range.  If both live ranges are
 // aligned-adjacent powers-of-2 then we use the MAX size.  If either is
 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
@ -221,7 +207,6 @@ int PhaseIFG::effective_degree( uint lidx ) const {


 #ifndef PRODUCT
-//------------------------------dump-------------------------------------------
 void PhaseIFG::dump() const {
  tty->print_cr("-- Interference Graph --%s--",
                _is_square ? "square" : "triangular" );
@ -260,7 +245,6 @@ void PhaseIFG::dump() const {
  tty->print("\n");
 }

-//------------------------------stats------------------------------------------
 void PhaseIFG::stats() const {
  ResourceMark rm;
  int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
@ -276,7 +260,6 @@ void PhaseIFG::stats() const {
  tty->print_cr("");
 }

-//------------------------------verify-----------------------------------------
 void PhaseIFG::verify( const PhaseChaitin *pc ) const {
  // IFG is square, sorted and no need for Find
  for( uint i = 0; i < _maxlrg; i++ ) {
@ -298,7 +281,6 @@ void PhaseIFG::verify( const PhaseChaitin *pc ) const {
 }
 #endif

-//------------------------------interfere_with_live----------------------------
 // Interfere this register with everything currently live.  Use the RegMasks
 // to trim the set of possible interferences. Return a count of register-only
 // interferences as an estimate of register pressure.
@ -315,7 +297,6 @@ void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) {
      _ifg->add_edge( r, l );
 }

-//------------------------------build_ifg_virtual------------------------------
 // Actually build the interference graph.  Uses virtual registers only, no
 // physical register masks.  This allows me to be very aggressive when
 // coalescing copies.  Some of this aggressiveness will have to be undone
@ -325,9 +306,9 @@ void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) {
 void PhaseChaitin::build_ifg_virtual( ) {

  // For all blocks (in any order) do...
-  for( uint i=0; i<_cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    IndexSet *liveout = _live->live(b);
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    IndexSet* liveout = _live->live(block);

    // The IFG is built by a single reverse pass over each basic block.
    // Starting with the known live-out set, we remove things that get
@ -337,8 +318,8 @@ void PhaseChaitin::build_ifg_virtual( ) {
    // The defined value interferes with everything currently live.  The
    // value is then removed from the live-ness set and it's inputs are
    // added to the live-ness set.
-    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j-1];
+    for (uint j = block->end_idx() + 1; j > 1; j--) {
+      Node* n = block->_nodes[j - 1];

      // Get value being defined
      uint r = _lrg_map.live_range_id(n);
@ -408,7 +389,6 @@ void PhaseChaitin::build_ifg_virtual( ) {
  } // End of forall blocks
 }

-//------------------------------count_int_pressure-----------------------------
 uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
  IndexSetIterator elements(liveout);
  uint lidx;
@ -424,7 +404,6 @@ uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
  return cnt;
 }

-//------------------------------count_float_pressure---------------------------
 uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
  IndexSetIterator elements(liveout);
  uint lidx;
@ -438,7 +417,6 @@ uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
  return cnt;
 }

-//------------------------------lower_pressure---------------------------------
 // Adjust register pressure down by 1.  Capture last hi-to-low transition,
 static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
  if (lrg->mask().is_UP() && lrg->mask_size()) {
@ -460,40 +438,41 @@ static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint
  }
 }

-//------------------------------build_ifg_physical-----------------------------
 // Build the interference graph using physical registers when available.
 // That is, if 2 live ranges are simultaneously alive but in their acceptable
 // register sets do not overlap, then they do not interfere.
 uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
  NOT_PRODUCT( Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler); )

-  uint spill_reg = LRG::SPILL_REG;
  uint must_spill = 0;

  // For all blocks (in any order) do...
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
    // Clone (rather than smash in place) the liveout info, so it is alive
    // for the "collect_gc_info" phase later.
-    IndexSet liveout(_live->live(b));
-    uint last_inst = b->end_idx();
+    IndexSet liveout(_live->live(block));
+    uint last_inst = block->end_idx();
    // Compute first nonphi node index
    uint first_inst;
-    for( first_inst = 1; first_inst < last_inst; first_inst++ )
-      if( !b->_nodes[first_inst]->is_Phi() )
+    for (first_inst = 1; first_inst < last_inst; first_inst++) {
+      if (!block->_nodes[first_inst]->is_Phi()) {
        break;
+      }
+    }

    // Spills could be inserted before CreateEx node which should be
    // first instruction in block after Phis. Move CreateEx up.
-    for( uint insidx = first_inst; insidx < last_inst; insidx++ ) {
-      Node *ex = b->_nodes[insidx];
-      if( ex->is_SpillCopy() ) continue;
-      if( insidx > first_inst && ex->is_Mach() &&
-          ex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
+    for (uint insidx = first_inst; insidx < last_inst; insidx++) {
+      Node *ex = block->_nodes[insidx];
+      if (ex->is_SpillCopy()) {
+        continue;
+      }
+      if (insidx > first_inst && ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
        // If the CreateEx isn't above all the MachSpillCopies
        // then move it to the top.
-        b->_nodes.remove(insidx);
-        b->_nodes.insert(first_inst, ex);
+        block->_nodes.remove(insidx);
+        block->_nodes.insert(first_inst, ex);
      }
      // Stop once a CreateEx or any other node is found
      break;
@ -503,12 +482,12 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
    uint pressure[2], hrp_index[2];
    pressure[0] = pressure[1] = 0;
    hrp_index[0] = hrp_index[1] = last_inst+1;
-    b->_reg_pressure = b->_freg_pressure = 0;
+    block->_reg_pressure = block->_freg_pressure = 0;
    // Liveout things are presumed live for the whole block.  We accumulate
    // 'area' accordingly.  If they get killed in the block, we'll subtract
    // the unused part of the block from the area.
    int inst_count = last_inst - first_inst;
-    double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
+    double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
    assert(!(cost < 0.0), "negative spill cost" );
    IndexSetIterator elements(&liveout);
    uint lidx;
@ -519,13 +498,15 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
      if (lrg.mask().is_UP() && lrg.mask_size()) {
        if (lrg._is_float || lrg._is_vector) {   // Count float pressure
          pressure[1] += lrg.reg_pressure();
-          if( pressure[1] > b->_freg_pressure )
-            b->_freg_pressure = pressure[1];
+          if (pressure[1] > block->_freg_pressure) {
+            block->_freg_pressure = pressure[1];
+          }
          // Count int pressure, but do not count the SP, flags
-        } else if( lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
+        } else if(lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) {
          pressure[0] += lrg.reg_pressure();
-          if( pressure[0] > b->_reg_pressure )
-            b->_reg_pressure = pressure[0];
+          if (pressure[0] > block->_reg_pressure) {
+            block->_reg_pressure = pressure[0];
+          }
        }
      }
    }
@ -541,8 +522,8 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
    // value is then removed from the live-ness set and it's inputs are added
    // to the live-ness set.
    uint j;
-    for( j = last_inst + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j - 1];
+    for (j = last_inst + 1; j > 1; j--) {
+      Node* n = block->_nodes[j - 1];

      // Get value being defined
      uint r = _lrg_map.live_range_id(n);
@ -551,7 +532,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
      if(r) {
        // A DEF normally costs block frequency; rematerialized values are
        // removed from the DEF sight, so LOWER costs here.
-        lrgs(r)._cost += n->rematerialize() ? 0 : b->_freq;
+        lrgs(r)._cost += n->rematerialize() ? 0 : block->_freq;

        // If it is not live, then this instruction is dead.  Probably caused
        // by spilling and rematerialization.  Who cares why, yank this baby.
@ -560,7 +541,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
          if( !n->is_Proj() ||
              // Could also be a flags-projection of a dead ADD or such.
              (_lrg_map.live_range_id(def) && !liveout.member(_lrg_map.live_range_id(def)))) {
-            b->_nodes.remove(j - 1);
+            block->_nodes.remove(j - 1);
            if (lrgs(r)._def == n) {
              lrgs(r)._def = 0;
            }
@ -580,21 +561,21 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
            RegMask itmp = lrgs(r).mask();
            itmp.AND(*Matcher::idealreg2regmask[Op_RegI]);
            int iregs = itmp.Size();
-            if( pressure[0]+iregs > b->_reg_pressure )
-              b->_reg_pressure = pressure[0]+iregs;
-            if( pressure[0]       <= (uint)INTPRESSURE &&
-                pressure[0]+iregs >  (uint)INTPRESSURE ) {
-              hrp_index[0] = j-1;
+            if (pressure[0]+iregs > block->_reg_pressure) {
+              block->_reg_pressure = pressure[0] + iregs;
+            }
+            if (pressure[0] <= (uint)INTPRESSURE && pressure[0] + iregs > (uint)INTPRESSURE) {
+              hrp_index[0] = j - 1;
            }
            // Count the float-only registers
            RegMask ftmp = lrgs(r).mask();
            ftmp.AND(*Matcher::idealreg2regmask[Op_RegD]);
            int fregs = ftmp.Size();
-            if( pressure[1]+fregs > b->_freg_pressure )
-              b->_freg_pressure = pressure[1]+fregs;
-            if( pressure[1]       <= (uint)FLOATPRESSURE &&
-                pressure[1]+fregs >  (uint)FLOATPRESSURE ) {
-              hrp_index[1] = j-1;
+            if (pressure[1] + fregs > block->_freg_pressure) {
+              block->_freg_pressure = pressure[1] + fregs;
+            }
+            if(pressure[1] <= (uint)FLOATPRESSURE && pressure[1]+fregs > (uint)FLOATPRESSURE) {
+              hrp_index[1] = j - 1;
            }
          }

@ -607,7 +588,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
          if( n->is_SpillCopy()
              && lrgs(r).is_singledef()        // MultiDef live range can still split
              && n->outcnt() == 1              // and use must be in this block
-              && _cfg.get_block_for_node(n->unique_out()) == b ) {
+              && _cfg.get_block_for_node(n->unique_out()) == block) {
            // All single-use MachSpillCopy(s) that immediately precede their
            // use must color early.  If a longer live range steals their
            // color, the spill copy will split and may push another spill copy
@ -617,14 +598,16 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
            //

            Node *single_use = n->unique_out();
-            assert( b->find_node(single_use) >= j, "Use must be later in block");
+            assert(block->find_node(single_use) >= j, "Use must be later in block");
            // Use can be earlier in block if it is a Phi, but then I should be a MultiDef

            // Find first non SpillCopy 'm' that follows the current instruction
            // (j - 1) is index for current instruction 'n'
            Node *m = n;
-            for( uint i = j; i <= last_inst && m->is_SpillCopy(); ++i ) { m = b->_nodes[i]; }
-            if( m == single_use ) {
+            for (uint i = j; i <= last_inst && m->is_SpillCopy(); ++i) {
+              m = block->_nodes[i];
+            }
+            if (m == single_use) {
              lrgs(r)._area = 0.0;
            }
          }
@ -633,7 +616,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
          if( liveout.remove(r) ) {
            // Adjust register pressure.
            // Capture last hi-to-lo pressure transition
-            lower_pressure( &lrgs(r), j-1, b, pressure, hrp_index );
+            lower_pressure(&lrgs(r), j - 1, block, pressure, hrp_index);
            assert( pressure[0] == count_int_pressure  (&liveout), "" );
            assert( pressure[1] == count_float_pressure(&liveout), "" );
          }
@ -646,7 +629,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
            if (liveout.remove(x)) {
              lrgs(x)._area -= cost;
              // Adjust register pressure.
-              lower_pressure(&lrgs(x), j-1, b, pressure, hrp_index);
+              lower_pressure(&lrgs(x), j - 1, block, pressure, hrp_index);
              assert( pressure[0] == count_int_pressure  (&liveout), "" );
              assert( pressure[1] == count_float_pressure(&liveout), "" );
            }
@ -718,7 +701,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {

      // Area remaining in the block
      inst_count--;
-      cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
+      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);

      // Make all inputs live
      if( !n->is_Phi() ) {      // Phi function uses come from prior block
@ -743,7 +726,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
          if (k < debug_start) {
            // A USE costs twice block frequency (once for the Load, once
            // for a Load-delay).  Rematerialized uses only cost once.
-            lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq + b->_freq));
+            lrg._cost += (def->rematerialize() ? block->_freq : (block->_freq + block->_freq));
          }
          // It is live now
          if (liveout.insert(x)) {
@ -753,12 +736,14 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
            if (lrg.mask().is_UP() && lrg.mask_size()) {
              if (lrg._is_float || lrg._is_vector) {
                pressure[1] += lrg.reg_pressure();
-                if( pressure[1] > b->_freg_pressure )
-                  b->_freg_pressure = pressure[1];
+                if (pressure[1] > block->_freg_pressure)  {
+                  block->_freg_pressure = pressure[1];
+                }
              } else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
                pressure[0] += lrg.reg_pressure();
-                if( pressure[0] > b->_reg_pressure )
-                  b->_reg_pressure = pressure[0];
+                if (pressure[0] > block->_reg_pressure) {
+                  block->_reg_pressure = pressure[0];
+                }
              }
            }
            assert( pressure[0] == count_int_pressure  (&liveout), "" );
@ -772,44 +757,47 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
    // If we run off the top of the block with high pressure and
    // never see a hi-to-low pressure transition, just record that
    // the whole block is high pressure.
-    if( pressure[0] > (uint)INTPRESSURE   ) {
+    if (pressure[0] > (uint)INTPRESSURE) {
      hrp_index[0] = 0;
-      if( pressure[0] > b->_reg_pressure )
-        b->_reg_pressure = pressure[0];
+      if (pressure[0] > block->_reg_pressure) {
+        block->_reg_pressure = pressure[0];
+      }
    }
-    if( pressure[1] > (uint)FLOATPRESSURE ) {
+    if (pressure[1] > (uint)FLOATPRESSURE) {
      hrp_index[1] = 0;
-      if( pressure[1] > b->_freg_pressure )
-        b->_freg_pressure = pressure[1];
+      if (pressure[1] > block->_freg_pressure) {
+        block->_freg_pressure = pressure[1];
+      }
    }

    // Compute high pressure indice; avoid landing in the middle of projnodes
    j = hrp_index[0];
-    if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
-      Node *cur = b->_nodes[j];
-      while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
+    if (j < block->_nodes.size() && j < block->end_idx() + 1) {
+      Node* cur = block->_nodes[j];
+      while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
        j--;
-        cur = b->_nodes[j];
+        cur = block->_nodes[j];
      }
    }
-    b->_ihrp_index = j;
+    block->_ihrp_index = j;
    j = hrp_index[1];
-    if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
-      Node *cur = b->_nodes[j];
-      while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
+    if (j < block->_nodes.size() && j < block->end_idx() + 1) {
+      Node* cur = block->_nodes[j];
+      while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
        j--;
-        cur = b->_nodes[j];
+        cur = block->_nodes[j];
      }
    }
-    b->_fhrp_index = j;
+    block->_fhrp_index = j;

 #ifndef PRODUCT
    // Gather Register Pressure Statistics
    if( PrintOptoStatistics ) {
-      if( b->_reg_pressure > (uint)INTPRESSURE || b->_freg_pressure > (uint)FLOATPRESSURE )
+      if (block->_reg_pressure > (uint)INTPRESSURE || block->_freg_pressure > (uint)FLOATPRESSURE) {
        _high_pressure++;
-      else
+      } else {
        _low_pressure++;
+      }
    }
 #endif
  } // End of for all blocks
--- a/hotspot/src/share/vm/opto/lcm.cpp
+++ b/hotspot/src/share/vm/opto/lcm.cpp
@ -501,7 +501,7 @@ Node *Block::select(PhaseCFG *cfg, Node_List &worklist, GrowableArray<int> &read
      n_choice = 1;
    }

-    uint n_latency = cfg->_node_latency->at_grow(n->_idx);
+    uint n_latency = cfg->get_latency_for_node(n);
    uint n_score   = n->req();   // Many inputs get high score to break ties

    // Keep best latency found
@ -797,7 +797,7 @@ bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &
        Node     *n = _nodes[j];
        int     idx = n->_idx;
        tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
-        tty->print("latency:%3d  ", cfg->_node_latency->at_grow(idx));
+        tty->print("latency:%3d  ", cfg->get_latency_for_node(n));
        tty->print("%4d: %s\n", idx, n->Name());
      }
    }
@ -825,7 +825,7 @@ bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, GrowableArray<int> &
 #ifndef PRODUCT
    if (cfg->trace_opto_pipelining()) {
      tty->print("#    select %d: %s", n->_idx, n->Name());
-      tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx));
+      tty->print(", latency:%d", cfg->get_latency_for_node(n));
      n->dump();
      if (Verbose) {
        tty->print("#   ready list:");
--- a/hotspot/src/share/vm/opto/library_call.cpp
+++ b/hotspot/src/share/vm/opto/library_call.cpp
@ -213,6 +213,7 @@ class LibraryCallKit : public GraphKit {
  void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar);
  bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
  bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
+  static bool klass_needs_init_guard(Node* kls);
  bool inline_unsafe_allocate();
  bool inline_unsafe_copyMemory();
  bool inline_native_currentThread();
@ -2892,8 +2893,21 @@ bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
  }
 }

+bool LibraryCallKit::klass_needs_init_guard(Node* kls) {
+  if (!kls->is_Con()) {
+    return true;
+  }
+  const TypeKlassPtr* klsptr = kls->bottom_type()->isa_klassptr();
+  if (klsptr == NULL) {
+    return true;
+  }
+  ciInstanceKlass* ik = klsptr->klass()->as_instance_klass();
+  // don't need a guard for a klass that is already initialized
+  return !ik->is_initialized();
+}
+
 //----------------------------inline_unsafe_allocate---------------------------
-// public native Object sun.mics.Unsafe.allocateInstance(Class<?> cls);
+// public native Object sun.misc.Unsafe.allocateInstance(Class<?> cls);
 bool LibraryCallKit::inline_unsafe_allocate() {
  if (callee()->is_static())  return false;  // caller must have the capability!

@ -2905,16 +2919,19 @@ bool LibraryCallKit::inline_unsafe_allocate() {
  kls = null_check(kls);
  if (stopped())  return true;  // argument was like int.class

-  // Note:  The argument might still be an illegal value like
-  // Serializable.class or Object[].class.   The runtime will handle it.
-  // But we must make an explicit check for initialization.
-  Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset()));
-  // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler
-  // can generate code to load it as unsigned byte.
-  Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN);
-  Node* bits = intcon(InstanceKlass::fully_initialized);
-  Node* test = _gvn.transform(new (C) SubINode(inst, bits));
-  // The 'test' is non-zero if we need to take a slow path.
+  Node* test = NULL;
+  if (LibraryCallKit::klass_needs_init_guard(kls)) {
+    // Note:  The argument might still be an illegal value like
+    // Serializable.class or Object[].class.   The runtime will handle it.
+    // But we must make an explicit check for initialization.
+    Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset()));
+    // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler
+    // can generate code to load it as unsigned byte.
+    Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN);
+    Node* bits = intcon(InstanceKlass::fully_initialized);
+    test = _gvn.transform(new (C) SubINode(inst, bits));
+    // The 'test' is non-zero if we need to take a slow path.
+  }

  Node* obj = new_instance(kls, test);
  set_result(obj);
--- a/hotspot/src/share/vm/opto/live.cpp
+++ b/hotspot/src/share/vm/opto/live.cpp
@ -30,9 +30,6 @@
 #include "opto/machnode.hpp"


-
-//=============================================================================
-//------------------------------PhaseLive--------------------------------------
 // Compute live-in/live-out.  We use a totally incremental algorithm.  The LIVE
 // problem is monotonic.  The steady-state solution looks like this: pull a
 // block from the worklist.  It has a set of delta's - values which are newly
@ -53,9 +50,9 @@ void PhaseLive::compute(uint maxlrg) {

  // Init the sparse live arrays.  This data is live on exit from here!
  // The _live info is the live-out info.
-  _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*_cfg._num_blocks);
+  _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet) * _cfg.number_of_blocks());
  uint i;
-  for( i=0; i<_cfg._num_blocks; i++ ) {
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
    _live[i].initialize(_maxlrg);
  }

@ -65,14 +62,14 @@ void PhaseLive::compute(uint maxlrg) {
  // Does the memory used by _defs and _deltas get reclaimed?  Does it matter?  TT

  // Array of values defined locally in blocks
-  _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg._num_blocks);
-  for( i=0; i<_cfg._num_blocks; i++ ) {
+  _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg.number_of_blocks());
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
    _defs[i].initialize(_maxlrg);
  }

  // Array of delta-set pointers, indexed by block pre_order-1.
-  _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg._num_blocks);
-  memset( _deltas, 0, sizeof(IndexSet*)* _cfg._num_blocks);
+  _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg.number_of_blocks());
+  memset( _deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks());

  _free_IndexSet = NULL;

@ -80,31 +77,32 @@ void PhaseLive::compute(uint maxlrg) {
  VectorSet first_pass(Thread::current()->resource_area());

  // Outer loop: must compute local live-in sets and push into predecessors.
-  uint iters = _cfg._num_blocks;        // stat counters
-  for( uint j=_cfg._num_blocks; j>0; j-- ) {
-    Block *b = _cfg._blocks[j-1];
+  for (uint j = _cfg.number_of_blocks(); j > 0; j--) {
+    Block* block = _cfg.get_block(j - 1);

    // Compute the local live-in set.  Start with any new live-out bits.
-    IndexSet *use = getset( b );
-    IndexSet *def = &_defs[b->_pre_order-1];
+    IndexSet* use = getset(block);
+    IndexSet* def = &_defs[block->_pre_order-1];
    DEBUG_ONLY(IndexSet *def_outside = getfreeset();)
    uint i;
-    for( i=b->_nodes.size(); i>1; i-- ) {
-      Node *n = b->_nodes[i-1];
-      if( n->is_Phi() ) break;
+    for (i = block->_nodes.size(); i > 1; i--) {
+      Node* n = block->_nodes[i-1];
+      if (n->is_Phi()) {
+        break;
+      }

      uint r = _names[n->_idx];
      assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block");
      def->insert( r );
      use->remove( r );
      uint cnt = n->req();
-      for( uint k=1; k<cnt; k++ ) {
+      for (uint k = 1; k < cnt; k++) {
        Node *nk = n->in(k);
        uint nkidx = nk->_idx;
-        if (_cfg.get_block_for_node(nk) != b) {
+        if (_cfg.get_block_for_node(nk) != block) {
          uint u = _names[nkidx];
-          use->insert( u );
-          DEBUG_ONLY(def_outside->insert( u );)
+          use->insert(u);
+          DEBUG_ONLY(def_outside->insert(u);)
        }
      }
    }
@ -113,41 +111,38 @@ void PhaseLive::compute(uint maxlrg) {
    _free_IndexSet = def_outside;     // Drop onto free list
 #endif
    // Remove anything defined by Phis and the block start instruction
-    for( uint k=i; k>0; k-- ) {
-      uint r = _names[b->_nodes[k-1]->_idx];
-      def->insert( r );
-      use->remove( r );
+    for (uint k = i; k > 0; k--) {
+      uint r = _names[block->_nodes[k - 1]->_idx];
+      def->insert(r);
+      use->remove(r);
    }

    // Push these live-in things to predecessors
-    for( uint l=1; l<b->num_preds(); l++ ) {
-      Block *p = _cfg.get_block_for_node(b->pred(l));
-      add_liveout( p, use, first_pass );
+    for (uint l = 1; l < block->num_preds(); l++) {
+      Block* p = _cfg.get_block_for_node(block->pred(l));
+      add_liveout(p, use, first_pass);

      // PhiNode uses go in the live-out set of prior blocks.
-      for( uint k=i; k>0; k-- )
-        add_liveout( p, _names[b->_nodes[k-1]->in(l)->_idx], first_pass );
+      for (uint k = i; k > 0; k--) {
+        add_liveout(p, _names[block->_nodes[k-1]->in(l)->_idx], first_pass);
+      }
    }
-    freeset( b );
-    first_pass.set(b->_pre_order);
+    freeset(block);
+    first_pass.set(block->_pre_order);

    // Inner loop: blocks that picked up new live-out values to be propagated
-    while( _worklist->size() ) {
-        // !!!!!
-// #ifdef ASSERT
-      iters++;
-// #endif
-      Block *b = _worklist->pop();
-      IndexSet *delta = getset(b);
+    while (_worklist->size()) {
+      Block* block = _worklist->pop();
+      IndexSet *delta = getset(block);
      assert( delta->count(), "missing delta set" );

      // Add new-live-in to predecessors live-out sets
-      for (uint l = 1; l < b->num_preds(); l++) {
-        Block* block = _cfg.get_block_for_node(b->pred(l));
-        add_liveout(block, delta, first_pass);
+      for (uint l = 1; l < block->num_preds(); l++) {
+        Block* predecessor = _cfg.get_block_for_node(block->pred(l));
+        add_liveout(predecessor, delta, first_pass);
      }

-      freeset(b);
+      freeset(block);
    } // End of while-worklist-not-empty

  } // End of for-all-blocks-outer-loop
@ -155,7 +150,7 @@ void PhaseLive::compute(uint maxlrg) {
  // We explicitly clear all of the IndexSets which we are about to release.
  // This allows us to recycle their internal memory into IndexSet's free list.

-  for( i=0; i<_cfg._num_blocks; i++ ) {
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
    _defs[i].clear();
    if (_deltas[i]) {
      // Is this always true?
@ -171,13 +166,11 @@ void PhaseLive::compute(uint maxlrg) {

 }

-//------------------------------stats------------------------------------------
 #ifndef PRODUCT
 void PhaseLive::stats(uint iters) const {
 }
 #endif

-//------------------------------getset-----------------------------------------
 // Get an IndexSet for a block.  Return existing one, if any.  Make a new
 // empty one if a prior one does not exist.
 IndexSet *PhaseLive::getset( Block *p ) {
@ -188,7 +181,6 @@ IndexSet *PhaseLive::getset( Block *p ) {
  return delta;                 // Return set of new live-out items
 }

-//------------------------------getfreeset-------------------------------------
 // Pull from free list, or allocate.  Internal allocation on the returned set
 // is always from thread local storage.
 IndexSet *PhaseLive::getfreeset( ) {
@ -207,7 +199,6 @@ IndexSet *PhaseLive::getfreeset( ) {
  return f;
 }

-//------------------------------freeset----------------------------------------
 // Free an IndexSet from a block.
 void PhaseLive::freeset( const Block *p ) {
  IndexSet *f = _deltas[p->_pre_order-1];
@ -216,7 +207,6 @@ void PhaseLive::freeset( const Block *p ) {
  _deltas[p->_pre_order-1] = NULL;
 }

-//------------------------------add_liveout------------------------------------
 // Add a live-out value to a given blocks live-out set.  If it is new, then
 // also add it to the delta set and stick the block on the worklist.
 void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) {
@ -233,8 +223,6 @@ void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) {
  }
 }

-
-//------------------------------add_liveout------------------------------------
 // Add a vector of live-out values to a given blocks live-out set.
 void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
  IndexSet *live = &_live[p->_pre_order-1];
@ -262,7 +250,6 @@ void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
 }

 #ifndef PRODUCT
-//------------------------------dump-------------------------------------------
 // Dump the live-out set for a block
 void PhaseLive::dump( const Block *b ) const {
  tty->print("Block %d: ",b->_pre_order);
@ -275,18 +262,19 @@ void PhaseLive::dump( const Block *b ) const {
  tty->print("\n");
 }

-//------------------------------verify_base_ptrs-------------------------------
 // Verify that base pointers and derived pointers are still sane.
 void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
 #ifdef ASSERT
  Unique_Node_List worklist(a);
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j-1];
-      if( n->is_Phi() ) break;
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    for (uint j = block->end_idx() + 1; j > 1; j--) {
+      Node* n = block->_nodes[j-1];
+      if (n->is_Phi()) {
+        break;
+      }
      // Found a safepoint?
-      if( n->is_MachSafePoint() ) {
+      if (n->is_MachSafePoint()) {
        MachSafePointNode *sfpt = n->as_MachSafePoint();
        JVMState* jvms = sfpt->jvms();
        if (jvms != NULL) {
@ -358,7 +346,6 @@ void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
 #endif
 }

-//------------------------------verify-------------------------------------
 // Verify that graphs and base pointers are still sane.
 void PhaseChaitin::verify( ResourceArea *a, bool verify_ifg ) const {
 #ifdef ASSERT
--- a/hotspot/src/share/vm/opto/matcher.cpp
+++ b/hotspot/src/share/vm/opto/matcher.cpp
@ -67,8 +67,8 @@ const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
 const uint Matcher::_end_rematerialize   = _END_REMATERIALIZE;

 //---------------------------Matcher-------------------------------------------
-Matcher::Matcher( Node_List &proj_list ) :
-  PhaseTransform( Phase::Ins_Select ),
+Matcher::Matcher()
+: PhaseTransform( Phase::Ins_Select ),
 #ifdef ASSERT
  _old2new_map(C->comp_arena()),
  _new2old_map(C->comp_arena()),
@ -78,7 +78,7 @@ Matcher::Matcher( Node_List &proj_list ) :
  _swallowed(swallowed),
  _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
  _end_inst_chain_rule(_END_INST_CHAIN_RULE),
-  _must_clone(must_clone), _proj_list(proj_list),
+  _must_clone(must_clone),
  _register_save_policy(register_save_policy),
  _c_reg_save_policy(c_reg_save_policy),
  _register_save_type(register_save_type),
@ -1304,8 +1304,9 @@ MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
      for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
        proj->_rout.Insert(OptoReg::Name(i));
    }
-    if( proj->_rout.is_NotEmpty() )
-      _proj_list.push(proj);
+    if (proj->_rout.is_NotEmpty()) {
+      push_projection(proj);
+    }
  }
  // Transfer the safepoint information from the call to the mcall
  // Move the JVMState list
@ -1685,14 +1686,15 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
  }

  // If the _leaf is an AddP, insert the base edge
-  if( leaf->is_AddP() )
+  if (leaf->is_AddP()) {
    mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
+  }

-  uint num_proj = _proj_list.size();
+  uint number_of_projections_prior = number_of_projections();

  // Perform any 1-to-many expansions required
-  MachNode *ex = mach->Expand(s,_proj_list, mem);
-  if( ex != mach ) {
+  MachNode *ex = mach->Expand(s, _projection_list, mem);
+  if (ex != mach) {
    assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
    if( ex->in(1)->is_Con() )
      ex->in(1)->set_req(0, C->root());
@ -1713,7 +1715,7 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
  // generated belatedly during spill code generation.
  if (_allocation_started) {
    guarantee(ex == mach, "no expand rules during spill generation");
-    guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
+    guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation");
  }

  if (leaf->is_Con() || leaf->is_DecodeNarrowPtr()) {
--- a/hotspot/src/share/vm/opto/matcher.hpp
+++ b/hotspot/src/share/vm/opto/matcher.hpp
@ -88,7 +88,7 @@ class Matcher : public PhaseTransform {

  Node *transform( Node *dummy );

-  Node_List &_proj_list;        // For Machine nodes killing many values
+  Node_List _projection_list;        // For Machine nodes killing many values

  Node_Array _shared_nodes;

@ -183,10 +183,30 @@ public:
  void collect_null_checks( Node *proj, Node *orig_proj );
  void validate_null_checks( );

-  Matcher( Node_List &proj_list );
+  Matcher();
+
+  // Get a projection node at position pos
+  Node* get_projection(uint pos) {
+    return _projection_list[pos];
+  }
+
+  // Push a projection node onto the projection list
+  void push_projection(Node* node) {
+    _projection_list.push(node);
+  }
+
+  Node* pop_projection() {
+    return _projection_list.pop();
+  }
+
+  // Number of nodes in the projection list
+  uint number_of_projections() const {
+    return _projection_list.size();
+  }

  // Select instructions for entire method
-  void  match( );
+  void match();
+
  // Helper for match
  OptoReg::Name warp_incoming_stk_arg( VMReg reg );

--- a/hotspot/src/share/vm/opto/output.cpp
+++ b/hotspot/src/share/vm/opto/output.cpp
@ -54,11 +54,10 @@ extern uint size_deopt_handler();
 extern int emit_exception_handler(CodeBuffer &cbuf);
 extern int emit_deopt_handler(CodeBuffer &cbuf);

-//------------------------------Output-----------------------------------------
 // Convert Nodes to instruction bits and pass off to the VM
 void Compile::Output() {
  // RootNode goes
-  assert( _cfg->_broot->_nodes.size() == 0, "" );
+  assert( _cfg->get_root_block()->_nodes.size() == 0, "" );

  // The number of new nodes (mostly MachNop) is proportional to
  // the number of java calls and inner loops which are aligned.
@ -68,8 +67,8 @@ void Compile::Output() {
    return;
  }
  // Make sure I can find the Start Node
-  Block *entry = _cfg->_blocks[1];
-  Block *broot = _cfg->_broot;
+  Block *entry = _cfg->get_block(1);
+  Block *broot = _cfg->get_root_block();

  const StartNode *start = entry->_nodes[0]->as_Start();

@ -109,40 +108,44 @@ void Compile::Output() {
  }

  // Insert epilogs before every return
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    Block *b = _cfg->_blocks[i];
-    if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
-      Node *m = b->end();
-      if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
-        MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
-        b->add_inst( epilog );
-        _cfg->map_node_to_block(epilog, b);
+  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+    Block* block = _cfg->get_block(i);
+    if (!block->is_connector() && block->non_connector_successor(0) == _cfg->get_root_block()) { // Found a program exit point?
+      Node* m = block->end();
+      if (m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt) {
+        MachEpilogNode* epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
+        block->add_inst(epilog);
+        _cfg->map_node_to_block(epilog, block);
      }
    }
  }

 # ifdef ENABLE_ZAP_DEAD_LOCALS
-  if ( ZapDeadCompiledLocals )  Insert_zap_nodes();
+  if (ZapDeadCompiledLocals) {
+    Insert_zap_nodes();
+  }
 # endif

-  uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
-  blk_starts[0]    = 0;
+  uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1);
+  blk_starts[0] = 0;

  // Initialize code buffer and process short branches.
  CodeBuffer* cb = init_buffer(blk_starts);

-  if (cb == NULL || failing())  return;
+  if (cb == NULL || failing()) {
+    return;
+  }

  ScheduleAndBundle();

 #ifndef PRODUCT
  if (trace_opto_output()) {
    tty->print("\n---- After ScheduleAndBundle ----\n");
-    for (uint i = 0; i < _cfg->_num_blocks; i++) {
+    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
      tty->print("\nBB#%03d:\n", i);
-      Block *bb = _cfg->_blocks[i];
-      for (uint j = 0; j < bb->_nodes.size(); j++) {
-        Node *n = bb->_nodes[j];
+      Block* block = _cfg->get_block(i);
+      for (uint j = 0; j < block->_nodes.size(); j++) {
+        Node* n = block->_nodes[j];
        OptoReg::Name reg = _regalloc->get_reg_first(n);
        tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
        n->dump();
@ -151,11 +154,15 @@ void Compile::Output() {
  }
 #endif

-  if (failing())  return;
+  if (failing()) {
+    return;
+  }

  BuildOopMaps();

-  if (failing())  return;
+  if (failing())  {
+    return;
+  }

  fill_buffer(cb, blk_starts);
 }
@ -217,8 +224,8 @@ void Compile::Insert_zap_nodes() {
    return; // no safepoints/oopmaps emitted for calls in stubs,so we don't care

  // Insert call to zap runtime stub before every node with an oop map
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    Block *b = _cfg->_blocks[i];
+  for( uint i=0; i<_cfg->number_of_blocks(); i++ ) {
+    Block *b = _cfg->get_block(i);
    for ( uint j = 0;  j < b->_nodes.size();  ++j ) {
      Node *n = b->_nodes[j];

@ -275,7 +282,6 @@ Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
  return _matcher->match_sfpt(ideal_node);
 }

-//------------------------------is_node_getting_a_safepoint--------------------
 bool Compile::is_node_getting_a_safepoint( Node* n) {
  // This code duplicates the logic prior to the call of add_safepoint
  // below in this file.
@ -285,7 +291,6 @@ bool Compile::is_node_getting_a_safepoint( Node* n) {

 # endif // ENABLE_ZAP_DEAD_LOCALS

-//------------------------------compute_loop_first_inst_sizes------------------
 // Compute the size of first NumberOfLoopInstrToAlign instructions at the top
 // of a loop. When aligning a loop we need to provide enough instructions
 // in cpu's fetch buffer to feed decoders. The loop alignment could be
@ -302,42 +307,39 @@ void Compile::compute_loop_first_inst_sizes() {
  // or alignment padding is larger then MaxLoopPad. By default, MaxLoopPad
  // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
  // equal to 11 bytes which is the largest address NOP instruction.
-  if( MaxLoopPad < OptoLoopAlignment-1 ) {
-    uint last_block = _cfg->_num_blocks-1;
-    for( uint i=1; i <= last_block; i++ ) {
-      Block *b = _cfg->_blocks[i];
+  if (MaxLoopPad < OptoLoopAlignment - 1) {
+    uint last_block = _cfg->number_of_blocks() - 1;
+    for (uint i = 1; i <= last_block; i++) {
+      Block* block = _cfg->get_block(i);
      // Check the first loop's block which requires an alignment.
-      if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) {
+      if (block->loop_alignment() > (uint)relocInfo::addr_unit()) {
        uint sum_size = 0;
        uint inst_cnt = NumberOfLoopInstrToAlign;
-        inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
+        inst_cnt = block->compute_first_inst_size(sum_size, inst_cnt, _regalloc);

        // Check subsequent fallthrough blocks if the loop's first
        // block(s) does not have enough instructions.
-        Block *nb = b;
-        while( inst_cnt > 0 &&
-               i < last_block &&
-               !_cfg->_blocks[i+1]->has_loop_alignment() &&
-               !nb->has_successor(b) ) {
+        Block *nb = block;
+        while(inst_cnt > 0 &&
+              i < last_block &&
+              !_cfg->get_block(i + 1)->has_loop_alignment() &&
+              !nb->has_successor(block)) {
          i++;
-          nb = _cfg->_blocks[i];
+          nb = _cfg->get_block(i);
          inst_cnt  = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
        } // while( inst_cnt > 0 && i < last_block  )

-        b->set_first_inst_size(sum_size);
+        block->set_first_inst_size(sum_size);
      } // f( b->head()->is_Loop() )
    } // for( i <= last_block )
  } // if( MaxLoopPad < OptoLoopAlignment-1 )
 }

-//----------------------shorten_branches---------------------------------------
 // The architecture description provides short branch variants for some long
 // branch instructions. Replace eligible long branches with short branches.
 void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
-
-  // ------------------
  // Compute size of each block, method size, and relocation information size
-  uint nblocks  = _cfg->_num_blocks;
+  uint nblocks  = _cfg->number_of_blocks();

  uint*      jmp_offset = NEW_RESOURCE_ARRAY(uint,nblocks);
  uint*      jmp_size   = NEW_RESOURCE_ARRAY(uint,nblocks);
@ -364,7 +366,7 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
  uint last_avoid_back_to_back_adr = max_uint;
  uint nop_size = (new (this) MachNopNode())->size(_regalloc);
  for (uint i = 0; i < nblocks; i++) { // For all blocks
-    Block *b = _cfg->_blocks[i];
+    Block* block = _cfg->get_block(i);

    // During short branch replacement, we store the relative (to blk_starts)
    // offset of jump in jmp_offset, rather than the absolute offset of jump.
@ -377,10 +379,10 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
    DEBUG_ONLY( jmp_rule[i]   = 0; )

    // Sum all instruction sizes to compute block size
-    uint last_inst = b->_nodes.size();
+    uint last_inst = block->_nodes.size();
    uint blk_size = 0;
    for (uint j = 0; j < last_inst; j++) {
-      Node* nj = b->_nodes[j];
+      Node* nj = block->_nodes[j];
      // Handle machine instruction nodes
      if (nj->is_Mach()) {
        MachNode *mach = nj->as_Mach();
@ -441,8 +443,8 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
    // When the next block starts a loop, we may insert pad NOP
    // instructions.  Since we cannot know our future alignment,
    // assume the worst.
-    if (i< nblocks-1) {
-      Block *nb = _cfg->_blocks[i+1];
+    if (i < nblocks - 1) {
+      Block* nb = _cfg->get_block(i + 1);
      int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
      if (max_loop_pad > 0) {
        assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
@ -473,26 +475,26 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
    has_short_branch_candidate = false;
    int adjust_block_start = 0;
    for (uint i = 0; i < nblocks; i++) {
-      Block *b = _cfg->_blocks[i];
+      Block* block = _cfg->get_block(i);
      int idx = jmp_nidx[i];
-      MachNode* mach = (idx == -1) ? NULL: b->_nodes[idx]->as_Mach();
+      MachNode* mach = (idx == -1) ? NULL: block->_nodes[idx]->as_Mach();
      if (mach != NULL && mach->may_be_short_branch()) {
 #ifdef ASSERT
        assert(jmp_size[i] > 0 && mach->is_MachBranch(), "sanity");
        int j;
        // Find the branch; ignore trailing NOPs.
-        for (j = b->_nodes.size()-1; j>=0; j--) {
-          Node* n = b->_nodes[j];
+        for (j = block->_nodes.size()-1; j>=0; j--) {
+          Node* n = block->_nodes[j];
          if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con)
            break;
        }
-        assert(j >= 0 && j == idx && b->_nodes[j] == (Node*)mach, "sanity");
+        assert(j >= 0 && j == idx && block->_nodes[j] == (Node*)mach, "sanity");
 #endif
        int br_size = jmp_size[i];
        int br_offs = blk_starts[i] + jmp_offset[i];

        // This requires the TRUE branch target be in succs[0]
-        uint bnum = b->non_connector_successor(0)->_pre_order;
+        uint bnum = block->non_connector_successor(0)->_pre_order;
        int offset = blk_starts[bnum] - br_offs;
        if (bnum > i) { // adjust following block's offset
          offset -= adjust_block_start;
@ -520,7 +522,7 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size
            diff -= nop_size;
          }
          adjust_block_start += diff;
-          b->_nodes.map(idx, replacement);
+          block->_nodes.map(idx, replacement);
          mach->subsume_by(replacement, C);
          mach = replacement;
          progress = true;
@ -1083,8 +1085,8 @@ CodeBuffer* Compile::init_buffer(uint* blk_starts) {
  if (has_mach_constant_base_node()) {
    // Fill the constant table.
    // Note:  This must happen before shorten_branches.
-    for (uint i = 0; i < _cfg->_num_blocks; i++) {
-      Block* b = _cfg->_blocks[i];
+    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+      Block* b = _cfg->get_block(i);

      for (uint j = 0; j < b->_nodes.size(); j++) {
        Node* n = b->_nodes[j];
@ -1170,7 +1172,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
  // !!!!! This preserves old handling of oopmaps for now
  debug_info()->set_oopmaps(_oop_map_set);

-  uint nblocks  = _cfg->_num_blocks;
+  uint nblocks  = _cfg->number_of_blocks();
  // Count and start of implicit null check instructions
  uint inct_cnt = 0;
  uint *inct_starts = NEW_RESOURCE_ARRAY(uint, nblocks+1);
@ -1218,21 +1220,21 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
  // Now fill in the code buffer
  Node *delay_slot = NULL;

-  for (uint i=0; i < nblocks; i++) {
-    Block *b = _cfg->_blocks[i];
-
-    Node *head = b->head();
+  for (uint i = 0; i < nblocks; i++) {
+    Block* block = _cfg->get_block(i);
+    Node* head = block->head();

    // If this block needs to start aligned (i.e, can be reached other
    // than by falling-thru from the previous block), then force the
    // start of a new bundle.
-    if (Pipeline::requires_bundling() && starts_bundle(head))
+    if (Pipeline::requires_bundling() && starts_bundle(head)) {
      cb->flush_bundle(true);
+    }

 #ifdef ASSERT
-    if (!b->is_connector()) {
+    if (!block->is_connector()) {
      stringStream st;
-      b->dump_head(_cfg, &st);
+      block->dump_head(_cfg, &st);
      MacroAssembler(cb).block_comment(st.as_string());
    }
    jmp_target[i] = 0;
@ -1243,16 +1245,16 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
    int blk_offset = current_offset;

    // Define the label at the beginning of the basic block
-    MacroAssembler(cb).bind(blk_labels[b->_pre_order]);
+    MacroAssembler(cb).bind(blk_labels[block->_pre_order]);

-    uint last_inst = b->_nodes.size();
+    uint last_inst = block->_nodes.size();

    // Emit block normally, except for last instruction.
    // Emit means "dump code bits into code buffer".
    for (uint j = 0; j<last_inst; j++) {

      // Get the node
-      Node* n = b->_nodes[j];
+      Node* n = block->_nodes[j];

      // See if delay slots are supported
      if (valid_bundle_info(n) &&
@ -1306,9 +1308,9 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
          assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
          int nops_cnt = padding / nop_size;
          MachNode *nop = new (this) MachNopNode(nops_cnt);
-          b->_nodes.insert(j++, nop);
+          block->_nodes.insert(j++, nop);
          last_inst++;
-          _cfg->map_node_to_block(nop, b);
+          _cfg->map_node_to_block(nop, block);
          nop->emit(*cb, _regalloc);
          cb->flush_bundle(true);
          current_offset = cb->insts_size();
@ -1322,7 +1324,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
          mcall->method_set((intptr_t)mcall->entry_point());

          // Save the return address
-          call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();
+          call_returns[block->_pre_order] = current_offset + mcall->ret_addr_offset();

          if (mcall->is_MachCallLeaf()) {
            is_mcall = false;
@ -1359,7 +1361,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
        // If this is a branch, then fill in the label with the target BB's label
        else if (mach->is_MachBranch()) {
          // This requires the TRUE branch target be in succs[0]
-          uint block_num = b->non_connector_successor(0)->_pre_order;
+          uint block_num = block->non_connector_successor(0)->_pre_order;

          // Try to replace long branch if delay slot is not used,
          // it is mostly for back branches since forward branch's
@ -1392,8 +1394,8 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
              // Insert padding between avoid_back_to_back branches.
              if (needs_padding && replacement->avoid_back_to_back()) {
                MachNode *nop = new (this) MachNopNode();
-                b->_nodes.insert(j++, nop);
-                _cfg->map_node_to_block(nop, b);
+                block->_nodes.insert(j++, nop);
+                _cfg->map_node_to_block(nop, block);
                last_inst++;
                nop->emit(*cb, _regalloc);
                cb->flush_bundle(true);
@ -1405,7 +1407,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
              jmp_size[i]   = new_size;
              jmp_rule[i]   = mach->rule();
 #endif
-              b->_nodes.map(j, replacement);
+              block->_nodes.map(j, replacement);
              mach->subsume_by(replacement, C);
              n    = replacement;
              mach = replacement;
@ -1413,8 +1415,8 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
          }
          mach->as_MachBranch()->label_set( &blk_labels[block_num], block_num );
        } else if (mach->ideal_Opcode() == Op_Jump) {
-          for (uint h = 0; h < b->_num_succs; h++) {
-            Block* succs_block = b->_succs[h];
+          for (uint h = 0; h < block->_num_succs; h++) {
+            Block* succs_block = block->_succs[h];
            for (uint j = 1; j < succs_block->num_preds(); j++) {
              Node* jpn = succs_block->pred(j);
              if (jpn->is_JumpProj() && jpn->in(0) == mach) {
@ -1425,7 +1427,6 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
            }
          }
        }
-
 #ifdef ASSERT
        // Check that oop-store precedes the card-mark
        else if (mach->ideal_Opcode() == Op_StoreCM) {
@ -1436,17 +1437,18 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
            if (oop_store == NULL) continue;
            count++;
            uint i4;
-            for( i4 = 0; i4 < last_inst; ++i4 ) {
-              if( b->_nodes[i4] == oop_store ) break;
+            for (i4 = 0; i4 < last_inst; ++i4) {
+              if (block->_nodes[i4] == oop_store) {
+                break;
+              }
            }
            // Note: This test can provide a false failure if other precedence
            // edges have been added to the storeCMNode.
-            assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
+            assert(i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
          }
          assert(count > 0, "storeCM expects at least one precedence edge");
        }
 #endif
-
        else if (!n->is_Proj()) {
          // Remember the beginning of the previous instruction, in case
          // it's followed by a flag-kill and a null-check.  Happens on
@ -1542,12 +1544,12 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
    // If the next block is the top of a loop, pad this block out to align
    // the loop top a little. Helps prevent pipe stalls at loop back branches.
    if (i < nblocks-1) {
-      Block *nb = _cfg->_blocks[i+1];
+      Block *nb = _cfg->get_block(i + 1);
      int padding = nb->alignment_padding(current_offset);
      if( padding > 0 ) {
        MachNode *nop = new (this) MachNopNode(padding / nop_size);
-        b->_nodes.insert( b->_nodes.size(), nop );
-        _cfg->map_node_to_block(nop, b);
+        block->_nodes.insert(block->_nodes.size(), nop);
+        _cfg->map_node_to_block(nop, block);
        nop->emit(*cb, _regalloc);
        current_offset = cb->insts_size();
      }
@ -1587,8 +1589,6 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
  }
 #endif

-  // ------------------
-
 #ifndef PRODUCT
  // Information on the size of the method, without the extraneous code
  Scheduling::increment_method_size(cb->insts_size());
@ -1649,52 +1649,55 @@ void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_start
  _inc_table.set_size(cnt);

  uint inct_cnt = 0;
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    Block *b = _cfg->_blocks[i];
+  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+    Block* block = _cfg->get_block(i);
    Node *n = NULL;
    int j;

    // Find the branch; ignore trailing NOPs.
-    for( j = b->_nodes.size()-1; j>=0; j-- ) {
-      n = b->_nodes[j];
-      if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con )
+    for (j = block->_nodes.size() - 1; j >= 0; j--) {
+      n = block->_nodes[j];
+      if (!n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con) {
        break;
+      }
    }

    // If we didn't find anything, continue
-    if( j < 0 ) continue;
+    if (j < 0) {
+      continue;
+    }

    // Compute ExceptionHandlerTable subtable entry and add it
    // (skip empty blocks)
-    if( n->is_Catch() ) {
+    if (n->is_Catch()) {

      // Get the offset of the return from the call
-      uint call_return = call_returns[b->_pre_order];
+      uint call_return = call_returns[block->_pre_order];
 #ifdef ASSERT
      assert( call_return > 0, "no call seen for this basic block" );
-      while( b->_nodes[--j]->is_MachProj() ) ;
-      assert( b->_nodes[j]->is_MachCall(), "CatchProj must follow call" );
+      while (block->_nodes[--j]->is_MachProj()) ;
+      assert(block->_nodes[j]->is_MachCall(), "CatchProj must follow call");
 #endif
      // last instruction is a CatchNode, find it's CatchProjNodes
-      int nof_succs = b->_num_succs;
+      int nof_succs = block->_num_succs;
      // allocate space
      GrowableArray<intptr_t> handler_bcis(nof_succs);
      GrowableArray<intptr_t> handler_pcos(nof_succs);
      // iterate through all successors
      for (int j = 0; j < nof_succs; j++) {
-        Block* s = b->_succs[j];
+        Block* s = block->_succs[j];
        bool found_p = false;
-        for( uint k = 1; k < s->num_preds(); k++ ) {
-          Node *pk = s->pred(k);
-          if( pk->is_CatchProj() && pk->in(0) == n ) {
+        for (uint k = 1; k < s->num_preds(); k++) {
+          Node* pk = s->pred(k);
+          if (pk->is_CatchProj() && pk->in(0) == n) {
            const CatchProjNode* p = pk->as_CatchProj();
            found_p = true;
            // add the corresponding handler bci & pco information
-            if( p->_con != CatchProjNode::fall_through_index ) {
+            if (p->_con != CatchProjNode::fall_through_index) {
              // p leads to an exception handler (and is not fall through)
-              assert(s == _cfg->_blocks[s->_pre_order],"bad numbering");
+              assert(s == _cfg->get_block(s->_pre_order), "bad numbering");
              // no duplicates, please
-              if( !handler_bcis.contains(p->handler_bci()) ) {
+              if (!handler_bcis.contains(p->handler_bci())) {
                uint block_num = s->non_connector()->_pre_order;
                handler_bcis.append(p->handler_bci());
                handler_pcos.append(blk_labels[block_num].loc_pos());
@ -1713,9 +1716,9 @@ void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_start
    }

    // Handle implicit null exception table updates
-    if( n->is_MachNullCheck() ) {
-      uint block_num = b->non_connector_successor(0)->_pre_order;
-      _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() );
+    if (n->is_MachNullCheck()) {
+      uint block_num = block->non_connector_successor(0)->_pre_order;
+      _inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
      continue;
    }
  } // End of for all blocks fill in exception table entries
@ -1774,14 +1777,12 @@ Scheduling::Scheduling(Arena *arena, Compile &compile)
  memset(_current_latency,    0, node_max * sizeof(unsigned short));

  // Clear the bundling information
-  memcpy(_bundle_use_elements,
-    Pipeline_Use::elaborated_elements,
-    sizeof(Pipeline_Use::elaborated_elements));
+  memcpy(_bundle_use_elements, Pipeline_Use::elaborated_elements, sizeof(Pipeline_Use::elaborated_elements));

  // Get the last node
-  Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1];
+  Block* block = _cfg->get_block(_cfg->number_of_blocks() - 1);

-  _next_node = bb->_nodes[bb->_nodes.size()-1];
+  _next_node = block->_nodes[block->_nodes.size() - 1];
 }

 #ifndef PRODUCT
@ -1831,7 +1832,6 @@ void Scheduling::step_and_clear() {
    sizeof(Pipeline_Use::elaborated_elements));
 }

-//------------------------------ScheduleAndBundle------------------------------
 // Perform instruction scheduling and bundling over the sequence of
 // instructions in backwards order.
 void Compile::ScheduleAndBundle() {
@ -1858,7 +1858,6 @@ void Compile::ScheduleAndBundle() {
  scheduling.DoScheduling();
 }

-//------------------------------ComputeLocalLatenciesForward-------------------
 // Compute the latency of all the instructions.  This is fairly simple,
 // because we already have a legal ordering.  Walk over the instructions
 // from first to last, and compute the latency of the instruction based
@ -2028,7 +2027,6 @@ Node * Scheduling::ChooseNodeToBundle() {
  return _available[0];
 }

-//------------------------------AddNodeToAvailableList-------------------------
 void Scheduling::AddNodeToAvailableList(Node *n) {
  assert( !n->is_Proj(), "projections never directly made available" );
 #ifndef PRODUCT
@ -2074,7 +2072,6 @@ void Scheduling::AddNodeToAvailableList(Node *n) {
 #endif
 }

-//------------------------------DecrementUseCounts-----------------------------
 void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
  for ( uint i=0; i < n->len(); i++ ) {
    Node *def = n->in(i);
@ -2097,7 +2094,6 @@ void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
  }
 }

-//------------------------------AddNodeToBundle--------------------------------
 void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
 #ifndef PRODUCT
  if (_cfg->C->trace_opto_output()) {
@ -2312,7 +2308,6 @@ void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
  DecrementUseCounts(n,bb);
 }

-//------------------------------ComputeUseCount--------------------------------
 // This method sets the use count within a basic block.  We will ignore all
 // uses outside the current basic block.  As we are doing a backwards walk,
 // any node we reach that has a use count of 0 may be scheduled.  This also
@ -2397,20 +2392,22 @@ void Scheduling::DoScheduling() {
  Block *bb;

  // Walk over all the basic blocks in reverse order
-  for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) {
-    bb = _cfg->_blocks[i];
+  for (int i = _cfg->number_of_blocks() - 1; i >= 0; succ_bb = bb, i--) {
+    bb = _cfg->get_block(i);

 #ifndef PRODUCT
    if (_cfg->C->trace_opto_output()) {
      tty->print("#  Schedule BB#%03d (initial)\n", i);
-      for (uint j = 0; j < bb->_nodes.size(); j++)
+      for (uint j = 0; j < bb->_nodes.size(); j++) {
        bb->_nodes[j]->dump();
+      }
    }
 #endif

    // On the head node, skip processing
-    if( bb == _cfg->_broot )
+    if (bb == _cfg->get_root_block()) {
      continue;
+    }

    // Skip empty, connector blocks
    if (bb->is_connector())
@ -2547,7 +2544,6 @@ void Scheduling::DoScheduling() {

 } // end DoScheduling

-//------------------------------verify_good_schedule---------------------------
 // Verify that no live-range used in the block is killed in the block by a
 // wrong DEF.  This doesn't verify live-ranges that span blocks.

@ -2560,7 +2556,6 @@ static bool edge_from_to( Node *from, Node *to ) {
 }

 #ifdef ASSERT
-//------------------------------verify_do_def----------------------------------
 void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
  // Check for bad kills
  if( OptoReg::is_valid(def) ) { // Ignore stores & control flow
@ -2576,7 +2571,6 @@ void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
  }
 }

-//------------------------------verify_good_schedule---------------------------
 void Scheduling::verify_good_schedule( Block *b, const char *msg ) {

  // Zap to something reasonable for the verify code
@ -2636,7 +2630,6 @@ static void add_prec_edge_from_to( Node *from, Node *to ) {
    from->add_prec(to);
 }

-//------------------------------anti_do_def------------------------------------
 void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
  if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
    return;
@ -2706,7 +2699,6 @@ void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is
  add_prec_edge_from_to(kill,pinch);
 }

-//------------------------------anti_do_use------------------------------------
 void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
  if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
    return;
@ -2727,7 +2719,6 @@ void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
  }
 }

-//------------------------------ComputeRegisterAntidependences-----------------
 // We insert antidependences between the reads and following write of
 // allocated registers to prevent illegal code motion. Hopefully, the
 // number of added references should be fairly small, especially as we
@ -2861,8 +2852,6 @@ void Scheduling::ComputeRegisterAntidependencies(Block *b) {
  }
 }

-//------------------------------garbage_collect_pinch_nodes-------------------------------
-
 // Garbage collect pinch nodes for reuse by other blocks.
 //
 // The block scheduler's insertion of anti-dependence
@ -2937,7 +2926,6 @@ void Scheduling::cleanup_pinch( Node *pinch ) {
  pinch->set_req(0, NULL);
 }

-//------------------------------print_statistics-------------------------------
 #ifndef PRODUCT

 void Scheduling::dump_available() const {
--- a/Show More
+++ b/Show More