Merge

2014-08-08 11:36:48 -07:00 · 2014-08-08 11:36:48 -07:00 · 29ec2196ef
commit 29ec2196ef
parent 6b0a761ca3 648272884a
145 changed files with 2727 additions and 855 deletions
--- a/hotspot/agent/src/os/solaris/proc/saproc.cpp
+++ b/hotspot/agent/src/os/solaris/proc/saproc.cpp
@ -661,30 +661,30 @@ init_classsharing_workaround(void *cd, const prmap_t* pmap, const char* obj_name
  // read FileMapHeader
  size_t n = read(fd, pheader, sizeof(struct FileMapHeader));
  if (n != sizeof(struct FileMapHeader)) {
-    free(pheader);
-    close(fd);
    char errMsg[ERR_MSG_SIZE];
    sprintf(errMsg, "unable to read shared archive file map header from %s", classes_jsa);
+    close(fd);
+    free(pheader);
    THROW_NEW_DEBUGGER_EXCEPTION_(errMsg, 1);
  }

  // check file magic
  if (pheader->_magic != 0xf00baba2) {
-    free(pheader);
-    close(fd);
    char errMsg[ERR_MSG_SIZE];
    sprintf(errMsg, "%s has bad shared archive magic 0x%x, expecting 0xf00baba2",
                   classes_jsa, pheader->_magic);
+    close(fd);
+    free(pheader);
    THROW_NEW_DEBUGGER_EXCEPTION_(errMsg, 1);
  }

  // check version
  if (pheader->_version != CURRENT_ARCHIVE_VERSION) {
-    free(pheader);
-    close(fd);
    char errMsg[ERR_MSG_SIZE];
    sprintf(errMsg, "%s has wrong shared archive version %d, expecting %d",
                   classes_jsa, pheader->_version, CURRENT_ARCHIVE_VERSION);
+    close(fd);
+    free(pheader);
    THROW_NEW_DEBUGGER_EXCEPTION_(errMsg, 1);
  }

--- a/hotspot/make/jprt.properties
+++ b/hotspot/make/jprt.properties
@ -356,14 +356,15 @@ jprt.make.rule.test.targets.standard.internalvmtests = \
  ${jprt.my.windows.x64}-fastdebug-c2-internalvmtests

 jprt.make.rule.test.targets.standard.reg.group = \
-  ${jprt.my.solaris.sparcv9}-{product|fastdebug}-c2-GROUP, \
-  ${jprt.my.solaris.x64}-{product|fastdebug}-c2-GROUP, \
-  ${jprt.my.linux.i586}-{product|fastdebug}-c2-GROUP, \
-  ${jprt.my.linux.x64}-{product|fastdebug}-c2-GROUP, \
-  ${jprt.my.windows.i586}-{product|fastdebug}-c2-GROUP, \
-  ${jprt.my.windows.x64}-{product|fastdebug}-c2-GROUP, \
-  ${jprt.my.linux.i586}-{product|fastdebug}-c1-GROUP, \
-  ${jprt.my.windows.i586}-{product|fastdebug}-c1-GROUP
+  ${jprt.my.solaris.sparcv9}-fastdebug-c2-GROUP, \
+  ${jprt.my.solaris.x64}-fastdebug-c2-GROUP, \
+  ${jprt.my.linux.i586}-fastdebug-c2-GROUP, \
+  ${jprt.my.linux.x64}-fastdebug-c2-GROUP, \
+  ${jprt.my.macosx.x64}-fastdebug-c2-GROUP, \
+  ${jprt.my.windows.i586}-fastdebug-c2-GROUP, \
+  ${jprt.my.windows.x64}-fastdebug-c2-GROUP, \
+  ${jprt.my.linux.i586}-fastdebug-c1-GROUP, \
+  ${jprt.my.windows.i586}-fastdebug-c1-GROUP

 jprt.make.rule.test.targets.standard = \
  ${jprt.make.rule.test.targets.standard.client}, \
--- a/hotspot/make/solaris/makefiles/dtrace.make
+++ b/hotspot/make/solaris/makefiles/dtrace.make
@ -1,5 +1,5 @@
 #
-# Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@ -58,6 +58,8 @@ DTRACE_SRCDIR = $(GAMMADIR)/src/os/$(Platform_os_family)/dtrace
 DTRACE_COMMON_SRCDIR = $(GAMMADIR)/src/os/posix/dtrace
 DTRACE = dtrace
 DTRACE.o = $(DTRACE).o
+DTRACE_JHELPER = dtrace_jhelper
+DTRACE_JHELPER.o = $(DTRACE_JHELPER).o

 # to remove '-g' option which causes link problems
 # also '-z nodefs' is used as workaround
@ -255,7 +257,10 @@ ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 endif

 $(DTRACE).d: $(DTRACE_COMMON_SRCDIR)/hotspot.d $(DTRACE_COMMON_SRCDIR)/hotspot_jni.d \
-             $(DTRACE_COMMON_SRCDIR)/hs_private.d $(DTRACE_SRCDIR)/jhelper.d
+             $(DTRACE_COMMON_SRCDIR)/hs_private.d
+	$(QUIETLY) cat $^ > $@
+
+$(DTRACE_JHELPER).d: $(DTRACE_SRCDIR)/jhelper.d
 	$(QUIETLY) cat $^ > $@

 DTraced_Files = ciEnv.o \
@ -280,7 +285,7 @@ DTraced_Files = ciEnv.o \
                vmGCOperations.o \

 # Dtrace is available, so we build $(DTRACE.o)  
-$(DTRACE.o): $(DTRACE).d $(JVMOFFS).h $(JVMOFFS)Index.h $(DTraced_Files)
+$(DTRACE.o): $(DTRACE).d $(DTraced_Files)
 	@echo Compiling $(DTRACE).d

 	$(QUIETLY) $(DTRACE_PROG) $(DTRACE_OPTS) -C -I. -G -xlazyload -o $@ -s $(DTRACE).d \
@ -344,6 +349,11 @@ $(DtraceOutDir)/hs_private.h: $(DTRACE_COMMON_SRCDIR)/hs_private.d | $(DtraceOut

 dtrace_gen_headers: $(DtraceOutDir)/hotspot.h $(DtraceOutDir)/hotspot_jni.h $(DtraceOutDir)/hs_private.h

+# The jhelper.d and hotspot probes are separated into two different SUNW_dof sections.
+# Now the jhelper.d is built without the -Xlazyload flag.
+$(DTRACE_JHELPER.o) : $(DTRACE_JHELPER).d $(JVMOFFS).h $(JVMOFFS)Index.h
+	@echo Compiling $(DTRACE_JHELPER).d
+	$(QUIETLY) $(DTRACE_PROG) $(DTRACE_OPTS) -C -I. -G -o $@ -s $(DTRACE_JHELPER).d

 .PHONY: dtraceCheck

@ -372,7 +382,7 @@ endif # ifneq ("$(patchDtraceFound)", "")
 ifneq ("${DTRACE_PROG}", "")
 ifeq ("${HOTSPOT_DISABLE_DTRACE_PROBES}", "")

-DTRACE_OBJS = $(DTRACE.o) $(JVMOFFS.o)
+DTRACE_OBJS = $(DTRACE.o) $(JVMOFFS.o) $(DTRACE_JHELPER.o)
 CFLAGS += $(DTRACE_INCL) -DDTRACE_ENABLED
 MAPFILE_DTRACE_OPT = $(MAPFILE_DTRACE)

--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp
@ -298,6 +298,7 @@ class Assembler : public AbstractAssembler {
    LWZ_OPCODE   = (32u << OPCODE_SHIFT),
    LWZX_OPCODE  = (31u << OPCODE_SHIFT |  23u << 1),
    LWZU_OPCODE  = (33u << OPCODE_SHIFT),
+    LWBRX_OPCODE = (31u << OPCODE_SHIFT |  534 << 1),

    LHA_OPCODE   = (42u << OPCODE_SHIFT),
    LHAX_OPCODE  = (31u << OPCODE_SHIFT | 343u << 1),
@ -306,6 +307,7 @@ class Assembler : public AbstractAssembler {
    LHZ_OPCODE   = (40u << OPCODE_SHIFT),
    LHZX_OPCODE  = (31u << OPCODE_SHIFT | 279u << 1),
    LHZU_OPCODE  = (41u << OPCODE_SHIFT),
+    LHBRX_OPCODE = (31u << OPCODE_SHIFT |  790 << 1),

    LBZ_OPCODE   = (34u << OPCODE_SHIFT),
    LBZX_OPCODE  = (31u << OPCODE_SHIFT |  87u << 1),
@ -1364,11 +1366,17 @@ class Assembler : public AbstractAssembler {
  inline void lwax( Register d, Register s1, Register s2);
  inline void lwa(  Register d, int si16,    Register s1);

+  // 4 bytes reversed
+  inline void lwbrx( Register d, Register s1, Register s2);
+
  // 2 bytes
  inline void lhzx( Register d, Register s1, Register s2);
  inline void lhz(  Register d, int si16,    Register s1);
  inline void lhzu( Register d, int si16,    Register s1);

+  // 2 bytes reversed
+  inline void lhbrx( Register d, Register s1, Register s2);
+
  // 2 bytes
  inline void lhax( Register d, Register s1, Register s2);
  inline void lha(  Register d, int si16,    Register s1);
@ -1858,10 +1866,12 @@ class Assembler : public AbstractAssembler {
  inline void lwz(  Register d, int si16);
  inline void lwax( Register d, Register s2);
  inline void lwa(  Register d, int si16);
+  inline void lwbrx(Register d, Register s2);
  inline void lhzx( Register d, Register s2);
  inline void lhz(  Register d, int si16);
  inline void lhax( Register d, Register s2);
  inline void lha(  Register d, int si16);
+  inline void lhbrx(Register d, Register s2);
  inline void lbzx( Register d, Register s2);
  inline void lbz(  Register d, int si16);
  inline void ldx(  Register d, Register s2);
--- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp
+++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp
@ -263,10 +263,14 @@ inline void Assembler::lwzu( Register d, int si16,    Register s1) { assert(d !=
 inline void Assembler::lwax( Register d, Register s1, Register s2) { emit_int32(LWAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::lwa(  Register d, int si16,    Register s1) { emit_int32(LWA_OPCODE  | rt(d) | ds(si16)   | ra0mem(s1));}

+inline void Assembler::lwbrx( Register d, Register s1, Register s2) { emit_int32(LWBRX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
+
 inline void Assembler::lhzx( Register d, Register s1, Register s2) { emit_int32(LHZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::lhz(  Register d, int si16,    Register s1) { emit_int32(LHZ_OPCODE  | rt(d) | d1(si16)   | ra0mem(s1));}
 inline void Assembler::lhzu( Register d, int si16,    Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}

+inline void Assembler::lhbrx( Register d, Register s1, Register s2) { emit_int32(LHBRX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
+
 inline void Assembler::lhax( Register d, Register s1, Register s2) { emit_int32(LHAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::lha(  Register d, int si16,    Register s1) { emit_int32(LHA_OPCODE  | rt(d) | d1(si16)   | ra0mem(s1));}
 inline void Assembler::lhau( Register d, int si16,    Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHAU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
@ -736,10 +740,12 @@ inline void Assembler::lwzx( Register d, Register s2) { emit_int32( LWZX_OPCODE
 inline void Assembler::lwz(  Register d, int si16   ) { emit_int32( LWZ_OPCODE  | rt(d) | d1(si16));}
 inline void Assembler::lwax( Register d, Register s2) { emit_int32( LWAX_OPCODE | rt(d) | rb(s2));}
 inline void Assembler::lwa(  Register d, int si16   ) { emit_int32( LWA_OPCODE  | rt(d) | ds(si16));}
+inline void Assembler::lwbrx(Register d, Register s2) { emit_int32( LWBRX_OPCODE| rt(d) | rb(s2));}
 inline void Assembler::lhzx( Register d, Register s2) { emit_int32( LHZX_OPCODE | rt(d) | rb(s2));}
 inline void Assembler::lhz(  Register d, int si16   ) { emit_int32( LHZ_OPCODE  | rt(d) | d1(si16));}
 inline void Assembler::lhax( Register d, Register s2) { emit_int32( LHAX_OPCODE | rt(d) | rb(s2));}
 inline void Assembler::lha(  Register d, int si16   ) { emit_int32( LHA_OPCODE  | rt(d) | d1(si16));}
+inline void Assembler::lhbrx(Register d, Register s2) { emit_int32( LHBRX_OPCODE| rt(d) | rb(s2));}
 inline void Assembler::lbzx( Register d, Register s2) { emit_int32( LBZX_OPCODE | rt(d) | rb(s2));}
 inline void Assembler::lbz(  Register d, int si16   ) { emit_int32( LBZ_OPCODE  | rt(d) | d1(si16));}
 inline void Assembler::ld(   Register d, int si16   ) { emit_int32( LD_OPCODE   | rt(d) | ds(si16));}
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp
@ -119,9 +119,15 @@ void InterpreterMacroAssembler::check_and_handle_popframe(Register scratch_reg)
    // Call the Interpreter::remove_activation_preserving_args_entry()
    // func to get the address of the same-named entrypoint in the
    // generated interpreter code.
+#if defined(ABI_ELFv2)
+    call_c(CAST_FROM_FN_PTR(address,
+                            Interpreter::remove_activation_preserving_args_entry),
+           relocInfo::none);
+#else
    call_c(CAST_FROM_FN_PTR(FunctionDescriptor*,
                            Interpreter::remove_activation_preserving_args_entry),
           relocInfo::none);
+#endif

    // Jump to Interpreter::_remove_activation_preserving_args_entry.
    mtctr(R3_RET);
@ -331,29 +337,40 @@ void InterpreterMacroAssembler::empty_expression_stack() {
 void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(int         bcp_offset,
                                                          Register    Rdst,
                                                          signedOrNot is_signed) {
+#if defined(VM_LITTLE_ENDIAN)
+  if (bcp_offset) {
+    load_const_optimized(Rdst, bcp_offset);
+    lhbrx(Rdst, R14_bcp, Rdst);
+  } else {
+    lhbrx(Rdst, R14_bcp);
+  }
+  if (is_signed == Signed) {
+    extsh(Rdst, Rdst);
+  }
+#else
  // Read Java big endian format.
  if (is_signed == Signed) {
    lha(Rdst, bcp_offset, R14_bcp);
  } else {
    lhz(Rdst, bcp_offset, R14_bcp);
  }
-#if 0
-  assert(Rtmp != Rdst, "need separate temp register");
-  Register Rfirst = Rtmp;
-  lbz(Rfirst, bcp_offset, R14_bcp); // first byte
-  lbz(Rdst, bcp_offset+1, R14_bcp); // second byte
-
-  // Rdst = ((Rfirst<<8) & 0xFF00) | (Rdst &~ 0xFF00)
-  rldimi(/*RA=*/Rdst, /*RS=*/Rfirst, /*sh=*/8, /*mb=*/48);
-  if (is_signed == Signed) {
-    extsh(Rdst, Rdst);
-  }
 #endif
 }

 void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(int         bcp_offset,
                                                          Register    Rdst,
                                                          signedOrNot is_signed) {
+#if defined(VM_LITTLE_ENDIAN)
+  if (bcp_offset) {
+    load_const_optimized(Rdst, bcp_offset);
+    lwbrx(Rdst, R14_bcp, Rdst);
+  } else {
+    lwbrx(Rdst, R14_bcp);
+  }
+  if (is_signed == Signed) {
+    extsw(Rdst, Rdst);
+  }
+#else
  // Read Java big endian format.
  if (bcp_offset & 3) { // Offset unaligned?
    load_const_optimized(Rdst, bcp_offset);
@ -369,18 +386,26 @@ void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(int         bcp_offset
      lwz(Rdst, bcp_offset, R14_bcp);
    }
  }
+#endif
 }

+
 // Load the constant pool cache index from the bytecode stream.
 //
 // Kills / writes:
 //   - Rdst, Rscratch
 void InterpreterMacroAssembler::get_cache_index_at_bcp(Register Rdst, int bcp_offset, size_t index_size) {
  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  // Cache index is always in the native format, courtesy of Rewriter.
  if (index_size == sizeof(u2)) {
-    get_2_byte_integer_at_bcp(bcp_offset, Rdst, Unsigned);
+    lhz(Rdst, bcp_offset, R14_bcp);
  } else if (index_size == sizeof(u4)) {
-    get_4_byte_integer_at_bcp(bcp_offset, Rdst, Signed);
+    if (bcp_offset & 3) {
+      load_const_optimized(Rdst, bcp_offset);
+      lwax(Rdst, R14_bcp, Rdst);
+    } else {
+      lwa(Rdst, bcp_offset, R14_bcp);
+    }
    assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
    nand(Rdst, Rdst, Rdst); // convert to plain index
  } else if (index_size == sizeof(u1)) {
@ -397,6 +422,29 @@ void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, int b
  add(cache, R27_constPoolCache, cache);
 }

+// Load 4-byte signed or unsigned integer in Java format (that is, big-endian format)
+// from (Rsrc)+offset.
+void InterpreterMacroAssembler::get_u4(Register Rdst, Register Rsrc, int offset,
+                                       signedOrNot is_signed) {
+#if defined(VM_LITTLE_ENDIAN)
+  if (offset) {
+    load_const_optimized(Rdst, offset);
+    lwbrx(Rdst, Rdst, Rsrc);
+  } else {
+    lwbrx(Rdst, Rsrc);
+  }
+  if (is_signed == Signed) {
+    extsw(Rdst, Rdst);
+  }
+#else
+  if (is_signed == Signed) {
+    lwa(Rdst, offset, Rsrc);
+  } else {
+    lwz(Rdst, offset, Rsrc);
+  }
+#endif
+}
+
 // Load object from cpool->resolved_references(index).
 void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
  assert_different_registers(result, index);
--- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp
+++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp
@ -130,6 +130,7 @@ class InterpreterMacroAssembler: public MacroAssembler {

  void get_cache_and_index_at_bcp(Register cache, int bcp_offset, size_t index_size = sizeof(u2));

+  void get_u4(Register Rdst, Register Rsrc, int offset, signedOrNot is_signed);

  // common code

--- a/hotspot/src/cpu/ppc/vm/ppc.ad
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad
@ -1283,8 +1283,6 @@ int Compile::ConstantTable::calculate_table_base_offset() const {

 bool MachConstantBaseNode::requires_postalloc_expand() const { return true; }
 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
-  Compile *C = ra_->C;
-
  iRegPdstOper *op_dst = new iRegPdstOper();
  MachNode *m1 = new loadToc_hiNode();
  MachNode *m2 = new loadToc_loNode();
@ -2229,7 +2227,7 @@ const bool Matcher::isSimpleConstant64(jlong value) {
 }
 /* TODO: PPC port
 // Make a new machine dependent decode node (with its operands).
-MachTypeNode *Matcher::make_decode_node(Compile *C) {
+MachTypeNode *Matcher::make_decode_node() {
  assert(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0,
         "This method is only implemented for unscaled cOops mode so far");
  MachTypeNode *decode = new decodeN_unscaledNode();
@ -2593,7 +2591,7 @@ typedef struct {
  MachNode        *_last;
 } loadConLNodesTuple;

-loadConLNodesTuple loadConLNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
+loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
                                             OptoReg::Name reg_second, OptoReg::Name reg_first) {
  loadConLNodesTuple nodes;

@ -2669,7 +2667,7 @@ encode %{
  enc_class postalloc_expand_load_long_constant(iRegLdst dst, immL src, iRegLdst toc) %{
    // Create new nodes.
    loadConLNodesTuple loadConLNodes =
-      loadConLNodesTuple_create(C, ra_, n_toc, op_src,
+      loadConLNodesTuple_create(ra_, n_toc, op_src,
                                ra_->get_reg_second(this), ra_->get_reg_first(this));

    // Push new nodes.
@ -3391,7 +3389,7 @@ encode %{
    immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));

    loadConLNodesTuple loadConLNodes =
-      loadConLNodesTuple_create(C, ra_, n_toc, op_repl,
+      loadConLNodesTuple_create(ra_, n_toc, op_repl,
                                ra_->get_reg_second(this), ra_->get_reg_first(this));

    // Push new nodes.
@ -3611,7 +3609,7 @@ encode %{

    // Create the nodes for loading the IC from the TOC.
    loadConLNodesTuple loadConLNodes_IC =
-      loadConLNodesTuple_create(C, ra_, n_toc, new immLOper((jlong)Universe::non_oop_word()),
+      loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong)Universe::non_oop_word()),
                                OptoReg::Name(R19_H_num), OptoReg::Name(R19_num));

    // Create the call node.
@ -3765,7 +3763,7 @@ encode %{
 #if defined(ABI_ELFv2)
    jlong entry_address = (jlong) this->entry_point();
    assert(entry_address, "need address here");
-    loadConLNodes_Entry = loadConLNodesTuple_create(C, ra_, n_toc, new immLOper(entry_address),
+    loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
                                                    OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));
 #else
    // Get the struct that describes the function we are about to call.
@ -3777,13 +3775,13 @@ encode %{
    loadConLNodesTuple loadConLNodes_Toc;

    // Create nodes and operands for loading the entry point.
-    loadConLNodes_Entry = loadConLNodesTuple_create(C, ra_, n_toc, new immLOper(entry_address),
+    loadConLNodes_Entry = loadConLNodesTuple_create(ra_, n_toc, new immLOper(entry_address),
                                                    OptoReg::Name(R12_H_num), OptoReg::Name(R12_num));


    // Create nodes and operands for loading the env pointer.
    if (fd->env() != NULL) {
-      loadConLNodes_Env = loadConLNodesTuple_create(C, ra_, n_toc, new immLOper((jlong) fd->env()),
+      loadConLNodes_Env = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->env()),
                                                    OptoReg::Name(R11_H_num), OptoReg::Name(R11_num));
    } else {
      loadConLNodes_Env._large_hi = NULL;
@ -3796,7 +3794,7 @@ encode %{
    }

    // Create nodes and operands for loading the Toc point.
-    loadConLNodes_Toc = loadConLNodesTuple_create(C, ra_, n_toc, new immLOper((jlong) fd->toc()),
+    loadConLNodes_Toc = loadConLNodesTuple_create(ra_, n_toc, new immLOper((jlong) fd->toc()),
                                                  OptoReg::Name(R2_H_num), OptoReg::Name(R2_num));
 #endif // ABI_ELFv2
    // mtctr node
--- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp
+++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp
@ -176,8 +176,12 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
  const Register size  = R12_scratch2;
  __ get_cache_and_index_at_bcp(cache, 1, index_size);

-  // Big Endian (get least significant byte of 64 bit value):
+  // Get least significant byte of 64 bit value:
+#if defined(VM_LITTLE_ENDIAN)
+  __ lbz(size, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()), cache);
+#else
  __ lbz(size, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()) + 7, cache);
+#endif
  __ sldi(size, size, Interpreter::logStackElementSize);
  __ add(R15_esp, R15_esp, size);
  __ dispatch_next(state, step);
@ -858,7 +862,9 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
  // Our signature handlers copy required arguments to the C stack
  // (outgoing C args), R3_ARG1 to R10_ARG8, and FARG1 to FARG13.
  __ mr(R3_ARG1, R18_locals);
+#if !defined(ABI_ELFv2)
  __ ld(signature_handler_fd, 0, signature_handler_fd);
+#endif

  __ call_stub(signature_handler_fd);

@ -1020,8 +1026,13 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
  // native result across the call. No oop is present.

  __ mr(R3_ARG1, R16_thread);
+#if defined(ABI_ELFv2)
+  __ call_c(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
+            relocInfo::none);
+#else
  __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans),
            relocInfo::none);
+#endif

  __ bind(sync_check_done);

--- a/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp
+++ b/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp
@ -189,8 +189,12 @@ void TemplateTable::patch_bytecode(Bytecodes::Code new_bc, Register Rnew_bc, Reg
      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
      __ get_cache_and_index_at_bcp(Rtemp /* dst = cache */, 1);
-      // Big Endian: ((*(cache+indices))>>((1+byte_no)*8))&0xFF
+      // ((*(cache+indices))>>((1+byte_no)*8))&0xFF:
+#if defined(VM_LITTLE_ENDIAN)
+      __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 1 + byte_no, Rtemp);
+#else
      __ lbz(Rnew_bc, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (1 + byte_no), Rtemp);
+#endif
      __ cmpwi(CCR0, Rnew_bc, 0);
      __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc);
      __ beq(CCR0, L_patch_done);
@ -1839,8 +1843,8 @@ void TemplateTable::tableswitch() {
  __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt));

  // Load lo & hi.
-  __ lwz(Rlow_byte, BytesPerInt, Rdef_offset_addr);
-  __ lwz(Rhigh_byte, BytesPerInt * 2, Rdef_offset_addr);
+  __ get_u4(Rlow_byte, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
+  __ get_u4(Rhigh_byte, Rdef_offset_addr, 2 *BytesPerInt, InterpreterMacroAssembler::Unsigned);

  // Check for default case (=index outside [low,high]).
  __ cmpw(CCR0, R17_tos, Rlow_byte);
@ -1854,12 +1858,17 @@ void TemplateTable::tableswitch() {
  __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2);
  __ sldi(Rindex, Rindex, LogBytesPerInt);
  __ addi(Rindex, Rindex, 3 * BytesPerInt);
+#if defined(VM_LITTLE_ENDIAN)
+  __ lwbrx(Roffset, Rdef_offset_addr, Rindex);
+  __ extsw(Roffset, Roffset);
+#else
  __ lwax(Roffset, Rdef_offset_addr, Rindex);
+#endif
  __ b(Ldispatch);

  __ bind(Ldefault_case);
  __ profile_switch_default(Rhigh_byte, Rscratch1);
-  __ lwa(Roffset, 0, Rdef_offset_addr);
+  __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed);

  __ bind(Ldispatch);

@ -1875,12 +1884,11 @@ void TemplateTable::lookupswitch() {
 // Table switch using linear search through cases.
 // Bytecode stream format:
 // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ...
-// Note: Everything is big-endian format here. So on little endian machines, we have to revers offset and count and cmp value.
+// Note: Everything is big-endian format here.
 void TemplateTable::fast_linearswitch() {
  transition(itos, vtos);

-  Label Lloop_entry, Lsearch_loop, Lfound, Lcontinue_execution, Ldefault_case;
-
+  Label Lloop_entry, Lsearch_loop, Lcontinue_execution, Ldefault_case;
  Register Rcount           = R3_ARG1,
           Rcurrent_pair    = R4_ARG2,
           Rdef_offset_addr = R5_ARG3, // Is going to contain address of default offset.
@ -1894,47 +1902,40 @@ void TemplateTable::fast_linearswitch() {
  __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt));

  // Setup loop counter and limit.
-  __ lwz(Rcount, BytesPerInt, Rdef_offset_addr);    // Load count.
+  __ get_u4(Rcount, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
  __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair.

-  // Set up search loop.
-  __ cmpwi(CCR0, Rcount, 0);
-  __ beq(CCR0, Ldefault_case);
-
  __ mtctr(Rcount);
+  __ cmpwi(CCR0, Rcount, 0);
+  __ bne(CCR0, Lloop_entry);

-  // linear table search
-  __ bind(Lsearch_loop);
-
-  __ lwz(Rvalue, 0, Rcurrent_pair);
-  __ lwa(Roffset, 1 * BytesPerInt, Rcurrent_pair);
-
-  __ cmpw(CCR0, Rvalue, Rcmp_value);
-  __ beq(CCR0, Lfound);
-
-  __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt);
-  __ bdnz(Lsearch_loop);
-
-  // default case
+  // Default case
  __ bind(Ldefault_case);
-
-  __ lwa(Roffset, 0, Rdef_offset_addr);
+  __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed);
  if (ProfileInterpreter) {
    __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */);
-    __ b(Lcontinue_execution);
+  }
+  __ b(Lcontinue_execution);
+
+  // Next iteration
+  __ bind(Lsearch_loop);
+  __ bdz(Ldefault_case);
+  __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt);
+  __ bind(Lloop_entry);
+  __ get_u4(Rvalue, Rcurrent_pair, 0, InterpreterMacroAssembler::Unsigned);
+  __ cmpw(CCR0, Rvalue, Rcmp_value);
+  __ bne(CCR0, Lsearch_loop);
+
+  // Found, load offset.
+  __ get_u4(Roffset, Rcurrent_pair, BytesPerInt, InterpreterMacroAssembler::Signed);
+  // Calculate case index and profile
+  __ mfctr(Rcurrent_pair);
+  if (ProfileInterpreter) {
+    __ sub(Rcurrent_pair, Rcount, Rcurrent_pair);
+    __ profile_switch_case(Rcurrent_pair, Rcount /*scratch*/, Rdef_offset_addr/*scratch*/, Rscratch);
  }

-  // Entry found, skip Roffset bytecodes and continue.
-  __ bind(Lfound);
-  if (ProfileInterpreter) {
-    // Calc the num of the pair we hit. Careful, Rcurrent_pair points 2 ints
-    // beyond the actual current pair due to the auto update load above!
-    __ sub(Rcurrent_pair, Rcurrent_pair, Rdef_offset_addr);
-    __ addi(Rcurrent_pair, Rcurrent_pair, - 2 * BytesPerInt);
-    __ srdi(Rcurrent_pair, Rcurrent_pair, LogBytesPerInt + 1);
-    __ profile_switch_case(Rcurrent_pair, Rcount /*scratch*/, Rdef_offset_addr/*scratch*/, Rscratch);
-    __ bind(Lcontinue_execution);
-  }
+  __ bind(Lcontinue_execution);
  __ add(R14_bcp, Roffset, R14_bcp);
  __ dispatch_next(vtos);
 }
@ -1990,7 +1991,7 @@ void TemplateTable::fast_binaryswitch() {

  // initialize i & j
  __ li(Ri,0);
-  __ lwz(Rj, -BytesPerInt, Rarray);
+  __ get_u4(Rj, Rarray, -BytesPerInt, InterpreterMacroAssembler::Unsigned);

  // and start.
  Label entry;
@ -2007,7 +2008,11 @@ void TemplateTable::fast_binaryswitch() {
    //   i = h;
    // }
    __ sldi(Rscratch, Rh, log_entry_size);
+#if defined(VM_LITTLE_ENDIAN)
+    __ lwbrx(Rscratch, Rscratch, Rarray);
+#else
    __ lwzx(Rscratch, Rscratch, Rarray);
+#endif

    // if (key < current value)
    //   Rh = Rj
@ -2039,20 +2044,20 @@ void TemplateTable::fast_binaryswitch() {
  // Ri = value offset
  __ sldi(Ri, Ri, log_entry_size);
  __ add(Ri, Ri, Rarray);
-  __ lwz(Rscratch, 0, Ri);
+  __ get_u4(Rscratch, Ri, 0, InterpreterMacroAssembler::Unsigned);

  Label not_found;
  // Ri = offset offset
  __ cmpw(CCR0, Rkey, Rscratch);
  __ beq(CCR0, not_found);
  // entry not found -> j = default offset
-  __ lwz(Rj, -2 * BytesPerInt, Rarray);
+  __ get_u4(Rj, Rarray, -2 * BytesPerInt, InterpreterMacroAssembler::Unsigned);
  __ b(default_case);

  __ bind(not_found);
  // entry found -> j = offset
  __ profile_switch_case(Rh, Rj, Rscratch, Rkey);
-  __ lwz(Rj, BytesPerInt, Ri);
+  __ get_u4(Rj, Ri, BytesPerInt, InterpreterMacroAssembler::Unsigned);

  if (ProfileInterpreter) {
    __ b(continue_execution);
@ -2147,8 +2152,11 @@ void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Regist

  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
  // We are resolved if the indices offset contains the current bytecode.
-  // Big Endian:
+#if defined(VM_LITTLE_ENDIAN)
+  __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + byte_no + 1, Rcache);
+#else
  __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache);
+#endif
  // Acquire by cmp-br-isync (see below).
  __ cmpdi(CCR0, Rscratch, (int)bytecode());
  __ beq(CCR0, Lresolved);
--- a/hotspot/src/cpu/sparc/vm/sparc.ad
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad
@ -6184,7 +6184,11 @@ instruct loadConP_no_oop_cheap(iRegP dst, immP_no_oop_cheap con) %{
  ins_cost(DEFAULT_COST * 3/2);
  format %{ "SET    $con,$dst\t! non-oop ptr" %}
  ins_encode %{
-    __ set($con$$constant, $dst$$Register);
+    if (_opnds[1]->constant_reloc() == relocInfo::metadata_type) {
+      __ set_metadata_constant((Metadata*)$con$$constant, $dst$$Register);
+    } else {
+      __ set($con$$constant, $dst$$Register);
+    }
  %}
  ins_pipe(loadConP);
 %}
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp
@ -3853,6 +3853,15 @@ void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
  emit_int8((unsigned char)(0xC0 | encode));
 }

+// Carry-Less Multiplication Quadword
+void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
+  assert(VM_Version::supports_clmul(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  emit_int8(0x44);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8((unsigned char)mask);
+}
+
 // Carry-Less Multiplication Quadword
 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
  assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp
@ -1837,6 +1837,7 @@ private:
  void vpbroadcastd(XMMRegister dst, XMMRegister src);

  // Carry-Less Multiplication Quadword
+  void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
  void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);

  // AVX instruction which is used to clear upper 128 bits of YMM registers and
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp
@ -7316,17 +7316,34 @@ void MacroAssembler::update_byte_crc32(Register crc, Register val, Register tabl
 * Fold 128-bit data chunk
 */
 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
-  vpclmulhdq(xtmp, xK, xcrc); // [123:64]
-  vpclmulldq(xcrc, xK, xcrc); // [63:0]
-  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
-  pxor(xcrc, xtmp);
+  if (UseAVX > 0) {
+    vpclmulhdq(xtmp, xK, xcrc); // [123:64]
+    vpclmulldq(xcrc, xK, xcrc); // [63:0]
+    vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
+    pxor(xcrc, xtmp);
+  } else {
+    movdqa(xtmp, xcrc);
+    pclmulhdq(xtmp, xK);   // [123:64]
+    pclmulldq(xcrc, xK);   // [63:0]
+    pxor(xcrc, xtmp);
+    movdqu(xtmp, Address(buf, offset));
+    pxor(xcrc, xtmp);
+  }
 }

 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
-  vpclmulhdq(xtmp, xK, xcrc);
-  vpclmulldq(xcrc, xK, xcrc);
-  pxor(xcrc, xbuf);
-  pxor(xcrc, xtmp);
+  if (UseAVX > 0) {
+    vpclmulhdq(xtmp, xK, xcrc);
+    vpclmulldq(xcrc, xK, xcrc);
+    pxor(xcrc, xbuf);
+    pxor(xcrc, xtmp);
+  } else {
+    movdqa(xtmp, xcrc);
+    pclmulhdq(xtmp, xK);
+    pclmulldq(xcrc, xK);
+    pxor(xcrc, xbuf);
+    pxor(xcrc, xtmp);
+  }
 }

 /**
@ -7444,9 +7461,17 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
  // Fold 128 bits in xmm1 down into 32 bits in crc register.
  BIND(L_fold_128b);
  movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
-  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
-  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
-  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  if (UseAVX > 0) {
+    vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
+    vpand(xmm3, xmm0, xmm2, false /* vector256 */);
+    vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  } else {
+    movdqa(xmm2, xmm0);
+    pclmulqdq(xmm2, xmm1, 0x1);
+    movdqa(xmm3, xmm0);
+    pand(xmm3, xmm2);
+    pclmulqdq(xmm0, xmm3, 0x1);
+  }
  psrldq(xmm1, 8);
  psrldq(xmm2, 4);
  pxor(xmm0, xmm1);
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp
@ -966,6 +966,16 @@ public:
  void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
  void mulss(XMMRegister dst, AddressLiteral src);

+  // Carry-Less Multiplication Quadword
+  void pclmulldq(XMMRegister dst, XMMRegister src) {
+    // 0x00 - multiply lower 64 bits [0:63]
+    Assembler::pclmulqdq(dst, src, 0x00);
+  }
+  void pclmulhdq(XMMRegister dst, XMMRegister src) {
+    // 0x11 - multiply upper 64 bits [64:127]
+    Assembler::pclmulqdq(dst, src, 0x11);
+  }
+
  void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
  void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
  void sqrtsd(XMMRegister dst, AddressLiteral src);
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp
@ -559,7 +559,7 @@ void VM_Version::get_processor_features() {
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

-  if (UseCLMUL && (UseAVX > 0) && (UseSSE > 2)) {
+  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
@ -805,6 +805,21 @@ void VM_Version::get_processor_features() {
        }
      }
    }
+    if ((cpu_family() == 0x06) &&
+        ((extended_cpu_model() == 0x36) || // Centerton
+         (extended_cpu_model() == 0x37) || // Silvermont
+         (extended_cpu_model() == 0x4D))) {
+#ifdef COMPILER2
+      if (FLAG_IS_DEFAULT(OptoScheduling)) {
+        OptoScheduling = true;
+      }
+#endif
+      if (supports_sse4_2()) { // Silvermont
+        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
+    }
  }

  // Use count leading zeros count instruction if available.
@ -892,23 +907,25 @@ void VM_Version::get_processor_features() {
  AllocatePrefetchDistance = allocate_prefetch_distance();
  AllocatePrefetchStyle    = allocate_prefetch_style();

-  if( is_intel() && cpu_family() == 6 && supports_sse3() ) {
-    if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core
+  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
+    if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core
 #ifdef _LP64
      AllocatePrefetchDistance = 384;
 #else
      AllocatePrefetchDistance = 320;
 #endif
    }
-    if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
+    if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
      AllocatePrefetchDistance = 192;
      AllocatePrefetchLines = 4;
+    }
 #ifdef COMPILER2
-      if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+    if (supports_sse4_2()) {
+      if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
-#endif
    }
+#endif
  }
  assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");

--- a/hotspot/src/os/solaris/vm/attachListener_solaris.cpp
+++ b/hotspot/src/os/solaris/vm/attachListener_solaris.cpp
@ -199,23 +199,29 @@ class ArgumentIterator : public StackObj {
 // Calls from the door function to check that the client credentials
 // match this process. Returns 0 if credentials okay, otherwise -1.
 static int check_credentials() {
-  door_cred_t cred_info;
+  ucred_t *cred_info = NULL;
+  int ret = -1; // deny by default

  // get client credentials
-  if (door_cred(&cred_info) == -1) {
-    return -1; // unable to get them
+  if (door_ucred(&cred_info) == -1) {
+    return -1; // unable to get them, deny
  }

  // get our euid/eguid (probably could cache these)
  uid_t euid = geteuid();
  gid_t egid = getegid();

-  // check that the effective uid/gid matches - discuss this with Jeff.
-  if (cred_info.dc_euid == euid && cred_info.dc_egid == egid) {
-    return 0;  // okay
-  } else {
-    return -1; // denied
+  // get euid/egid from ucred_free
+  uid_t ucred_euid = ucred_geteuid(cred_info);
+  gid_t ucred_egid = ucred_getegid(cred_info);
+
+  // check that the effective uid/gid matches
+  if (ucred_euid == euid && ucred_egid == egid) {
+    ret =  0;  // allow
  }
+
+  ucred_free(cred_info);
+  return ret;
 }


--- a/hotspot/src/share/vm/adlc/output_c.cpp
+++ b/hotspot/src/share/vm/adlc/output_c.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -997,7 +997,7 @@ void ArchDesc::build_pipe_classes(FILE *fp_cpp) {
  int nopcnt = 0;
  for ( _pipeline->_noplist.reset(); (nop = _pipeline->_noplist.iter()) != NULL; nopcnt++ );

-  fprintf(fp_cpp, "void Bundle::initialize_nops(MachNode * nop_list[%d], Compile *C) {\n", nopcnt);
+  fprintf(fp_cpp, "void Bundle::initialize_nops(MachNode * nop_list[%d]) {\n", nopcnt);
  int i = 0;
  for ( _pipeline->_noplist.reset(); (nop = _pipeline->_noplist.iter()) != NULL; i++ ) {
    fprintf(fp_cpp, "  nop_list[%d] = (MachNode *) new %sNode();\n", i, nop);
@ -1369,7 +1369,7 @@ static void generate_peepreplace( FILE *fp, FormDict &globals, PeepMatch *pmatch
        fprintf(fp, "        ra_->add_reference(root, inst%d);\n", inst_num);
        fprintf(fp, "        ra_->set_oop (root, ra_->is_oop(inst%d));\n", inst_num);
        fprintf(fp, "        ra_->set_pair(root->_idx, ra_->get_reg_second(inst%d), ra_->get_reg_first(inst%d));\n", inst_num, inst_num);
-        fprintf(fp, "        root->_opnds[0] = inst%d->_opnds[0]->clone(C); // result\n", inst_num);
+        fprintf(fp, "        root->_opnds[0] = inst%d->_opnds[0]->clone(); // result\n", inst_num);
        fprintf(fp, "        // ----- Done with initial setup -----\n");
      } else {
        if( (op_form == NULL) || (op_form->is_base_constant(globals) == Form::none) ) {
@ -1382,7 +1382,7 @@ static void generate_peepreplace( FILE *fp, FormDict &globals, PeepMatch *pmatch
        } else {
          fprintf(fp, "        // no ideal edge for constants after matching\n");
        }
-        fprintf(fp, "        root->_opnds[%d] = inst%d->_opnds[%d]->clone(C);\n",
+        fprintf(fp, "        root->_opnds[%d] = inst%d->_opnds[%d]->clone();\n",
                opnds_index, inst_num, inst_op_num );
      }
      ++opnds_index;
@ -1402,7 +1402,7 @@ static void generate_peepreplace( FILE *fp, FormDict &globals, PeepMatch *pmatch
 // Define the Peephole method for an instruction node
 void ArchDesc::definePeephole(FILE *fp, InstructForm *node) {
  // Generate Peephole function header
-  fprintf(fp, "MachNode *%sNode::peephole( Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted, Compile* C ) {\n", node->_ident);
+  fprintf(fp, "MachNode *%sNode::peephole(Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted) {\n", node->_ident);
  fprintf(fp, "  bool  matches = true;\n");

  // Identify the maximum instruction position,
@ -1593,7 +1593,7 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) {
      }

      const char *resultOper = new_inst->reduce_result();
-      fprintf(fp,"  n%d->set_opnd_array(0, state->MachOperGenerator( %s, C ));\n",
+      fprintf(fp,"  n%d->set_opnd_array(0, state->MachOperGenerator(%s));\n",
              cnt, machOperEnum(resultOper));

      // get the formal operand NameList
@ -1634,7 +1634,7 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) {
          // If there is no use of the created operand, just skip it
          if (new_pos != NameList::Not_in_list) {
            //Copy the operand from the original made above
-            fprintf(fp,"  n%d->set_opnd_array(%d, op%d->clone(C)); // %s\n",
+            fprintf(fp,"  n%d->set_opnd_array(%d, op%d->clone()); // %s\n",
                    cnt, new_pos, exp_pos-node->num_opnds(), opid);
            // Check for who defines this operand & add edge if needed
            fprintf(fp,"  if(tmp%d != NULL)\n", exp_pos);
@ -1662,7 +1662,7 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) {
          new_pos = new_inst->operand_position(parameter,Component::USE);
          if (new_pos != -1) {
            // Copy the operand from the ExpandNode to the new node
-            fprintf(fp,"  n%d->set_opnd_array(%d, opnd_array(%d)->clone(C)); // %s\n",
+            fprintf(fp,"  n%d->set_opnd_array(%d, opnd_array(%d)->clone()); // %s\n",
                    cnt, new_pos, exp_pos, opid);
            // For each operand add appropriate input edges by looking at tmp's
            fprintf(fp,"  if(tmp%d == this) {\n", exp_pos);
@ -1729,14 +1729,14 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) {
          declared_def = true;
        }
        if (op && op->_interface && op->_interface->is_RegInterface()) {
-          fprintf(fp,"  def = new MachTempNode(state->MachOperGenerator( %s, C ));\n",
+          fprintf(fp,"  def = new MachTempNode(state->MachOperGenerator(%s));\n",
                  machOperEnum(op->_ident));
          fprintf(fp,"  add_req(def);\n");
          // The operand for TEMP is already constructed during
          // this mach node construction, see buildMachNode().
          //
          // int idx  = node->operand_position_format(comp->_name);
-          // fprintf(fp,"  set_opnd_array(%d, state->MachOperGenerator( %s, C ));\n",
+          // fprintf(fp,"  set_opnd_array(%d, state->MachOperGenerator(%s));\n",
          //         idx, machOperEnum(op->_ident));
        } else {
          assert(false, "can't have temps which aren't registers");
@ -1802,7 +1802,7 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) {
        uint j = node->unique_opnds_idx(i);
        // unique_opnds_idx(i) is unique if unique_opnds_idx(j) is not unique.
        if( j != node->unique_opnds_idx(j) ) {
-          fprintf(fp,"  set_opnd_array(%d, opnd_array(%d)->clone(C)); // %s\n",
+          fprintf(fp,"  set_opnd_array(%d, opnd_array(%d)->clone()); // %s\n",
                  new_num_opnds, i, comp->_name);
          // delete not unique edges here
          fprintf(fp,"  for(unsigned i = 0; i < num%d; i++) {\n", i);
@ -2839,12 +2839,12 @@ static void defineIn_RegMask(FILE *fp, FormDict &globals, OperandForm &oper) {

 // generate code to create a clone for a class derived from MachOper
 //
-// (0)  MachOper  *MachOperXOper::clone(Compile* C) const {
+// (0)  MachOper  *MachOperXOper::clone() const {
 // (1)    return new MachXOper( _ccode, _c0, _c1, ..., _cn);
 // (2)  }
 //
 static void defineClone(FILE *fp, FormDict &globalNames, OperandForm &oper) {
-  fprintf(fp,"MachOper *%sOper::clone(Compile* C) const {\n", oper._ident);
+  fprintf(fp,"MachOper *%sOper::clone() const {\n", oper._ident);
  // Check for constants that need to be copied over
  const int  num_consts    = oper.num_consts(globalNames);
  const bool is_ideal_bool = oper.is_ideal_bool();
@ -3043,7 +3043,7 @@ void ArchDesc::define_oper_interface(FILE *fp, OperandForm &oper, FormDict &glob
 static void define_fill_new_machnode(bool used, FILE *fp_cpp) {
  fprintf(fp_cpp, "\n");
  fprintf(fp_cpp, "// Copy _idx, inputs and operands to new node\n");
-  fprintf(fp_cpp, "void MachNode::fill_new_machnode( MachNode* node, Compile* C) const {\n");
+  fprintf(fp_cpp, "void MachNode::fill_new_machnode(MachNode* node) const {\n");
  if( !used ) {
    fprintf(fp_cpp, "  // This architecture does not have cisc or short branch instructions\n");
    fprintf(fp_cpp, "  ShouldNotCallThis();\n");
@ -3064,7 +3064,7 @@ static void define_fill_new_machnode(bool used, FILE *fp_cpp) {
    fprintf(fp_cpp, "  MachOper **to = node->_opnds;\n");
    fprintf(fp_cpp, "  for( int i = 0; i < nopnds; i++ ) {\n");
    fprintf(fp_cpp, "    if( i != cisc_operand() ) \n");
-    fprintf(fp_cpp, "      to[i] = _opnds[i]->clone(C);\n");
+    fprintf(fp_cpp, "      to[i] = _opnds[i]->clone();\n");
    fprintf(fp_cpp, "  }\n");
    fprintf(fp_cpp, "}\n");
  }
@ -3105,7 +3105,7 @@ void ArchDesc::defineClasses(FILE *fp) {
    if ( strcmp(oper->_ident,"label") == 0 ) {
      defineIn_RegMask(_CPP_MISC_file._fp, _globalNames, *oper);

-      fprintf(fp,"MachOper  *%sOper::clone(Compile* C) const {\n", oper->_ident);
+      fprintf(fp,"MachOper  *%sOper::clone() const {\n", oper->_ident);
      fprintf(fp,"  return  new %sOper(_label, _block_num);\n", oper->_ident);
      fprintf(fp,"}\n");

@ -3124,7 +3124,7 @@ void ArchDesc::defineClasses(FILE *fp) {
    if ( strcmp(oper->_ident,"method") == 0 ) {
      defineIn_RegMask(_CPP_MISC_file._fp, _globalNames, *oper);

-      fprintf(fp,"MachOper  *%sOper::clone(Compile* C) const {\n", oper->_ident);
+      fprintf(fp,"MachOper  *%sOper::clone() const {\n", oper->_ident);
      fprintf(fp,"  return  new %sOper(_method);\n", oper->_ident);
      fprintf(fp,"}\n");

@ -3845,7 +3845,7 @@ void ArchDesc::buildMachOperGenerator(FILE *fp_cpp) {
          "// that invokes 'new' on the corresponding class constructor.\n");
  fprintf(fp_cpp, "\n");
  fprintf(fp_cpp, "MachOper *State::MachOperGenerator");
-  fprintf(fp_cpp, "(int opcode, Compile* C)");
+  fprintf(fp_cpp, "(int opcode)");
  fprintf(fp_cpp, "{\n");
  fprintf(fp_cpp, "\n");
  fprintf(fp_cpp, "  switch(opcode) {\n");
@ -3921,7 +3921,7 @@ void ArchDesc::buildMachNode(FILE *fp_cpp, InstructForm *inst, const char *inden
      int         index  = clist.operand_position(comp->_name, comp->_usedef, inst);
      const char *opcode = machOperEnum(comp->_type);
      fprintf(fp_cpp, "%s node->set_opnd_array(%d, ", indent, index);
-      fprintf(fp_cpp, "MachOperGenerator(%s, C));\n", opcode);
+      fprintf(fp_cpp, "MachOperGenerator(%s));\n", opcode);
      }
  }
  else if ( inst->is_chain_of_constant(_globalNames, opType) ) {
@ -3978,7 +3978,7 @@ void InstructForm::declare_cisc_version(ArchDesc &AD, FILE *fp_hpp) {
    InstructForm *inst_cisc = cisc_spill_alternate();
    if (inst_cisc != NULL) {
      fprintf(fp_hpp, "  virtual int            cisc_operand() const { return %d; }\n", cisc_spill_operand());
-      fprintf(fp_hpp, "  virtual MachNode      *cisc_version(int offset, Compile* C);\n");
+      fprintf(fp_hpp, "  virtual MachNode      *cisc_version(int offset);\n");
      fprintf(fp_hpp, "  virtual void           use_cisc_RegMask();\n");
      fprintf(fp_hpp, "  virtual const RegMask *cisc_RegMask() const { return _cisc_RegMask; }\n");
    }
@ -4008,7 +4008,7 @@ bool InstructForm::define_cisc_version(ArchDesc &AD, FILE *fp_cpp) {
    // Construct CISC version of this instruction
    fprintf(fp_cpp, "\n");
    fprintf(fp_cpp, "// Build CISC version of this instruction\n");
-    fprintf(fp_cpp, "MachNode *%sNode::cisc_version( int offset, Compile* C ) {\n", this->_ident);
+    fprintf(fp_cpp, "MachNode *%sNode::cisc_version(int offset) {\n", this->_ident);
    // Create the MachNode object
    fprintf(fp_cpp, "  %sNode *node = new %sNode();\n", name, name);
    // Fill in the bottom_type where requested
@ -4023,7 +4023,7 @@ bool InstructForm::define_cisc_version(ArchDesc &AD, FILE *fp_cpp) {

    fprintf(fp_cpp, "\n");
    fprintf(fp_cpp, "  // Copy _idx, inputs and operands to new node\n");
-    fprintf(fp_cpp, "  fill_new_machnode(node, C);\n");
+    fprintf(fp_cpp, "  fill_new_machnode(node);\n");
    // Construct operand to access [stack_pointer + offset]
    fprintf(fp_cpp, "  // Construct operand to access [stack_pointer + offset]\n");
    fprintf(fp_cpp, "  node->set_opnd_array(cisc_operand(), new %sOper(offset));\n", cisc_oper_name);
@ -4042,7 +4042,7 @@ bool InstructForm::define_cisc_version(ArchDesc &AD, FILE *fp_cpp) {
 // Build prototypes for short branch methods
 void InstructForm::declare_short_branch_methods(FILE *fp_hpp) {
  if (has_short_branch_form()) {
-    fprintf(fp_hpp, "  virtual MachNode      *short_branch_version(Compile* C);\n");
+    fprintf(fp_hpp, "  virtual MachNode      *short_branch_version();\n");
  }
 }

@ -4055,7 +4055,7 @@ bool InstructForm::define_short_branch_methods(ArchDesc &AD, FILE *fp_cpp) {

    // Construct short_branch_version() method.
    fprintf(fp_cpp, "// Build short branch version of this instruction\n");
-    fprintf(fp_cpp, "MachNode *%sNode::short_branch_version(Compile* C) {\n", this->_ident);
+    fprintf(fp_cpp, "MachNode *%sNode::short_branch_version() {\n", this->_ident);
    // Create the MachNode object
    fprintf(fp_cpp, "  %sNode *node = new %sNode();\n", name, name);
    if( is_ideal_if() ) {
@ -4071,7 +4071,7 @@ bool InstructForm::define_short_branch_methods(ArchDesc &AD, FILE *fp_cpp) {
    // Short branch version must use same node index for access
    // through allocator's tables
    fprintf(fp_cpp, "  // Copy _idx, inputs and operands to new node\n");
-    fprintf(fp_cpp, "  fill_new_machnode(node, C);\n");
+    fprintf(fp_cpp, "  fill_new_machnode(node);\n");

    // Return result and exit scope
    fprintf(fp_cpp, "  return node;\n");
@ -4097,7 +4097,7 @@ void ArchDesc::buildMachNodeGenerator(FILE *fp_cpp) {
          "// that invokes 'new' on the corresponding class constructor.\n");
  fprintf(fp_cpp, "\n");
  fprintf(fp_cpp, "MachNode *State::MachNodeGenerator");
-  fprintf(fp_cpp, "(int opcode, Compile* C)");
+  fprintf(fp_cpp, "(int opcode)");
  fprintf(fp_cpp, "{\n");
  fprintf(fp_cpp, "  switch(opcode) {\n");

--- a/hotspot/src/share/vm/adlc/output_h.cpp
+++ b/hotspot/src/share/vm/adlc/output_h.cpp
@ -1119,7 +1119,7 @@ void ArchDesc::declare_pipe_classes(FILE *fp_hpp) {
  fprintf(fp_hpp, "    _nop_count = %d\n",
    _pipeline->_nopcnt);
  fprintf(fp_hpp, "  };\n\n");
-  fprintf(fp_hpp, "  static void initialize_nops(MachNode *nop_list[%d], Compile* C);\n\n",
+  fprintf(fp_hpp, "  static void initialize_nops(MachNode *nop_list[%d]);\n\n",
    _pipeline->_nopcnt);
  fprintf(fp_hpp, "#ifndef PRODUCT\n");
  fprintf(fp_hpp, "  void dump(outputStream *st = tty) const;\n");
@ -1240,7 +1240,7 @@ void ArchDesc::declareClasses(FILE *fp) {
                      constant_type, _globalNames);

    // Clone function
-    fprintf(fp,"  virtual MachOper      *clone(Compile* C) const;\n");
+    fprintf(fp,"  virtual MachOper      *clone() const;\n");

    // Support setting a spill offset into a constant operand.
    // We only support setting an 'int' offset, while in the
@ -1718,7 +1718,7 @@ void ArchDesc::declareClasses(FILE *fp) {

    // If there is an explicit peephole rule, build it
    if ( instr->peepholes() != NULL ) {
-      fprintf(fp,"  virtual MachNode      *peephole(Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted, Compile *C);\n");
+      fprintf(fp,"  virtual MachNode      *peephole(Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted);\n");
    }

    // Output the declaration for number of relocation entries
@ -1863,7 +1863,7 @@ void ArchDesc::declareClasses(FILE *fp) {
    }
    if ( instr->num_post_match_opnds() != 0
         || instr->is_chain_of_constant(_globalNames) ) {
-      fprintf(fp,"  friend MachNode *State::MachNodeGenerator(int opcode, Compile* C);\n");
+      fprintf(fp,"  friend MachNode *State::MachNodeGenerator(int opcode);\n");
    }
    if ( instr->rematerialize(_globalNames, get_registers()) ) {
      fprintf(fp,"  // Rematerialize %s\n", instr->_ident);
@ -2071,8 +2071,8 @@ void ArchDesc::defineStateClass(FILE *fp) {
  fprintf(fp,"  DEBUG_ONLY( ~State(void); )       // Destructor\n");
  fprintf(fp,"\n");
  fprintf(fp,"  // Methods created by ADLC and invoked by Reduce\n");
-  fprintf(fp,"  MachOper *MachOperGenerator( int opcode, Compile* C );\n");
-  fprintf(fp,"  MachNode *MachNodeGenerator( int opcode, Compile* C );\n");
+  fprintf(fp,"  MachOper *MachOperGenerator(int opcode);\n");
+  fprintf(fp,"  MachNode *MachNodeGenerator(int opcode);\n");
  fprintf(fp,"\n");
  fprintf(fp,"  // Assign a state to a node, definition of method produced by ADLC\n");
  fprintf(fp,"  bool DFA( int opcode, const Node *ideal );\n");
--- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp
+++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp
@ -1613,25 +1613,22 @@ void LinearScan::allocate_registers() {
  Interval* precolored_cpu_intervals, *not_precolored_cpu_intervals;
  Interval* precolored_fpu_intervals, *not_precolored_fpu_intervals;

-  create_unhandled_lists(&precolored_cpu_intervals, &not_precolored_cpu_intervals, is_precolored_cpu_interval, is_virtual_cpu_interval);
-  if (has_fpu_registers()) {
-    create_unhandled_lists(&precolored_fpu_intervals, &not_precolored_fpu_intervals, is_precolored_fpu_interval, is_virtual_fpu_interval);
-#ifdef ASSERT
-  } else {
-    // fpu register allocation is omitted because no virtual fpu registers are present
-    // just check this again...
-    create_unhandled_lists(&precolored_fpu_intervals, &not_precolored_fpu_intervals, is_precolored_fpu_interval, is_virtual_fpu_interval);
-    assert(not_precolored_fpu_intervals == Interval::end(), "missed an uncolored fpu interval");
-#endif
-  }
-
  // allocate cpu registers
+  create_unhandled_lists(&precolored_cpu_intervals, &not_precolored_cpu_intervals,
+                         is_precolored_cpu_interval, is_virtual_cpu_interval);
+
+  // allocate fpu registers
+  create_unhandled_lists(&precolored_fpu_intervals, &not_precolored_fpu_intervals,
+                         is_precolored_fpu_interval, is_virtual_fpu_interval);
+
+  // the fpu interval allocation cannot be moved down below with the fpu section as
+  // the cpu_lsw.walk() changes interval positions.
+
  LinearScanWalker cpu_lsw(this, precolored_cpu_intervals, not_precolored_cpu_intervals);
  cpu_lsw.walk();
  cpu_lsw.finish_allocation();

  if (has_fpu_registers()) {
-    // allocate fpu registers
    LinearScanWalker fpu_lsw(this, precolored_fpu_intervals, not_precolored_fpu_intervals);
    fpu_lsw.walk();
    fpu_lsw.finish_allocation();
--- a/hotspot/src/share/vm/classfile/stackMapFrame.cpp
+++ b/hotspot/src/share/vm/classfile/stackMapFrame.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -54,21 +54,6 @@ StackMapFrame* StackMapFrame::frame_in_exception_handler(u1 flags) {
  return frame;
 }

-bool StackMapFrame::has_new_object() const {
-  int32_t i;
-  for (i = 0; i < _max_locals; i++) {
-    if (_locals[i].is_uninitialized()) {
-      return true;
-    }
-  }
-  for (i = 0; i < _stack_size; i++) {
-    if (_stack[i].is_uninitialized()) {
-      return true;
-    }
-  }
-  return false;
-}
-
 void StackMapFrame::initialize_object(
    VerificationType old_object, VerificationType new_object) {
  int32_t i;
--- a/hotspot/src/share/vm/classfile/stackMapFrame.hpp
+++ b/hotspot/src/share/vm/classfile/stackMapFrame.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -154,10 +154,6 @@ class StackMapFrame : public ResourceObj {
  VerificationType set_locals_from_arg(
    const methodHandle m, VerificationType thisKlass, TRAPS);

-  // Search local variable type array and stack type array.
-  // Return true if an uninitialized object is found.
-  bool has_new_object() const;
-
  // Search local variable type array and stack type array.
  // Set every element with type of old_object to new_object.
  void initialize_object(
--- a/hotspot/src/share/vm/classfile/stackMapTable.cpp
+++ b/hotspot/src/share/vm/classfile/stackMapTable.cpp
@ -70,24 +70,26 @@ int StackMapTable::get_index_from_offset(int32_t offset) const {

 bool StackMapTable::match_stackmap(
    StackMapFrame* frame, int32_t target,
-    bool match, bool update, ErrorContext* ctx, TRAPS) const {
+    bool match, bool update, bool handler, ErrorContext* ctx, TRAPS) const {
  int index = get_index_from_offset(target);
-  return match_stackmap(frame, target, index, match, update, ctx, THREAD);
+  return match_stackmap(frame, target, index, match, update, handler, ctx, THREAD);
 }

 // Match and/or update current_frame to the frame in stackmap table with
 // specified offset and frame index. Return true if the two frames match.
+// handler is true if the frame in stackmap_table is for an exception handler.
 //
-// The values of match and update are:                  _match__update_
+// The values of match and update are:                  _match__update__handler
 //
-// checking a branch target/exception handler:           true   false
+// checking a branch target:                             true   false   false
+// checking an exception handler:                        true   false   true
 // linear bytecode verification following an
-// unconditional branch:                                 false  true
+// unconditional branch:                                 false  true    false
 // linear bytecode verification not following an
-// unconditional branch:                                 true   true
+// unconditional branch:                                 true   true    false
 bool StackMapTable::match_stackmap(
    StackMapFrame* frame, int32_t target, int32_t frame_index,
-    bool match, bool update, ErrorContext* ctx, TRAPS) const {
+    bool match, bool update, bool handler, ErrorContext* ctx, TRAPS) const {
  if (frame_index < 0 || frame_index >= _frame_count) {
    *ctx = ErrorContext::missing_stackmap(frame->offset());
    frame->verifier()->verify_error(
@ -98,11 +100,9 @@ bool StackMapTable::match_stackmap(
  StackMapFrame *stackmap_frame = _frame_array[frame_index];
  bool result = true;
  if (match) {
-    // when checking handler target, match == true && update == false
-    bool is_exception_handler = !update;
    // Has direct control flow from last instruction, need to match the two
    // frames.
-    result = frame->is_assignable_to(stackmap_frame, is_exception_handler,
+    result = frame->is_assignable_to(stackmap_frame, handler,
        ctx, CHECK_VERIFY_(frame->verifier(), result));
  }
  if (update) {
@ -126,24 +126,10 @@ void StackMapTable::check_jump_target(
    StackMapFrame* frame, int32_t target, TRAPS) const {
  ErrorContext ctx;
  bool match = match_stackmap(
-    frame, target, true, false, &ctx, CHECK_VERIFY(frame->verifier()));
+    frame, target, true, false, false, &ctx, CHECK_VERIFY(frame->verifier()));
  if (!match || (target < 0 || target >= _code_length)) {
    frame->verifier()->verify_error(ctx,
        "Inconsistent stackmap frames at branch target %d", target);
-    return;
-  }
-  // check if uninitialized objects exist on backward branches
-  check_new_object(frame, target, CHECK_VERIFY(frame->verifier()));
-  frame->verifier()->update_furthest_jump(target);
-}
-
-void StackMapTable::check_new_object(
-    const StackMapFrame* frame, int32_t target, TRAPS) const {
-  if (frame->offset() > target && frame->has_new_object()) {
-    frame->verifier()->verify_error(
-        ErrorContext::bad_code(frame->offset()),
-        "Uninitialized object exists on backward branch %d", target);
-    return;
  }
 }

--- a/hotspot/src/share/vm/classfile/stackMapTable.hpp
+++ b/hotspot/src/share/vm/classfile/stackMapTable.hpp
@ -60,12 +60,12 @@ class StackMapTable : public StackObj {
  // specified offset. Return true if the two frames match.
  bool match_stackmap(
    StackMapFrame* current_frame, int32_t offset,
-    bool match, bool update, ErrorContext* ctx, TRAPS) const;
+    bool match, bool update, bool handler, ErrorContext* ctx, TRAPS) const;
  // Match and/or update current_frame to the frame in stackmap table with
  // specified offset and frame index. Return true if the two frames match.
  bool match_stackmap(
    StackMapFrame* current_frame, int32_t offset, int32_t frame_index,
-    bool match, bool update, ErrorContext* ctx, TRAPS) const;
+    bool match, bool update, bool handler, ErrorContext* ctx, TRAPS) const;

  // Check jump instructions. Make sure there are no uninitialized
  // instances on backward branch.
@ -76,10 +76,6 @@ class StackMapTable : public StackObj {
  // Returns the frame array index where the frame with offset is stored.
  int get_index_from_offset(int32_t offset) const;

-  // Make sure that there's no uninitialized object exist on backward branch.
-  void check_new_object(
-    const StackMapFrame* frame, int32_t target, TRAPS) const;
-
  void print_on(outputStream* str) const;
 };

--- a/hotspot/src/share/vm/classfile/verifier.cpp
+++ b/hotspot/src/share/vm/classfile/verifier.cpp
@ -620,8 +620,6 @@ void ClassVerifier::verify_method(methodHandle m, TRAPS) {
                                // flow from current instruction to the next
                                // instruction in sequence

-  set_furthest_jump(0);
-
  Bytecodes::Code opcode;
  while (!bcs.is_last_bytecode()) {
    // Check for recursive re-verification before each bytecode.
@ -1780,7 +1778,7 @@ u2 ClassVerifier::verify_stackmap_table(u2 stackmap_index, u2 bci,
      // If matched, current_frame will be updated by this method.
      bool matches = stackmap_table->match_stackmap(
        current_frame, this_offset, stackmap_index,
-        !no_control_flow, true, &ctx, CHECK_VERIFY_(this, 0));
+        !no_control_flow, true, false, &ctx, CHECK_VERIFY_(this, 0));
      if (!matches) {
        // report type error
        verify_error(ctx, "Instruction type does not match stack map");
@ -1827,7 +1825,7 @@ void ClassVerifier::verify_exception_handler_targets(u2 bci, bool this_uninit, S
      }
      ErrorContext ctx;
      bool matches = stackmap_table->match_stackmap(
-        new_frame, handler_pc, true, false, &ctx, CHECK_VERIFY(this));
+        new_frame, handler_pc, true, false, true, &ctx, CHECK_VERIFY(this));
      if (!matches) {
        verify_error(ctx, "Stack map does not match the one at "
            "exception handler %d", handler_pc);
@ -2238,13 +2236,6 @@ void ClassVerifier::verify_invoke_init(
      return;
    }

-    // Make sure that this call is not jumped over.
-    if (bci < furthest_jump()) {
-      verify_error(ErrorContext::bad_code(bci),
-                   "Bad <init> method call from inside of a branch");
-      return;
-    }
-
    // Make sure that this call is not done from within a TRY block because
    // that can result in returning an incomplete object.  Simply checking
    // (bci >= start_pc) also ensures that this call is not done after a TRY
--- a/hotspot/src/share/vm/classfile/verifier.hpp
+++ b/hotspot/src/share/vm/classfile/verifier.hpp
@ -258,9 +258,6 @@ class ClassVerifier : public StackObj {

  ErrorContext _error_context;  // contains information about an error

-  // Used to detect illegal jumps over calls to super() nd this() in ctors.
-  int32_t _furthest_jump;
-
  void verify_method(methodHandle method, TRAPS);
  char* generate_code_data(methodHandle m, u4 code_length, TRAPS);
  void verify_exception_handler_table(u4 code_length, char* code_data,
@ -407,19 +404,6 @@ class ClassVerifier : public StackObj {

  TypeOrigin ref_ctx(const char* str, TRAPS);

-  // Keep track of the furthest branch done in a method to make sure that
-  // there are no branches over calls to super() or this() from inside of
-  // a constructor.
-  int32_t furthest_jump() { return _furthest_jump; }
-
-  void set_furthest_jump(int32_t target) {
-    _furthest_jump = target;
-  }
-
-  void update_furthest_jump(int32_t target) {
-    if (target > _furthest_jump) _furthest_jump = target;
-  }
-
 };

 inline int ClassVerifier::change_sig_to_verificationType(
--- a/hotspot/src/share/vm/code/dependencies.cpp
+++ b/hotspot/src/share/vm/code/dependencies.cpp
@ -407,56 +407,66 @@ void Dependencies::check_valid_dependency_type(DepType dept) {
 // for the sake of the compiler log, print out current dependencies:
 void Dependencies::log_all_dependencies() {
  if (log() == NULL)  return;
-  ciBaseObject* args[max_arg_count];
+  ResourceMark rm;
  for (int deptv = (int)FIRST_TYPE; deptv < (int)TYPE_LIMIT; deptv++) {
    DepType dept = (DepType)deptv;
    GrowableArray<ciBaseObject*>* deps = _deps[dept];
-    if (deps->length() == 0)  continue;
+    int deplen = deps->length();
+    if (deplen == 0) {
+      continue;
+    }
    int stride = dep_args(dept);
+    GrowableArray<ciBaseObject*>* ciargs = new GrowableArray<ciBaseObject*>(stride);
    for (int i = 0; i < deps->length(); i += stride) {
      for (int j = 0; j < stride; j++) {
        // flush out the identities before printing
-        args[j] = deps->at(i+j);
+        ciargs->push(deps->at(i+j));
      }
-      write_dependency_to(log(), dept, stride, args);
+      write_dependency_to(log(), dept, ciargs);
+      ciargs->clear();
    }
+    guarantee(deplen == deps->length(), "deps array cannot grow inside nested ResoureMark scope");
  }
 }

 void Dependencies::write_dependency_to(CompileLog* log,
                                       DepType dept,
-                                       int nargs, DepArgument args[],
+                                       GrowableArray<DepArgument>* args,
                                       Klass* witness) {
  if (log == NULL) {
    return;
  }
+  ResourceMark rm;
  ciEnv* env = ciEnv::current();
-  ciBaseObject* ciargs[max_arg_count];
-  assert(nargs <= max_arg_count, "oob");
-  for (int j = 0; j < nargs; j++) {
-    if (args[j].is_oop()) {
-      ciargs[j] = env->get_object(args[j].oop_value());
+  GrowableArray<ciBaseObject*>* ciargs = new GrowableArray<ciBaseObject*>(args->length());
+  for (GrowableArrayIterator<DepArgument> it = args->begin(); it != args->end(); ++it) {
+    DepArgument arg = *it;
+    if (arg.is_oop()) {
+      ciargs->push(env->get_object(arg.oop_value()));
    } else {
-      ciargs[j] = env->get_metadata(args[j].metadata_value());
+      ciargs->push(env->get_metadata(arg.metadata_value()));
    }
  }
-  Dependencies::write_dependency_to(log, dept, nargs, ciargs, witness);
+  int argslen = ciargs->length();
+  Dependencies::write_dependency_to(log, dept, ciargs, witness);
+  guarantee(argslen == ciargs->length(), "ciargs array cannot grow inside nested ResoureMark scope");
 }

 void Dependencies::write_dependency_to(CompileLog* log,
                                       DepType dept,
-                                       int nargs, ciBaseObject* args[],
+                                       GrowableArray<ciBaseObject*>* args,
                                       Klass* witness) {
-  if (log == NULL)  return;
-  assert(nargs <= max_arg_count, "oob");
-  int argids[max_arg_count];
-  int ctxkj = dep_context_arg(dept);  // -1 if no context arg
-  int j;
-  for (j = 0; j < nargs; j++) {
-    if (args[j]->is_object()) {
-      argids[j] = log->identify(args[j]->as_object());
+  if (log == NULL) {
+    return;
+  }
+  ResourceMark rm;
+  GrowableArray<int>* argids = new GrowableArray<int>(args->length());
+  for (GrowableArrayIterator<ciBaseObject*> it = args->begin(); it != args->end(); ++it) {
+    ciBaseObject* obj = *it;
+    if (obj->is_object()) {
+      argids->push(log->identify(obj->as_object()));
    } else {
-      argids[j] = log->identify(args[j]->as_metadata());
+      argids->push(log->identify(obj->as_metadata()));
    }
  }
  if (witness != NULL) {
@ -465,16 +475,17 @@ void Dependencies::write_dependency_to(CompileLog* log,
    log->begin_elem("dependency");
  }
  log->print(" type='%s'", dep_name(dept));
-  if (ctxkj >= 0) {
-    log->print(" ctxk='%d'", argids[ctxkj]);
+  const int ctxkj = dep_context_arg(dept);  // -1 if no context arg
+  if (ctxkj >= 0 && ctxkj < argids->length()) {
+    log->print(" ctxk='%d'", argids->at(ctxkj));
  }
  // write remaining arguments, if any.
-  for (j = 0; j < nargs; j++) {
+  for (int j = 0; j < argids->length(); j++) {
    if (j == ctxkj)  continue;  // already logged
    if (j == 1) {
-      log->print(  " x='%d'",    argids[j]);
+      log->print(  " x='%d'",    argids->at(j));
    } else {
-      log->print(" x%d='%d'", j, argids[j]);
+      log->print(" x%d='%d'", j, argids->at(j));
    }
  }
  if (witness != NULL) {
@ -486,9 +497,12 @@ void Dependencies::write_dependency_to(CompileLog* log,

 void Dependencies::write_dependency_to(xmlStream* xtty,
                                       DepType dept,
-                                       int nargs, DepArgument args[],
+                                       GrowableArray<DepArgument>* args,
                                       Klass* witness) {
-  if (xtty == NULL)  return;
+  if (xtty == NULL) {
+    return;
+  }
+  ResourceMark rm;
  ttyLocker ttyl;
  int ctxkj = dep_context_arg(dept);  // -1 if no context arg
  if (witness != NULL) {
@ -498,23 +512,24 @@ void Dependencies::write_dependency_to(xmlStream* xtty,
  }
  xtty->print(" type='%s'", dep_name(dept));
  if (ctxkj >= 0) {
-    xtty->object("ctxk", args[ctxkj].metadata_value());
+    xtty->object("ctxk", args->at(ctxkj).metadata_value());
  }
  // write remaining arguments, if any.
-  for (int j = 0; j < nargs; j++) {
+  for (int j = 0; j < args->length(); j++) {
    if (j == ctxkj)  continue;  // already logged
+    DepArgument arg = args->at(j);
    if (j == 1) {
-      if (args[j].is_oop()) {
-        xtty->object("x", args[j].oop_value());
+      if (arg.is_oop()) {
+        xtty->object("x", arg.oop_value());
      } else {
-        xtty->object("x", args[j].metadata_value());
+        xtty->object("x", arg.metadata_value());
      }
    } else {
      char xn[10]; sprintf(xn, "x%d", j);
-      if (args[j].is_oop()) {
-        xtty->object(xn, args[j].oop_value());
+      if (arg.is_oop()) {
+        xtty->object(xn, arg.oop_value());
      } else {
-        xtty->object(xn, args[j].metadata_value());
+        xtty->object(xn, arg.metadata_value());
      }
    }
  }
@ -525,7 +540,7 @@ void Dependencies::write_dependency_to(xmlStream* xtty,
  xtty->end_elem();
 }

-void Dependencies::print_dependency(DepType dept, int nargs, DepArgument args[],
+void Dependencies::print_dependency(DepType dept, GrowableArray<DepArgument>* args,
                                    Klass* witness) {
  ResourceMark rm;
  ttyLocker ttyl;   // keep the following output all in one block
@ -534,8 +549,8 @@ void Dependencies::print_dependency(DepType dept, int nargs, DepArgument args[],
                dep_name(dept));
  // print arguments
  int ctxkj = dep_context_arg(dept);  // -1 if no context arg
-  for (int j = 0; j < nargs; j++) {
-    DepArgument arg = args[j];
+  for (int j = 0; j < args->length(); j++) {
+    DepArgument arg = args->at(j);
    bool put_star = false;
    if (arg.is_null())  continue;
    const char* what;
@ -571,31 +586,33 @@ void Dependencies::print_dependency(DepType dept, int nargs, DepArgument args[],
 void Dependencies::DepStream::log_dependency(Klass* witness) {
  if (_deps == NULL && xtty == NULL)  return;  // fast cutout for runtime
  ResourceMark rm;
-  int nargs = argument_count();
-  DepArgument args[max_arg_count];
+  const int nargs = argument_count();
+  GrowableArray<DepArgument>* args = new GrowableArray<DepArgument>(nargs);
  for (int j = 0; j < nargs; j++) {
    if (type() == call_site_target_value) {
-      args[j] = argument_oop(j);
+      args->push(argument_oop(j));
    } else {
-      args[j] = argument(j);
+      args->push(argument(j));
    }
  }
+  int argslen = args->length();
  if (_deps != NULL && _deps->log() != NULL) {
-    Dependencies::write_dependency_to(_deps->log(),
-                                      type(), nargs, args, witness);
+    Dependencies::write_dependency_to(_deps->log(), type(), args, witness);
  } else {
-    Dependencies::write_dependency_to(xtty,
-                                      type(), nargs, args, witness);
+    Dependencies::write_dependency_to(xtty, type(), args, witness);
  }
+  guarantee(argslen == args->length(), "args array cannot grow inside nested ResoureMark scope");
 }

 void Dependencies::DepStream::print_dependency(Klass* witness, bool verbose) {
+  ResourceMark rm;
  int nargs = argument_count();
-  DepArgument args[max_arg_count];
+  GrowableArray<DepArgument>* args = new GrowableArray<DepArgument>(nargs);
  for (int j = 0; j < nargs; j++) {
-    args[j] = argument(j);
+    args->push(argument(j));
  }
-  Dependencies::print_dependency(type(), nargs, args, witness);
+  int argslen = args->length();
+  Dependencies::print_dependency(type(), args, witness);
  if (verbose) {
    if (_code != NULL) {
      tty->print("  code: ");
@ -603,6 +620,7 @@ void Dependencies::DepStream::print_dependency(Klass* witness, bool verbose) {
      tty->cr();
    }
  }
+  guarantee(argslen == args->length(), "args array cannot grow inside nested ResoureMark scope");
 }


--- a/hotspot/src/share/vm/code/dependencies.hpp
+++ b/hotspot/src/share/vm/code/dependencies.hpp
@ -369,20 +369,36 @@ class Dependencies: public ResourceObj {
  void copy_to(nmethod* nm);

  void log_all_dependencies();
-  void log_dependency(DepType dept, int nargs, ciBaseObject* args[]) {
-    write_dependency_to(log(), dept, nargs, args);
+
+  void log_dependency(DepType dept, GrowableArray<ciBaseObject*>* args) {
+    ResourceMark rm;
+    int argslen = args->length();
+    write_dependency_to(log(), dept, args);
+    guarantee(argslen == args->length(),
+              "args array cannot grow inside nested ResoureMark scope");
  }
+
  void log_dependency(DepType dept,
                      ciBaseObject* x0,
                      ciBaseObject* x1 = NULL,
                      ciBaseObject* x2 = NULL) {
-    if (log() == NULL)  return;
-    ciBaseObject* args[max_arg_count];
-    args[0] = x0;
-    args[1] = x1;
-    args[2] = x2;
-    assert(2 < max_arg_count, "");
-    log_dependency(dept, dep_args(dept), args);
+    if (log() == NULL) {
+      return;
+    }
+    ResourceMark rm;
+    GrowableArray<ciBaseObject*>* ciargs =
+                new GrowableArray<ciBaseObject*>(dep_args(dept));
+    assert (x0 != NULL, "no log x0");
+    ciargs->push(x0);
+
+    if (x1 != NULL) {
+      ciargs->push(x1);
+    }
+    if (x2 != NULL) {
+      ciargs->push(x2);
+    }
+    assert(ciargs->length() == dep_args(dept), "");
+    log_dependency(dept, ciargs);
  }

  class DepArgument : public ResourceObj {
@ -405,20 +421,8 @@ class Dependencies: public ResourceObj {
    Metadata* metadata_value() const { assert(!_is_oop && _valid, "must be"); return (Metadata*) _value; }
  };

-  static void write_dependency_to(CompileLog* log,
-                                  DepType dept,
-                                  int nargs, ciBaseObject* args[],
-                                  Klass* witness = NULL);
-  static void write_dependency_to(CompileLog* log,
-                                  DepType dept,
-                                  int nargs, DepArgument args[],
-                                  Klass* witness = NULL);
-  static void write_dependency_to(xmlStream* xtty,
-                                  DepType dept,
-                                  int nargs, DepArgument args[],
-                                  Klass* witness = NULL);
  static void print_dependency(DepType dept,
-                               int nargs, DepArgument args[],
+                               GrowableArray<DepArgument>* args,
                               Klass* witness = NULL);

 private:
@ -427,6 +431,18 @@ class Dependencies: public ResourceObj {

  static Klass* ctxk_encoded_as_null(DepType dept, Metadata* x);

+  static void write_dependency_to(CompileLog* log,
+                                  DepType dept,
+                                  GrowableArray<ciBaseObject*>* args,
+                                  Klass* witness = NULL);
+  static void write_dependency_to(CompileLog* log,
+                                  DepType dept,
+                                  GrowableArray<DepArgument>* args,
+                                  Klass* witness = NULL);
+  static void write_dependency_to(xmlStream* xtty,
+                                  DepType dept,
+                                  GrowableArray<DepArgument>* args,
+                                  Klass* witness = NULL);
 public:
  // Use this to iterate over an nmethod's dependency set.
  // Works on new and old dependency sets.
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@ -5987,6 +5987,8 @@ public:
 };

 void CMSRefProcTaskProxy::work(uint worker_id) {
+  ResourceMark rm;
+  HandleMark hm;
  assert(_collector->_span.equals(_span), "Inconsistency in _span");
  CMSParKeepAliveClosure par_keep_alive(_collector, _span,
                                        _mark_bit_map,
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@ -2167,7 +2167,9 @@ void ConcurrentMark::cleanup() {
  g1h->increment_total_collections();

  // Clean out dead classes and update Metaspace sizes.
-  ClassLoaderDataGraph::purge();
+  if (ClassUnloadingWithConcurrentMark) {
+    ClassLoaderDataGraph::purge();
+  }
  MetaspaceGC::compute_new_size();

  // We reclaimed old regions so we should calculate the sizes to make
@ -2403,6 +2405,8 @@ public:
  }

  virtual void work(uint worker_id) {
+    ResourceMark rm;
+    HandleMark hm;
    CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
@ -2595,24 +2599,27 @@ void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  assert(_markStack.isEmpty(), "Marking should have completed");

  // Unload Klasses, String, Symbols, Code Cache, etc.
-
-  G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
-
-  bool purged_classes;
-
  {
-    G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
-    purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
-  }
+    G1RemarkGCTraceTime trace("Unloading", G1Log::finer());

-  {
-    G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
-    weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
-  }
+    if (ClassUnloadingWithConcurrentMark) {
+      bool purged_classes;

-  if (G1StringDedup::is_enabled()) {
-    G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
-    G1StringDedup::unlink(&g1_is_alive);
+      {
+        G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
+        purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
+      }
+
+      {
+        G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
+        weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
+      }
+    }
+
+    if (G1StringDedup::is_enabled()) {
+      G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
+      G1StringDedup::unlink(&g1_is_alive);
+    }
  }
 }

--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@ -1926,6 +1926,8 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
  _secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
  _old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
  _humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
+  _humongous_is_live(),
+  _has_humongous_reclaim_candidates(false),
  _free_regions_coming(false),
  _young_list(new YoungList(this)),
  _gc_time_stamp(0),
@ -2082,6 +2084,7 @@ jint G1CollectedHeap::initialize() {
  _g1h = this;

  _in_cset_fast_test.initialize(_g1_reserved.start(), _g1_reserved.end(), HeapRegion::GrainBytes);
+  _humongous_is_live.initialize(_g1_reserved.start(), _g1_reserved.end(), HeapRegion::GrainBytes);

  // Create the ConcurrentMark data structure and thread.
  // (Must do this late, so that "max_regions" is defined.)
@ -2177,6 +2180,11 @@ void G1CollectedHeap::stop() {
  }
 }

+void G1CollectedHeap::clear_humongous_is_live_table() {
+  guarantee(G1ReclaimDeadHumongousObjectsAtYoungGC, "Should only be called if true");
+  _humongous_is_live.clear();
+}
+
 size_t G1CollectedHeap::conservative_max_heap_alignment() {
  return HeapRegion::max_region_size();
 }
@ -3771,6 +3779,61 @@ size_t G1CollectedHeap::cards_scanned() {
  return g1_rem_set()->cardsScanned();
 }

+bool G1CollectedHeap::humongous_region_is_always_live(uint index) {
+  HeapRegion* region = region_at(index);
+  assert(region->startsHumongous(), "Must start a humongous object");
+  return oop(region->bottom())->is_objArray() || !region->rem_set()->is_empty();
+}
+
+class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
+ private:
+  size_t _total_humongous;
+  size_t _candidate_humongous;
+ public:
+  RegisterHumongousWithInCSetFastTestClosure() : _total_humongous(0), _candidate_humongous(0) {
+  }
+
+  virtual bool doHeapRegion(HeapRegion* r) {
+    if (!r->startsHumongous()) {
+      return false;
+    }
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+    uint region_idx = r->hrs_index();
+    bool is_candidate = !g1h->humongous_region_is_always_live(region_idx);
+    // Is_candidate already filters out humongous regions with some remembered set.
+    // This will not lead to humongous object that we mistakenly keep alive because
+    // during young collection the remembered sets will only be added to.
+    if (is_candidate) {
+      g1h->register_humongous_region_with_in_cset_fast_test(region_idx);
+      _candidate_humongous++;
+    }
+    _total_humongous++;
+
+    return false;
+  }
+
+  size_t total_humongous() const { return _total_humongous; }
+  size_t candidate_humongous() const { return _candidate_humongous; }
+};
+
+void G1CollectedHeap::register_humongous_regions_with_in_cset_fast_test() {
+  if (!G1ReclaimDeadHumongousObjectsAtYoungGC) {
+    g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(0, 0);
+    return;
+  }
+
+  RegisterHumongousWithInCSetFastTestClosure cl;
+  heap_region_iterate(&cl);
+  g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(cl.total_humongous(),
+                                                                  cl.candidate_humongous());
+  _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0;
+
+  if (_has_humongous_reclaim_candidates) {
+    clear_humongous_is_live_table();
+  }
+}
+
 void
 G1CollectedHeap::setup_surviving_young_words() {
  assert(_surviving_young_words == NULL, "pre-condition");
@ -4058,6 +4121,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {

        g1_policy()->finalize_cset(target_pause_time_ms, evacuation_info);

+        register_humongous_regions_with_in_cset_fast_test();
+
        _cm->note_start_of_gc();
        // We should not verify the per-thread SATB buffers given that
        // we have not filtered them yet (we'll do so during the
@ -4108,6 +4173,9 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
                                 true  /* verify_fingers */);

        free_collection_set(g1_policy()->collection_set(), evacuation_info);
+
+        eagerly_reclaim_humongous_regions();
+
        g1_policy()->clear_collection_set();

        cleanup_surviving_young_words();
@ -4608,7 +4676,9 @@ void G1ParCopyClosure<barrier, do_mark_object>::do_oop_work(T* p) {

  assert(_worker_id == _par_scan_state->queue_num(), "sanity");

-  if (_g1->in_cset_fast_test(obj)) {
+  G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
+
+  if (state == G1CollectedHeap::InCSet) {
    oop forwardee;
    if (obj->is_forwarded()) {
      forwardee = obj->forwardee();
@ -4627,6 +4697,9 @@ void G1ParCopyClosure<barrier, do_mark_object>::do_oop_work(T* p) {
      do_klass_barrier(p, forwardee);
    }
  } else {
+    if (state == G1CollectedHeap::IsHumongous) {
+      _g1->set_humongous_is_live(obj);
+    }
    // The object is not in collection set. If we're a root scanning
    // closure during an initial mark pause then attempt to mark the object.
    if (do_mark_object == G1MarkFromRoot) {
@ -4847,10 +4920,15 @@ public:
      if (_g1h->g1_policy()->during_initial_mark_pause()) {
        // We also need to mark copied objects.
        strong_root_cl = &scan_mark_root_cl;
-        weak_root_cl   = &scan_mark_weak_root_cl;
        strong_cld_cl  = &scan_mark_cld_cl;
-        weak_cld_cl    = &scan_mark_weak_cld_cl;
        strong_code_cl = &scan_mark_code_cl;
+        if (ClassUnloadingWithConcurrentMark) {
+          weak_root_cl = &scan_mark_weak_root_cl;
+          weak_cld_cl  = &scan_mark_weak_cld_cl;
+        } else {
+          weak_root_cl = &scan_mark_root_cl;
+          weak_cld_cl  = &scan_mark_cld_cl;
+        }
      } else {
        strong_root_cl = &scan_only_root_cl;
        weak_root_cl   = &scan_only_root_cl;
@ -4921,6 +4999,7 @@ g1_process_roots(OopClosure* scan_non_heap_roots,
  double closure_app_time_sec = 0.0;

  bool during_im = _g1h->g1_policy()->during_initial_mark_pause();
+  bool trace_metadata = during_im && ClassUnloadingWithConcurrentMark;

  BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
  BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots);
@ -4930,8 +5009,8 @@ g1_process_roots(OopClosure* scan_non_heap_roots,
                &buf_scan_non_heap_roots,
                &buf_scan_non_heap_weak_roots,
                scan_strong_clds,
-                // Initial Mark handles the weak CLDs separately.
-                (during_im ? NULL : scan_weak_clds),
+                // Unloading Initial Marks handle the weak CLDs separately.
+                (trace_metadata ? NULL : scan_weak_clds),
                scan_strong_code);

  // Now the CM ref_processor roots.
@ -4943,7 +5022,7 @@ g1_process_roots(OopClosure* scan_non_heap_roots,
    ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots);
  }

-  if (during_im) {
+  if (trace_metadata) {
    // Barrier to make sure all workers passed
    // the strong CLD and strong nmethods phases.
    active_strong_roots_scope()->wait_until_all_workers_done_with_threads(n_par_threads());
@ -5450,12 +5529,21 @@ class G1KeepAliveClosure: public OopClosure {
 public:
  G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
  void do_oop(narrowOop* p) { guarantee(false, "Not needed"); }
-  void do_oop(      oop* p) {
+  void do_oop(oop* p) {
    oop obj = *p;

-    if (_g1->obj_in_cs(obj)) {
+    G1CollectedHeap::in_cset_state_t cset_state = _g1->in_cset_state(obj);
+    if (obj == NULL || cset_state == G1CollectedHeap::InNeither) {
+      return;
+    }
+    if (cset_state == G1CollectedHeap::InCSet) {
      assert( obj->is_forwarded(), "invariant" );
      *p = obj->forwardee();
+    } else {
+      assert(!obj->is_forwarded(), "invariant" );
+      assert(cset_state == G1CollectedHeap::IsHumongous,
+             err_msg("Only allowed InCSet state is IsHumongous, but is %d", cset_state));
+      _g1->set_humongous_is_live(obj);
    }
  }
 };
@ -5485,7 +5573,7 @@ public:
  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);

-    if (_g1h->obj_in_cs(obj)) {
+    if (_g1h->is_in_cset_or_humongous(obj)) {
      // If the referent object has been forwarded (either copied
      // to a new location or to itself in the event of an
      // evacuation failure) then we need to update the reference
@ -5510,10 +5598,10 @@ public:
        assert(!Metaspace::contains((const void*)p),
               err_msg("Unexpectedly found a pointer from metadata: "
                              PTR_FORMAT, p));
-          _copy_non_heap_obj_cl->do_oop(p);
-        }
+        _copy_non_heap_obj_cl->do_oop(p);
      }
    }
+  }
 };

 // Serial drain queue closure. Called as the 'complete_gc'
@ -6435,6 +6523,154 @@ void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& e
  policy->phase_times()->record_non_young_free_cset_time_ms(non_young_time_ms);
 }

+class G1FreeHumongousRegionClosure : public HeapRegionClosure {
+ private:
+  FreeRegionList* _free_region_list;
+  HeapRegionSet* _proxy_set;
+  HeapRegionSetCount _humongous_regions_removed;
+  size_t _freed_bytes;
+ public:
+
+  G1FreeHumongousRegionClosure(FreeRegionList* free_region_list) :
+    _free_region_list(free_region_list), _humongous_regions_removed(), _freed_bytes(0) {
+  }
+
+  virtual bool doHeapRegion(HeapRegion* r) {
+    if (!r->startsHumongous()) {
+      return false;
+    }
+
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+    oop obj = (oop)r->bottom();
+    CMBitMap* next_bitmap = g1h->concurrent_mark()->nextMarkBitMap();
+
+    // The following checks whether the humongous object is live are sufficient.
+    // The main additional check (in addition to having a reference from the roots
+    // or the young gen) is whether the humongous object has a remembered set entry.
+    //
+    // A humongous object cannot be live if there is no remembered set for it
+    // because:
+    // - there can be no references from within humongous starts regions referencing
+    // the object because we never allocate other objects into them.
+    // (I.e. there are no intra-region references that may be missed by the
+    // remembered set)
+    // - as soon there is a remembered set entry to the humongous starts region
+    // (i.e. it has "escaped" to an old object) this remembered set entry will stay
+    // until the end of a concurrent mark.
+    //
+    // It is not required to check whether the object has been found dead by marking
+    // or not, in fact it would prevent reclamation within a concurrent cycle, as
+    // all objects allocated during that time are considered live.
+    // SATB marking is even more conservative than the remembered set.
+    // So if at this point in the collection there is no remembered set entry,
+    // nobody has a reference to it.
+    // At the start of collection we flush all refinement logs, and remembered sets
+    // are completely up-to-date wrt to references to the humongous object.
+    //
+    // Other implementation considerations:
+    // - never consider object arrays: while they are a valid target, they have not
+    // been observed to be used as temporary objects.
+    // - they would also pose considerable effort for cleaning up the the remembered
+    // sets.
+    // While this cleanup is not strictly necessary to be done (or done instantly),
+    // given that their occurrence is very low, this saves us this additional
+    // complexity.
+    uint region_idx = r->hrs_index();
+    if (g1h->humongous_is_live(region_idx) ||
+        g1h->humongous_region_is_always_live(region_idx)) {
+
+      if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
+        gclog_or_tty->print_cr("Live humongous %d region %d with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+                               r->isHumongous(),
+                               region_idx,
+                               r->rem_set()->occupied(),
+                               r->rem_set()->strong_code_roots_list_length(),
+                               next_bitmap->isMarked(r->bottom()),
+                               g1h->humongous_is_live(region_idx),
+                               obj->is_objArray()
+                              );
+      }
+
+      return false;
+    }
+
+    guarantee(!obj->is_objArray(),
+              err_msg("Eagerly reclaiming object arrays is not supported, but the object "PTR_FORMAT" is.",
+                      r->bottom()));
+
+    if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
+      gclog_or_tty->print_cr("Reclaim humongous region %d start "PTR_FORMAT" region %d length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+                             r->isHumongous(),
+                             r->bottom(),
+                             region_idx,
+                             r->region_num(),
+                             r->rem_set()->occupied(),
+                             r->rem_set()->strong_code_roots_list_length(),
+                             next_bitmap->isMarked(r->bottom()),
+                             g1h->humongous_is_live(region_idx),
+                             obj->is_objArray()
+                            );
+    }
+    // Need to clear mark bit of the humongous object if already set.
+    if (next_bitmap->isMarked(r->bottom())) {
+      next_bitmap->clear(r->bottom());
+    }
+    _freed_bytes += r->used();
+    r->set_containing_set(NULL);
+    _humongous_regions_removed.increment(1u, r->capacity());
+    g1h->free_humongous_region(r, _free_region_list, false);
+
+    return false;
+  }
+
+  HeapRegionSetCount& humongous_free_count() {
+    return _humongous_regions_removed;
+  }
+
+  size_t bytes_freed() const {
+    return _freed_bytes;
+  }
+
+  size_t humongous_reclaimed() const {
+    return _humongous_regions_removed.length();
+  }
+};
+
+void G1CollectedHeap::eagerly_reclaim_humongous_regions() {
+  assert_at_safepoint(true);
+
+  if (!G1ReclaimDeadHumongousObjectsAtYoungGC || !_has_humongous_reclaim_candidates) {
+    g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms(0.0, 0);
+    return;
+  }
+
+  double start_time = os::elapsedTime();
+
+  FreeRegionList local_cleanup_list("Local Humongous Cleanup List");
+
+  G1FreeHumongousRegionClosure cl(&local_cleanup_list);
+  heap_region_iterate(&cl);
+
+  HeapRegionSetCount empty_set;
+  remove_from_old_sets(empty_set, cl.humongous_free_count());
+
+  G1HRPrinter* hr_printer = _g1h->hr_printer();
+  if (hr_printer->is_active()) {
+    FreeRegionListIterator iter(&local_cleanup_list);
+    while (iter.more_available()) {
+      HeapRegion* hr = iter.get_next();
+      hr_printer->cleanup(hr);
+    }
+  }
+
+  prepend_to_freelist(&local_cleanup_list);
+  decrement_summary_bytes(cl.bytes_freed());
+
+  g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms((os::elapsedTime() - start_time) * 1000.0,
+                                                                    cl.humongous_reclaimed());
+}
+
 // This routine is similar to the above but does not record
 // any policy statistics or update free lists; we are abandoning
 // the current incremental collection set in preparation of a
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
@ -197,16 +197,6 @@ public:
  bool do_object_b(oop p);
 };

-// Instances of this class are used for quick tests on whether a reference points
-// into the collection set. Each of the array's elements denotes whether the
-// corresponding region is in the collection set.
-class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<bool> {
- protected:
-  bool default_value() const { return false; }
- public:
-  void clear() { G1BiasedMappedArray<bool>::clear(); }
-};
-
 class RefineCardTableEntryClosure;

 class G1CollectedHeap : public SharedHeap {
@ -237,6 +227,7 @@ class G1CollectedHeap : public SharedHeap {
  friend class EvacPopObjClosure;
  friend class G1ParCleanupCTTask;

+  friend class G1FreeHumongousRegionClosure;
  // Other related classes.
  friend class G1MarkSweep;

@ -267,6 +258,9 @@ private:
  // It keeps track of the humongous regions.
  HeapRegionSet _humongous_set;

+  void clear_humongous_is_live_table();
+  void eagerly_reclaim_humongous_regions();
+
  // The number of regions we could create by expansion.
  uint _expansion_regions;

@ -367,10 +361,25 @@ private:
  // than the current allocation region.
  size_t _summary_bytes_used;

-  // This array is used for a quick test on whether a reference points into
-  // the collection set or not. Each of the array's elements denotes whether the
-  // corresponding region is in the collection set or not.
-  G1FastCSetBiasedMappedArray _in_cset_fast_test;
+  // Records whether the region at the given index is kept live by roots or
+  // references from the young generation.
+  class HumongousIsLiveBiasedMappedArray : public G1BiasedMappedArray<bool> {
+   protected:
+    bool default_value() const { return false; }
+   public:
+    void clear() { G1BiasedMappedArray<bool>::clear(); }
+    void set_live(uint region) {
+      set_by_index(region, true);
+    }
+    bool is_live(uint region) {
+      return get_by_index(region);
+    }
+  };
+
+  HumongousIsLiveBiasedMappedArray _humongous_is_live;
+  // Stores whether during humongous object registration we found candidate regions.
+  // If not, we can skip a few steps.
+  bool _has_humongous_reclaim_candidates;

  volatile unsigned _gc_time_stamp;

@ -690,10 +699,24 @@ public:
  virtual void gc_prologue(bool full);
  virtual void gc_epilogue(bool full);

+  inline void set_humongous_is_live(oop obj);
+
+  bool humongous_is_live(uint region) {
+    return _humongous_is_live.is_live(region);
+  }
+
+  // Returns whether the given region (which must be a humongous (start) region)
+  // is to be considered conservatively live regardless of any other conditions.
+  bool humongous_region_is_always_live(uint index);
+  // Register the given region to be part of the collection set.
+  inline void register_humongous_region_with_in_cset_fast_test(uint index);
+  // Register regions with humongous objects (actually on the start region) in
+  // the in_cset_fast_test table.
+  void register_humongous_regions_with_in_cset_fast_test();
  // We register a region with the fast "in collection set" test. We
  // simply set to true the array slot corresponding to this region.
  void register_region_with_in_cset_fast_test(HeapRegion* r) {
-    _in_cset_fast_test.set_by_index(r->hrs_index(), true);
+    _in_cset_fast_test.set_in_cset(r->hrs_index());
  }

  // This is a fast test on whether a reference points into the
@ -1283,9 +1306,61 @@ public:
  virtual bool is_in(const void* p) const;

  // Return "TRUE" iff the given object address is within the collection
-  // set.
+  // set. Slow implementation.
  inline bool obj_in_cs(oop obj);

+  inline bool is_in_cset(oop obj);
+
+  inline bool is_in_cset_or_humongous(const oop obj);
+
+  enum in_cset_state_t {
+   InNeither,           // neither in collection set nor humongous
+   InCSet,              // region is in collection set only
+   IsHumongous          // region is a humongous start region
+  };
+ private:
+  // Instances of this class are used for quick tests on whether a reference points
+  // into the collection set or is a humongous object (points into a humongous
+  // object).
+  // Each of the array's elements denotes whether the corresponding region is in
+  // the collection set or a humongous region.
+  // We use this to quickly reclaim humongous objects: by making a humongous region
+  // succeed this test, we sort-of add it to the collection set. During the reference
+  // iteration closures, when we see a humongous region, we simply mark it as
+  // referenced, i.e. live.
+  class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<char> {
+   protected:
+    char default_value() const { return G1CollectedHeap::InNeither; }
+   public:
+    void set_humongous(uintptr_t index) {
+      assert(get_by_index(index) != InCSet, "Should not overwrite InCSet values");
+      set_by_index(index, G1CollectedHeap::IsHumongous);
+    }
+
+    void clear_humongous(uintptr_t index) {
+      set_by_index(index, G1CollectedHeap::InNeither);
+    }
+
+    void set_in_cset(uintptr_t index) {
+      assert(get_by_index(index) != G1CollectedHeap::IsHumongous, "Should not overwrite IsHumongous value");
+      set_by_index(index, G1CollectedHeap::InCSet);
+    }
+
+    bool is_in_cset_or_humongous(HeapWord* addr) const { return get_by_address(addr) != G1CollectedHeap::InNeither; }
+    bool is_in_cset(HeapWord* addr) const { return get_by_address(addr) == G1CollectedHeap::InCSet; }
+    G1CollectedHeap::in_cset_state_t at(HeapWord* addr) const { return (G1CollectedHeap::in_cset_state_t)get_by_address(addr); }
+    void clear() { G1BiasedMappedArray<char>::clear(); }
+  };
+
+  // This array is used for a quick test on whether a reference points into
+  // the collection set or not. Each of the array's elements denotes whether the
+  // corresponding region is in the collection set or not.
+  G1FastCSetBiasedMappedArray _in_cset_fast_test;
+
+ public:
+
+  inline in_cset_state_t in_cset_state(const oop obj);
+
  // Return "TRUE" iff the given object address is in the reserved
  // region of g1.
  bool is_in_g1_reserved(const void* p) const {
@ -1340,6 +1415,10 @@ public:
  // Return the region with the given index. It assumes the index is valid.
  inline HeapRegion* region_at(uint index) const;

+  // Calculate the region index of the given address. Given address must be
+  // within the heap.
+  inline uint addr_to_region(HeapWord* addr) const;
+
  // Divide the heap region sequence into "chunks" of some size (the number
  // of regions divided by the number of parallel threads times some
  // overpartition factor, currently 4).  Assumes that this will be called
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
@ -40,6 +40,13 @@
 // Return the region with the given index. It assumes the index is valid.
 inline HeapRegion* G1CollectedHeap::region_at(uint index) const { return _hrs.at(index); }

+inline uint G1CollectedHeap::addr_to_region(HeapWord* addr) const {
+  assert(is_in_reserved(addr),
+         err_msg("Cannot calculate region index for address "PTR_FORMAT" that is outside of the heap ["PTR_FORMAT", "PTR_FORMAT")",
+                 p2i(addr), p2i(_reserved.start()), p2i(_reserved.end())));
+  return (uint)(pointer_delta(addr, _reserved.start(), sizeof(uint8_t)) >> HeapRegion::LogOfHRGrainBytes);
+}
+
 template <class T>
 inline HeapRegion*
 G1CollectedHeap::heap_region_containing_raw(const T addr) const {
@ -172,12 +179,11 @@ inline bool G1CollectedHeap::isMarkedNext(oop obj) const {
  return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj);
 }

-
 // This is a fast test on whether a reference points into the
 // collection set or not. Assume that the reference
 // points into the heap.
-inline bool G1CollectedHeap::in_cset_fast_test(oop obj) {
-  bool ret = _in_cset_fast_test.get_by_address((HeapWord*)obj);
+inline bool G1CollectedHeap::is_in_cset(oop obj) {
+  bool ret = _in_cset_fast_test.is_in_cset((HeapWord*)obj);
  // let's make sure the result is consistent with what the slower
  // test returns
  assert( ret || !obj_in_cs(obj), "sanity");
@ -185,6 +191,18 @@ inline bool G1CollectedHeap::in_cset_fast_test(oop obj) {
  return ret;
 }

+bool G1CollectedHeap::is_in_cset_or_humongous(const oop obj) {
+  return _in_cset_fast_test.is_in_cset_or_humongous((HeapWord*)obj);
+}
+
+G1CollectedHeap::in_cset_state_t G1CollectedHeap::in_cset_state(const oop obj) {
+  return _in_cset_fast_test.at((HeapWord*)obj);
+}
+
+void G1CollectedHeap::register_humongous_region_with_in_cset_fast_test(uint index) {
+  _in_cset_fast_test.set_humongous(index);
+}
+
 #ifndef PRODUCT
 // Support for G1EvacuationFailureALot

@ -288,4 +306,22 @@ inline bool G1CollectedHeap::is_obj_ill(const oop obj) const {
  return is_obj_ill(obj, heap_region_containing(obj));
 }

+inline void G1CollectedHeap::set_humongous_is_live(oop obj) {
+  uint region = addr_to_region((HeapWord*)obj);
+  // We not only set the "live" flag in the humongous_is_live table, but also
+  // reset the entry in the _in_cset_fast_test table so that subsequent references
+  // to the same humongous object do not go into the slow path again.
+  // This is racy, as multiple threads may at the same time enter here, but this
+  // is benign.
+  // During collection we only ever set the "live" flag, and only ever clear the
+  // entry in the in_cset_fast_table.
+  // We only ever evaluate the contents of these tables (in the VM thread) after
+  // having synchronized the worker threads with the VM thread, or in the same
+  // thread (i.e. within the VM thread).
+  if (!_humongous_is_live.is_live(region)) {
+    _humongous_is_live.set_live(region);
+    _in_cset_fast_test.clear_humongous(region);
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1COLLECTEDHEAP_INLINE_HPP
--- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
@ -255,6 +255,10 @@ void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
 }

+void G1GCPhaseTimes::print_stats(int level, const char* str, size_t value) {
+  LineBuffer(level).append_and_print_cr("[%s: "SIZE_FORMAT"]", str, value);
+}
+
 void G1GCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) {
  LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %u]", str, value, workers);
 }
@ -357,6 +361,14 @@ void G1GCPhaseTimes::print(double pause_time_sec) {
      _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards");
    }
  }
+  if (G1ReclaimDeadHumongousObjectsAtYoungGC) {
+    print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
+    if (G1Log::finest()) {
+      print_stats(3, "Humongous Total", _cur_fast_reclaim_humongous_total);
+      print_stats(3, "Humongous Candidate", _cur_fast_reclaim_humongous_candidates);
+      print_stats(3, "Humongous Reclaimed", _cur_fast_reclaim_humongous_reclaimed);
+    }
+  }
  print_stats(2, "Free CSet",
    (_recorded_young_free_cset_time_ms +
    _recorded_non_young_free_cset_time_ms));
--- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
@ -157,11 +157,17 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
  double _recorded_young_free_cset_time_ms;
  double _recorded_non_young_free_cset_time_ms;

+  double _cur_fast_reclaim_humongous_time_ms;
+  size_t _cur_fast_reclaim_humongous_total;
+  size_t _cur_fast_reclaim_humongous_candidates;
+  size_t _cur_fast_reclaim_humongous_reclaimed;
+
  double _cur_verify_before_time_ms;
  double _cur_verify_after_time_ms;

  // Helper methods for detailed logging
  void print_stats(int level, const char* str, double value);
+  void print_stats(int level, const char* str, size_t value);
  void print_stats(int level, const char* str, double value, uint workers);

 public:
@ -282,6 +288,16 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
    _recorded_non_young_free_cset_time_ms = time_ms;
  }

+  void record_fast_reclaim_humongous_stats(size_t total, size_t candidates) {
+    _cur_fast_reclaim_humongous_total = total;
+    _cur_fast_reclaim_humongous_candidates = candidates;
+  }
+
+  void record_fast_reclaim_humongous_time_ms(double value, size_t reclaimed) {
+    _cur_fast_reclaim_humongous_time_ms = value;
+    _cur_fast_reclaim_humongous_reclaimed = reclaimed;
+  }
+
  void record_young_cset_choice_time_ms(double time_ms) {
    _recorded_young_cset_choice_time_ms = time_ms;
  }
@ -348,6 +364,10 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
    return _recorded_non_young_free_cset_time_ms;
  }

+  double fast_reclaim_humongous_time_ms() {
+    return _cur_fast_reclaim_humongous_time_ms;
+  }
+
  double average_last_update_rs_time() {
    return _last_update_rs_times_ms.average();
  }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp
@ -44,7 +44,7 @@ template <class T>
 inline void FilterIntoCSClosure::do_oop_nv(T* p) {
  T heap_oop = oopDesc::load_heap_oop(p);
  if (!oopDesc::is_null(heap_oop) &&
-      _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
+      _g1->is_in_cset_or_humongous(oopDesc::decode_heap_oop_not_null(heap_oop))) {
    _oc->do_oop(p);
  }
 }
@ -67,7 +67,8 @@ inline void G1ParScanClosure::do_oop_nv(T* p) {

  if (!oopDesc::is_null(heap_oop)) {
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (_g1->in_cset_fast_test(obj)) {
+    G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
+    if (state == G1CollectedHeap::InCSet) {
      // We're not going to even bother checking whether the object is
      // already forwarded or not, as this usually causes an immediate
      // stall. We'll try to prefetch the object (for write, given that
@ -86,6 +87,9 @@ inline void G1ParScanClosure::do_oop_nv(T* p) {

      _par_scan_state->push_on_queue(p);
    } else {
+      if (state == G1CollectedHeap::IsHumongous) {
+        _g1->set_humongous_is_live(obj);
+      }
      _par_scan_state->update_rs(_from, p, _worker_id);
    }
  }
@ -97,12 +101,14 @@ inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {

  if (!oopDesc::is_null(heap_oop)) {
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (_g1->in_cset_fast_test(obj)) {
+    if (_g1->is_in_cset_or_humongous(obj)) {
      Prefetch::write(obj->mark_addr(), 0);
      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));

      // Place on the references queue
      _par_scan_state->push_on_queue(p);
+    } else {
+      assert(!_g1->obj_in_cs(obj), "checking");
    }
  }
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp
@ -288,7 +288,12 @@ void G1ParScanThreadState::undo_allocation(GCAllocPurpose purpose, HeapWord* obj
 }

 HeapWord* G1ParScanThreadState::allocate(GCAllocPurpose purpose, size_t word_sz) {
-  HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz);
+  HeapWord* obj = NULL;
+  if (purpose == GCAllocForSurvived) {
+    obj = alloc_buffer(GCAllocForSurvived)->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
+  } else {
+    obj = alloc_buffer(GCAllocForTenured)->allocate(word_sz);
+  }
  if (obj != NULL) {
    return obj;
  }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp
@ -52,15 +52,20 @@ template <class T> void G1ParScanThreadState::do_oop_evac(T* p, HeapRegion* from
  // set, due to (benign) races in the claim mechanism during RSet scanning more
  // than one thread might claim the same card. So the same card may be
  // processed multiple times. So redo this check.
-  if (_g1h->in_cset_fast_test(obj)) {
+  G1CollectedHeap::in_cset_state_t in_cset_state = _g1h->in_cset_state(obj);
+  if (in_cset_state == G1CollectedHeap::InCSet) {
    oop forwardee;
    if (obj->is_forwarded()) {
      forwardee = obj->forwardee();
    } else {
      forwardee = copy_to_survivor_space(obj);
    }
-    assert(forwardee != NULL, "forwardee should not be NULL");
    oopDesc::encode_store_heap_oop(p, forwardee);
+  } else if (in_cset_state == G1CollectedHeap::IsHumongous) {
+    _g1h->set_humongous_is_live(obj);
+  } else {
+    assert(in_cset_state == G1CollectedHeap::InNeither,
+           err_msg("In_cset_state must be InNeither here, but is %d", in_cset_state));
  }

  assert(obj != NULL, "Must be");
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@ -349,23 +349,8 @@ void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,

  assert((ParallelGCThreads > 0) || worker_i == 0, "invariant");

-  // The two flags below were introduced temporarily to serialize
-  // the updating and scanning of remembered sets. There are some
-  // race conditions when these two operations are done in parallel
-  // and they are causing failures. When we resolve said race
-  // conditions, we'll revert back to parallel remembered set
-  // updating and scanning. See CRs 6677707 and 6677708.
-  if (G1UseParallelRSetUpdating || (worker_i == 0)) {
-    updateRS(&into_cset_dcq, worker_i);
-  } else {
-    _g1p->phase_times()->record_update_rs_processed_buffers(worker_i, 0);
-    _g1p->phase_times()->record_update_rs_time(worker_i, 0.0);
-  }
-  if (G1UseParallelRSetScanning || (worker_i == 0)) {
-    scanRS(oc, code_root_cl, worker_i);
-  } else {
-    _g1p->phase_times()->record_scan_rs_time(worker_i, 0.0);
-  }
+  updateRS(&into_cset_dcq, worker_i);
+  scanRS(oc, code_root_cl, worker_i);

  // We now clear the cached values of _cset_rs_update_cl for this worker
  _cset_rs_update_cl[worker_i] = NULL;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp
@ -220,14 +220,6 @@
  product(uintx, G1HeapRegionSize, 0,                                       \
          "Size of the G1 regions.")                                        \
                                                                            \
-  experimental(bool, G1UseParallelRSetUpdating, true,                       \
-          "Enables the parallelization of remembered set updating "         \
-          "during evacuation pauses")                                       \
-                                                                            \
-  experimental(bool, G1UseParallelRSetScanning, true,                       \
-          "Enables the parallelization of remembered set scanning "         \
-          "during evacuation pauses")                                       \
-                                                                            \
  product(uintx, G1ConcRefinementThreads, 0,                                \
          "If non-0 is the number of parallel rem set update threads, "     \
          "otherwise the value is determined ergonomically.")               \
@ -289,6 +281,13 @@
          "The amount of code root chunks that should be kept at most "     \
          "as percentage of already allocated.")                            \
                                                                            \
+  experimental(bool, G1ReclaimDeadHumongousObjectsAtYoungGC, true,          \
+          "Try to reclaim dead large objects at every young GC.")           \
+                                                                            \
+  experimental(bool, G1TraceReclaimDeadHumongousObjectsAtYoungGC, false,    \
+          "Print some information about large object liveness "             \
+          "at every young GC.")                                             \
+                                                                            \
  experimental(uintx, G1OldCSetRegionThresholdPercent, 10,                  \
          "An upper bound for the number of old CSet regions expressed "    \
          "as a percentage of the heap size.")                              \
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp
@ -94,26 +94,37 @@ G1OffsetTableContigSpace::block_start_const(const void* p) const {
 inline bool
 HeapRegion::block_is_obj(const HeapWord* p) const {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  return !g1h->is_obj_dead(oop(p), this);
+  if (ClassUnloadingWithConcurrentMark) {
+    return !g1h->is_obj_dead(oop(p), this);
+  }
+  return p < top();
 }

 inline size_t
 HeapRegion::block_size(const HeapWord *addr) const {
+  if (addr == top()) {
+    return pointer_delta(end(), addr);
+  }
+
+  if (block_is_obj(addr)) {
+    return oop(addr)->size();
+  }
+
+  assert(ClassUnloadingWithConcurrentMark,
+      err_msg("All blocks should be objects if G1 Class Unloading isn't used. "
+              "HR: ["PTR_FORMAT", "PTR_FORMAT", "PTR_FORMAT") "
+              "addr: " PTR_FORMAT,
+              p2i(bottom()), p2i(top()), p2i(end()), p2i(addr)));
+
  // Old regions' dead objects may have dead classes
  // We need to find the next live object in some other
  // manner than getting the oop size
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  if (g1h->is_obj_dead(oop(addr), this)) {
-    HeapWord* next = g1h->concurrent_mark()->prevMarkBitMap()->
-        getNextMarkedWordAddress(addr, prev_top_at_mark_start());
+  HeapWord* next = g1h->concurrent_mark()->prevMarkBitMap()->
+      getNextMarkedWordAddress(addr, prev_top_at_mark_start());

-    assert(next > addr, "must get the next live object");
-
-    return pointer_delta(next, addr);
-  } else if (addr == top()) {
-    return pointer_delta(end(), addr);
-  }
-  return oop(addr)->size();
+  assert(next > addr, "must get the next live object");
+  return pointer_delta(next, addr);
 }

 inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t word_size) {
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp
@ -695,6 +695,9 @@ void OtherRegionsTable::scrub(CardTableModRefBS* ctbs,
  clear_fcc();
 }

+bool OtherRegionsTable::is_empty() const {
+  return occ_sparse() == 0 && occ_coarse() == 0 && _first_all_fine_prts == NULL;
+}

 size_t OtherRegionsTable::occupied() const {
  size_t sum = occ_fine();
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp
@ -185,6 +185,9 @@ public:
  // objects.
  void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);

+  // Returns whether this remembered set (and all sub-sets) contain no entries.
+  bool is_empty() const;
+
  size_t occupied() const;
  size_t occ_fine() const;
  size_t occ_coarse() const;
@ -269,6 +272,10 @@ public:
    return _other_regions.hr();
  }

+  bool is_empty() const {
+    return (strong_code_roots_list_length() == 0) && _other_regions.is_empty();
+  }
+
  size_t occupied() {
    MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
    return occupied_locked();
@ -371,7 +378,7 @@ public:
  void strong_code_roots_do(CodeBlobClosure* blk) const;

  // Returns the number of elements in the strong code roots list
-  size_t strong_code_roots_list_length() {
+  size_t strong_code_roots_list_length() const {
    return _code_roots.length();
  }

--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
@ -28,12 +28,12 @@
 #include "gc_implementation/parNew/parOopClosures.inline.hpp"
 #include "gc_implementation/shared/adaptiveSizePolicy.hpp"
 #include "gc_implementation/shared/ageTable.hpp"
-#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "gc_implementation/shared/copyFailedInfo.hpp"
 #include "gc_implementation/shared/gcHeapSummary.hpp"
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
-#include "gc_implementation/shared/copyFailedInfo.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.inline.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "memory/defNewGeneration.inline.hpp"
 #include "memory/genCollectedHeap.hpp"
@ -252,7 +252,7 @@ HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {
        plab->set_word_size(buf_size);
        plab->set_buf(buf_space);
        record_survivor_plab(buf_space, buf_size);
-        obj = plab->allocate(word_sz);
+        obj = plab->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
        // Note that we cannot compare buf_size < word_sz below
        // because of AlignmentReserve (see ParGCAllocBuffer::allocate()).
        assert(obj != NULL || plab->words_remaining() < word_sz,
--- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
@ -168,7 +168,7 @@ class ParScanThreadState {
  HeapWord* alloc_in_to_space_slow(size_t word_sz);

  HeapWord* alloc_in_to_space(size_t word_sz) {
-    HeapWord* obj = to_space_alloc_buffer()->allocate(word_sz);
+    HeapWord* obj = to_space_alloc_buffer()->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
    if (obj != NULL) return obj;
    else return alloc_in_to_space_slow(word_sz);
  }
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp
@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_HPP

 #include "gc_implementation/parallelScavenge/objectStartArray.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/allocation.hpp"

 //
@ -94,23 +95,9 @@ class PSYoungPromotionLAB : public PSPromotionLAB {
  PSYoungPromotionLAB() { }

  // Not MT safe
-  HeapWord* allocate(size_t size) {
-    // Can't assert this, when young fills, we keep the LAB around, but flushed.
-    // assert(_state != flushed, "Sanity");
-    HeapWord* obj = top();
-    HeapWord* new_top = obj + size;
-    // The 'new_top>obj' check is needed to detect overflow of obj+size.
-    if (new_top > obj && new_top <= end()) {
-      set_top(new_top);
-      assert(is_object_aligned((intptr_t)obj) && is_object_aligned((intptr_t)new_top),
-             "checking alignment");
-      return obj;
-    }
+  inline HeapWord* allocate(size_t size);

-    return NULL;
-  }
-
-  debug_only(virtual bool lab_is_valid(MemRegion lab));
+  debug_only(virtual bool lab_is_valid(MemRegion lab);)
 };

 class PSOldPromotionLAB : public PSPromotionLAB {
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp
@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
+
+#include "gc_implementation/parallelScavenge/psPromotionLAB.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+
+HeapWord* PSYoungPromotionLAB::allocate(size_t size) {
+  // Can't assert this, when young fills, we keep the LAB around, but flushed.
+  // assert(_state != flushed, "Sanity");
+  HeapWord* obj = CollectedHeap::align_allocation_or_fail(top(), end(), SurvivorAlignmentInBytes);
+  if (obj == NULL) {
+    return NULL;
+  }
+
+  HeapWord* new_top = obj + size;
+  // The 'new_top>obj' check is needed to detect overflow of obj+size.
+  if (new_top > obj && new_top <= end()) {
+    set_top(new_top);
+    assert(is_ptr_aligned(obj, SurvivorAlignmentInBytes) && is_object_aligned((intptr_t)new_top),
+           "checking alignment");
+    return obj;
+  } else {
+    set_top(obj);
+    return NULL;
+  }
+}
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp
@ -27,6 +27,7 @@

 #include "gc_implementation/parallelScavenge/psOldGen.hpp"
 #include "gc_implementation/parallelScavenge/psPromotionManager.hpp"
+#include "gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.hpp"
 #include "oops/oop.psgc.inline.hpp"

--- a/hotspot/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp
@ -24,7 +24,7 @@

 #ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
-
+#include "gc_interface/collectedHeap.hpp"
 #include "memory/allocation.hpp"
 #include "memory/blockOffsetTable.hpp"
 #include "memory/threadLocalAllocBuffer.hpp"
@ -84,6 +84,9 @@ public:
    }
  }

+  // Allocate the object aligned to "alignment_in_bytes".
+  HeapWord* allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes);
+
  // Undo the last allocation in the buffer, which is required to be of the
  // "obj" of the given "word_sz".
  void undo_allocation(HeapWord* obj, size_t word_sz) {
--- a/hotspot/src/share/vm/gc_implementation/shared/parGCAllocBuffer.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/parGCAllocBuffer.inline.hpp
@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
+
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+
+HeapWord* ParGCAllocBuffer::allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes) {
+
+  HeapWord* res = CollectedHeap::align_allocation_or_fail(_top, _end, alignment_in_bytes);
+  if (res == NULL) {
+    return NULL;
+  }
+
+  // Set _top so that allocate(), which expects _top to be correctly set,
+  // can be used below.
+  _top = res;
+  return allocate(word_sz);
+}
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
--- a/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/vmGCOperations.cpp
@ -195,6 +195,7 @@ void VM_GenCollectFull::doit() {
  gch->do_full_collection(gch->must_clear_all_soft_refs(), _max_level);
 }

+// Returns true iff concurrent GCs unloads metadata.
 bool VM_CollectForMetadataAllocation::initiate_concurrent_GC() {
 #if INCLUDE_ALL_GCS
  if (UseConcMarkSweepGC && CMSClassUnloadingEnabled) {
@ -202,7 +203,7 @@ bool VM_CollectForMetadataAllocation::initiate_concurrent_GC() {
    return true;
  }

-  if (UseG1GC) {
+  if (UseG1GC && ClassUnloadingWithConcurrentMark) {
    G1CollectedHeap* g1h = G1CollectedHeap::heap();
    g1h->g1_policy()->set_initiate_conc_mark_if_possible();

--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp
@ -351,6 +351,12 @@ class CollectedHeap : public CHeapObj<mtInternal> {
    fill_with_object(start, pointer_delta(end, start), zap);
  }

+  // Return the address "addr" aligned by "alignment_in_bytes" if such
+  // an address is below "end".  Return NULL otherwise.
+  inline static HeapWord* align_allocation_or_fail(HeapWord* addr,
+                                                   HeapWord* end,
+                                                   unsigned short alignment_in_bytes);
+
  // Some heaps may offer a contiguous region for shared non-blocking
  // allocation, via inlined code (by exporting the address of the top and
  // end fields defining the extent of the contiguous allocation region.)
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.inline.hpp
@ -241,6 +241,44 @@ inline void CollectedHeap::oop_iterate_no_header(OopClosure* cl) {
  oop_iterate(&no_header_cl);
 }

+
+inline HeapWord* CollectedHeap::align_allocation_or_fail(HeapWord* addr,
+                                                         HeapWord* end,
+                                                         unsigned short alignment_in_bytes) {
+  if (alignment_in_bytes <= ObjectAlignmentInBytes) {
+    return addr;
+  }
+
+  assert(is_ptr_aligned(addr, HeapWordSize),
+    err_msg("Address " PTR_FORMAT " is not properly aligned.", p2i(addr)));
+  assert(is_size_aligned(alignment_in_bytes, HeapWordSize),
+    err_msg("Alignment size %u is incorrect.", alignment_in_bytes));
+
+  HeapWord* new_addr = (HeapWord*) align_pointer_up(addr, alignment_in_bytes);
+  size_t padding = pointer_delta(new_addr, addr);
+
+  if (padding == 0) {
+    return addr;
+  }
+
+  if (padding < CollectedHeap::min_fill_size()) {
+    padding += alignment_in_bytes / HeapWordSize;
+    assert(padding >= CollectedHeap::min_fill_size(),
+      err_msg("alignment_in_bytes %u is expect to be larger "
+      "than the minimum object size", alignment_in_bytes));
+    new_addr = addr + padding;
+  }
+
+  assert(new_addr > addr, err_msg("Unexpected arithmetic overflow "
+    PTR_FORMAT " not greater than " PTR_FORMAT, p2i(new_addr), p2i(addr)));
+  if(new_addr < end) {
+    CollectedHeap::fill_with_object(addr, padding);
+    return new_addr;
+  } else {
+    return NULL;
+  }
+}
+
 #ifndef PRODUCT

 inline bool
--- a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp
+++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp
@ -987,17 +987,6 @@ ConstantPoolCacheEntry *cp_entry))
  int index = cp_entry->field_index();
  if ((ik->field_access_flags(index) & JVM_ACC_FIELD_ACCESS_WATCHED) == 0) return;

-  switch(cp_entry->flag_state()) {
-    case btos:    // fall through
-    case ctos:    // fall through
-    case stos:    // fall through
-    case itos:    // fall through
-    case ftos:    // fall through
-    case ltos:    // fall through
-    case dtos:    // fall through
-    case atos: break;
-    default: ShouldNotReachHere(); return;
-  }
  bool is_static = (obj == NULL);
  HandleMark hm(thread);

--- a/hotspot/src/share/vm/memory/defNewGeneration.cpp
+++ b/hotspot/src/share/vm/memory/defNewGeneration.cpp
@ -790,7 +790,7 @@ oop DefNewGeneration::copy_to_survivor_space(oop old) {

  // Try allocating obj in to-space (unless too old)
  if (old->age() < tenuring_threshold()) {
-    obj = (oop) to()->allocate(s);
+    obj = (oop) to()->allocate_aligned(s);
  }

  // Otherwise try allocating obj tenured
--- a/hotspot/src/share/vm/memory/sharedHeap.cpp
+++ b/hotspot/src/share/vm/memory/sharedHeap.cpp
@ -159,9 +159,9 @@ SharedHeap::StrongRootsScope::~StrongRootsScope() {
 Monitor* SharedHeap::StrongRootsScope::_lock = new Monitor(Mutex::leaf, "StrongRootsScope lock", false);

 void SharedHeap::StrongRootsScope::mark_worker_done_with_threads(uint n_workers) {
-  // The Thread work barrier is only needed by G1.
+  // The Thread work barrier is only needed by G1 Class Unloading.
  // No need to use the barrier if this is single-threaded code.
-  if (UseG1GC && n_workers > 0) {
+  if (UseG1GC && ClassUnloadingWithConcurrentMark && n_workers > 0) {
    uint new_value = (uint)Atomic::add(1, &_n_workers_done_with_threads);
    if (new_value == n_workers) {
      // This thread is last. Notify the others.
@ -172,6 +172,9 @@ void SharedHeap::StrongRootsScope::mark_worker_done_with_threads(uint n_workers)
 }

 void SharedHeap::StrongRootsScope::wait_until_all_workers_done_with_threads(uint n_workers) {
+  assert(UseG1GC,                          "Currently only used by G1");
+  assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
+
  // No need to use the barrier if this is single-threaded code.
  if (n_workers > 0 && (uint)_n_workers_done_with_threads != n_workers) {
    MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
--- a/hotspot/src/share/vm/memory/space.cpp
+++ b/hotspot/src/share/vm/memory/space.cpp
@ -28,6 +28,7 @@
 #include "gc_implementation/shared/liveRange.hpp"
 #include "gc_implementation/shared/markSweep.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/blockOffsetTable.inline.hpp"
 #include "memory/defNewGeneration.hpp"
 #include "memory/genCollectedHeap.hpp"
@ -720,6 +721,27 @@ inline HeapWord* ContiguousSpace::par_allocate_impl(size_t size,
  } while (true);
 }

+HeapWord* ContiguousSpace::allocate_aligned(size_t size) {
+  assert(Heap_lock->owned_by_self() || (SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread()), "not locked");
+  HeapWord* end_value = end();
+
+  HeapWord* obj = CollectedHeap::align_allocation_or_fail(top(), end_value, SurvivorAlignmentInBytes);
+  if (obj == NULL) {
+    return NULL;
+  }
+
+  if (pointer_delta(end_value, obj) >= size) {
+    HeapWord* new_top = obj + size;
+    set_top(new_top);
+    assert(is_ptr_aligned(obj, SurvivorAlignmentInBytes) && is_aligned(new_top),
+      "checking alignment");
+    return obj;
+  } else {
+    set_top(obj);
+    return NULL;
+  }
+}
+
 // Requires locking.
 HeapWord* ContiguousSpace::allocate(size_t size) {
  return allocate_impl(size, end());
--- a/hotspot/src/share/vm/memory/space.hpp
+++ b/hotspot/src/share/vm/memory/space.hpp
@ -526,6 +526,7 @@ class ContiguousSpace: public CompactibleSpace {
  // Allocation (return NULL if full)
  virtual HeapWord* allocate(size_t word_size);
  virtual HeapWord* par_allocate(size_t word_size);
+  HeapWord* allocate_aligned(size_t word_size);

  // Iteration
  void oop_iterate(ExtendedOopClosure* cl);
--- a/hotspot/src/share/vm/oops/method.cpp
+++ b/hotspot/src/share/vm/oops/method.cpp
@ -283,6 +283,13 @@ address Method::bcp_from(int bci) const {
  return bcp;
 }

+address Method::bcp_from(address bcp) const {
+  if (is_native() && bcp == NULL) {
+    return code_base();
+  } else {
+    return bcp;
+  }
+}

 int Method::size(bool is_native) {
  // If native, then include pointers for native_function and signature_handler
--- a/hotspot/src/share/vm/oops/method.hpp
+++ b/hotspot/src/share/vm/oops/method.hpp
@ -648,7 +648,8 @@ class Method : public Metadata {

  // Returns the byte code index from the byte code pointer
  int     bci_from(address bcp) const;
-  address bcp_from(int     bci) const;
+  address bcp_from(int bci) const;
+  address bcp_from(address bcp) const;
  int validate_bci_from_bcp(address bcp) const;
  int validate_bci(int bci) const;

--- a/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp
+++ b/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp
@ -55,8 +55,6 @@ inline void oopDesc::follow_contents(ParCompactionManager* cm) {
  klass()->oop_follow_contents(cm, this);
 }

-// Used by parallel old GC.
-
 inline oop oopDesc::forward_to_atomic(oop p) {
  assert(ParNewGeneration::is_legal_forward_ptr(p),
         "illegal forwarding pointer value.");
--- a/hotspot/src/share/vm/opto/callGenerator.cpp
+++ b/hotspot/src/share/vm/opto/callGenerator.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -357,7 +357,7 @@ void LateInlineCallGenerator::do_late_inline() {

  // Make sure the state is a MergeMem for parsing.
  if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
-    Node* mem = MergeMemNode::make(C, map->in(TypeFunc::Memory));
+    Node* mem = MergeMemNode::make(map->in(TypeFunc::Memory));
    C->initial_gvn()->set_type_bottom(mem);
    map->set_req(TypeFunc::Memory, mem);
  }
--- a/hotspot/src/share/vm/opto/callnode.cpp
+++ b/hotspot/src/share/vm/opto/callnode.cpp
@ -688,7 +688,7 @@ Node *CallNode::match( const ProjNode *proj, const Matcher *match ) {
    return new MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);

  case TypeFunc::Parms+1:       // For LONG & DOUBLE returns
-    assert(tf()->_range->field_at(TypeFunc::Parms+1) == Type::HALF, "");
+    assert(tf()->range()->field_at(TypeFunc::Parms+1) == Type::HALF, "");
    // 2nd half of doubles and longs
    return new MachProjNode(this,proj->_con, RegMask::Empty, (uint)OptoReg::Bad);

@ -778,7 +778,7 @@ bool CallNode::has_non_debug_use(Node *n) {
 }

 // Returns the unique CheckCastPP of a call
-// or 'this' if there are several CheckCastPP
+// or 'this' if there are several CheckCastPP or unexpected uses
 // or returns NULL if there is no one.
 Node *CallNode::result_cast() {
  Node *cast = NULL;
@ -794,6 +794,13 @@ Node *CallNode::result_cast() {
        return this;  // more than 1 CheckCastPP
      }
      cast = use;
+    } else if (!use->is_Initialize() &&
+               !use->is_AddP()) {
+      // Expected uses are restricted to a CheckCastPP, an Initialize
+      // node, and AddP nodes. If we encounter any other use (a Phi
+      // node can be seen in rare cases) return this to prevent
+      // incorrect optimizations.
+      return this;
    }
  }
  return cast;
--- a/hotspot/src/share/vm/opto/cfgnode.cpp
+++ b/hotspot/src/share/vm/opto/cfgnode.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -108,6 +108,7 @@ static Node *merge_region(RegionNode *region, PhaseGVN *phase) {

        rreq++;                 // One more input to Region
      } // Found a region to merge into Region
+      igvn->_worklist.push(r);
      // Clobber pointer to the now dead 'r'
      region->set_req(i, phase->C->top());
    }
@ -449,6 +450,7 @@ Node *RegionNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  // Remove TOP or NULL input paths. If only 1 input path remains, this Region
  // degrades to a copy.
  bool add_to_worklist = false;
+  bool modified = false;
  int cnt = 0;                  // Count of values merging
  DEBUG_ONLY( int cnt_orig = req(); ) // Save original inputs count
  int del_it = 0;               // The last input path we delete
@ -459,6 +461,7 @@ Node *RegionNode::Ideal(PhaseGVN *phase, bool can_reshape) {
      // Remove useless control copy inputs
      if( n->is_Region() && n->as_Region()->is_copy() ) {
        set_req(i, n->nonnull_req());
+        modified = true;
        i--;
        continue;
      }
@ -466,12 +469,14 @@ Node *RegionNode::Ideal(PhaseGVN *phase, bool can_reshape) {
        Node *call = n->in(0);
        if (call->is_Call() && call->as_Call()->entry_point() == OptoRuntime::rethrow_stub()) {
          set_req(i, call->in(0));
+          modified = true;
          i--;
          continue;
        }
      }
      if( phase->type(n) == Type::TOP ) {
        set_req(i, NULL);       // Ignore TOP inputs
+        modified = true;
        i--;
        continue;
      }
@ -691,7 +696,7 @@ Node *RegionNode::Ideal(PhaseGVN *phase, bool can_reshape) {
    }
  }

-  return NULL;
+  return modified ? this : NULL;
 }


@ -1871,7 +1876,7 @@ Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
          igvn->register_new_node_with_optimizer(new_base);
          hook->add_req(new_base);
        }
-        MergeMemNode* result = MergeMemNode::make(phase->C, new_base);
+        MergeMemNode* result = MergeMemNode::make(new_base);
        for (uint i = 1; i < req(); ++i) {
          Node *ii = in(i);
          if (ii->is_MergeMem()) {
--- a/hotspot/src/share/vm/opto/chaitin.cpp
+++ b/hotspot/src/share/vm/opto/chaitin.cpp
@ -1620,7 +1620,7 @@ void PhaseChaitin::fixup_spills() {
          C->check_node_count(0, "out of nodes fixing spills");
          if (C->failing())  return;
          // Transform node
-          MachNode *cisc = mach->cisc_version(stk_offset, C)->as_Mach();
+          MachNode *cisc = mach->cisc_version(stk_offset)->as_Mach();
          cisc->set_req(inp,fp);          // Base register is frame pointer
          if( cisc->oper_input_base() > 1 && mach->oper_input_base() <= 1 ) {
            assert( cisc->oper_input_base() == 2, "Only adding one edge");
--- a/hotspot/src/share/vm/opto/compile.cpp
+++ b/hotspot/src/share/vm/opto/compile.cpp
@ -1039,6 +1039,7 @@ void Compile::Init(int aliaslevel) {

  _node_note_array = NULL;
  _default_node_notes = NULL;
+  DEBUG_ONLY( _modified_nodes = NULL; ) // Used in Optimize()

  _immutable_memory = NULL; // filled in at first inquiry

@ -1247,6 +1248,18 @@ void Compile::print_missing_nodes() {
    }
  }
 }
+void Compile::record_modified_node(Node* n) {
+  if (_modified_nodes != NULL && !_inlining_incrementally &&
+      n->outcnt() != 0 && !n->is_Con()) {
+    _modified_nodes->push(n);
+  }
+}
+
+void Compile::remove_modified_node(Node* n) {
+  if (_modified_nodes != NULL) {
+    _modified_nodes->remove(n);
+  }
+}
 #endif

 #ifndef PRODUCT
@ -2035,6 +2048,9 @@ void Compile::Optimize() {
  // Iterative Global Value Numbering, including ideal transforms
  // Initialize IterGVN with types and values from parse-time GVN
  PhaseIterGVN igvn(initial_gvn());
+#ifdef ASSERT
+  _modified_nodes = new (comp_arena()) Unique_Node_List(comp_arena());
+#endif
  {
    NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
    igvn.optimize();
@ -2197,6 +2213,7 @@ void Compile::Optimize() {
    }
  }

+  DEBUG_ONLY( _modified_nodes = NULL; )
 } // (End scope of igvn; run destructor if necessary for asserts.)

  process_print_inlining();
@ -2825,7 +2842,7 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
          // oops implicit null check is not generated.
          // This will allow to generate normal oop implicit null check.
          if (Matcher::gen_narrow_oop_implicit_null_checks())
-            new_in2 = ConNode::make(this, TypeNarrowOop::NULL_PTR);
+            new_in2 = ConNode::make(TypeNarrowOop::NULL_PTR);
          //
          // This transformation together with CastPP transformation above
          // will generated code for implicit NULL checks for compressed oops.
@ -2864,9 +2881,9 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
          //    NullCheck base_reg
          //
        } else if (t->isa_oopptr()) {
-          new_in2 = ConNode::make(this, t->make_narrowoop());
+          new_in2 = ConNode::make(t->make_narrowoop());
        } else if (t->isa_klassptr()) {
-          new_in2 = ConNode::make(this, t->make_narrowklass());
+          new_in2 = ConNode::make(t->make_narrowklass());
        }
      }
      if (new_in2 != NULL) {
@ -2899,11 +2916,11 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
      const Type* t = in1->bottom_type();
      if (t == TypePtr::NULL_PTR) {
        assert(t->isa_oopptr(), "null klass?");
-        n->subsume_by(ConNode::make(this, TypeNarrowOop::NULL_PTR), this);
+        n->subsume_by(ConNode::make(TypeNarrowOop::NULL_PTR), this);
      } else if (t->isa_oopptr()) {
-        n->subsume_by(ConNode::make(this, t->make_narrowoop()), this);
+        n->subsume_by(ConNode::make(t->make_narrowoop()), this);
      } else if (t->isa_klassptr()) {
-        n->subsume_by(ConNode::make(this, t->make_narrowklass()), this);
+        n->subsume_by(ConNode::make(t->make_narrowklass()), this);
      }
    }
    if (in1->outcnt() == 0) {
@ -2964,7 +2981,7 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
      if (d) {
        // Replace them with a fused divmod if supported
        if (Matcher::has_match_rule(Op_DivModI)) {
-          DivModINode* divmod = DivModINode::make(this, n);
+          DivModINode* divmod = DivModINode::make(n);
          d->subsume_by(divmod->div_proj(), this);
          n->subsume_by(divmod->mod_proj(), this);
        } else {
@ -2984,7 +3001,7 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
      if (d) {
        // Replace them with a fused divmod if supported
        if (Matcher::has_match_rule(Op_DivModL)) {
-          DivModLNode* divmod = DivModLNode::make(this, n);
+          DivModLNode* divmod = DivModLNode::make(n);
          d->subsume_by(divmod->div_proj(), this);
          n->subsume_by(divmod->mod_proj(), this);
        } else {
@ -3010,7 +3027,7 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
    if (n->req()-1 > 2) {
      // Replace many operand PackNodes with a binary tree for matching
      PackNode* p = (PackNode*) n;
-      Node* btp = p->binary_tree_pack(this, 1, n->req());
+      Node* btp = p->binary_tree_pack(1, n->req());
      n->subsume_by(btp, this);
    }
    break;
@ -3035,11 +3052,11 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
      if (t != NULL && t->is_con()) {
        juint shift = t->get_con();
        if (shift > mask) { // Unsigned cmp
-          n->set_req(2, ConNode::make(this, TypeInt::make(shift & mask)));
+          n->set_req(2, ConNode::make(TypeInt::make(shift & mask)));
        }
      } else {
        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
-          Node* shift = new AndINode(in2, ConNode::make(this, TypeInt::make(mask)));
+          Node* shift = new AndINode(in2, ConNode::make(TypeInt::make(mask)));
          n->set_req(2, shift);
        }
      }
@ -4031,6 +4048,7 @@ void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) {
  int j = 0;
  int identical = 0;
  int i = 0;
+  bool modified = false;
  for (; i < _expensive_nodes->length()-1; i++) {
    assert(j <= i, "can't write beyond current index");
    if (_expensive_nodes->at(i)->Opcode() == _expensive_nodes->at(i+1)->Opcode()) {
@ -4043,20 +4061,23 @@ void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) {
      identical = 0;
    } else {
      Node* n = _expensive_nodes->at(i);
-      igvn.hash_delete(n);
-      n->set_req(0, NULL);
+      igvn.replace_input_of(n, 0, NULL);
      igvn.hash_insert(n);
+      modified = true;
    }
  }
  if (identical > 0) {
    _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
  } else if (_expensive_nodes->length() >= 1) {
    Node* n = _expensive_nodes->at(i);
-    igvn.hash_delete(n);
-    n->set_req(0, NULL);
+    igvn.replace_input_of(n, 0, NULL);
    igvn.hash_insert(n);
+    modified = true;
  }
  _expensive_nodes->trunc_to(j);
+  if (modified) {
+    igvn.optimize();
+  }
 }

 void Compile::add_expensive_node(Node * n) {
--- a/hotspot/src/share/vm/opto/compile.hpp
+++ b/hotspot/src/share/vm/opto/compile.hpp
@ -344,6 +344,8 @@ class Compile : public Phase {
  VectorSet             _dead_node_list;        // Set of dead nodes
  uint                  _dead_node_count;       // Number of dead nodes; VectorSet::Size() is O(N).
                                                // So use this to keep count and make the call O(1).
+  DEBUG_ONLY( Unique_Node_List* _modified_nodes; )  // List of nodes which inputs were modified
+
  debug_only(static int _debug_idx;)            // Monotonic counter (not reset), use -XX:BreakAtNode=<idx>
  Arena                 _node_arena;            // Arena for new-space Nodes
  Arena                 _old_arena;             // Arena for old-space Nodes, lifetime during xform
@ -766,6 +768,11 @@ class Compile : public Phase {
  void         print_missing_nodes();
 #endif

+  // Record modified nodes to check that they are put on IGVN worklist
+  void         record_modified_node(Node* n) NOT_DEBUG_RETURN;
+  void         remove_modified_node(Node* n) NOT_DEBUG_RETURN;
+  DEBUG_ONLY( Unique_Node_List*   modified_nodes() const { return _modified_nodes; } )
+
  // Constant table
  ConstantTable&   constant_table() { return _constant_table; }

--- a/hotspot/src/share/vm/opto/connode.cpp
+++ b/hotspot/src/share/vm/opto/connode.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -43,7 +43,7 @@ uint ConNode::hash() const {
 }

 //------------------------------make-------------------------------------------
-ConNode *ConNode::make( Compile* C, const Type *t ) {
+ConNode *ConNode::make(const Type *t) {
  switch( t->basic_type() ) {
  case T_INT:         return new ConINode( t->is_int() );
  case T_LONG:        return new ConLNode( t->is_long() );
--- a/hotspot/src/share/vm/opto/connode.hpp
+++ b/hotspot/src/share/vm/opto/connode.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -46,7 +46,7 @@ public:
  virtual const RegMask &in_RegMask(uint) const { return RegMask::Empty; }

  // Polymorphic factory method:
-  static ConNode* make( Compile* C, const Type *t );
+  static ConNode* make(const Type *t);
 };

 //------------------------------ConINode---------------------------------------
@ -57,7 +57,7 @@ public:
  virtual int Opcode() const;

  // Factory method:
-  static ConINode* make( Compile* C, int con ) {
+  static ConINode* make(int con) {
    return new ConINode( TypeInt::make(con) );
  }

@ -71,7 +71,7 @@ public:
  virtual int Opcode() const;

  // Factory methods:
-  static ConPNode* make( Compile *C ,address con ) {
+  static ConPNode* make(address con) {
    if (con == NULL)
      return new ConPNode( TypePtr::NULL_PTR ) ;
    else
@ -105,7 +105,7 @@ public:
  virtual int Opcode() const;

  // Factory method:
-  static ConLNode* make( Compile *C ,jlong con ) {
+  static ConLNode* make(jlong con) {
    return new ConLNode( TypeLong::make(con) );
  }

@ -119,7 +119,7 @@ public:
  virtual int Opcode() const;

  // Factory method:
-  static ConFNode* make( Compile *C, float con  ) {
+  static ConFNode* make(float con) {
    return new ConFNode( TypeF::make(con) );
  }

@ -133,7 +133,7 @@ public:
  virtual int Opcode() const;

  // Factory method:
-  static ConDNode* make( Compile *C, double con ) {
+  static ConDNode* make(double con) {
    return new ConDNode( TypeD::make(con) );
  }

--- a/hotspot/src/share/vm/opto/divnode.cpp
+++ b/hotspot/src/share/vm/opto/divnode.cpp
@ -479,7 +479,10 @@ Node *DivINode::Ideal(PhaseGVN *phase, bool can_reshape) {

  if (i == 0) return NULL;      // Dividing by zero constant does not idealize

-  set_req(0,NULL);              // Dividing by a not-zero constant; no faulting
+  if (in(0) != NULL) {
+    phase->igvn_rehash_node_delayed(this);
+    set_req(0, NULL);           // Dividing by a not-zero constant; no faulting
+  }

  // Dividing by MININT does not optimize as a power-of-2 shift.
  if( i == min_jint ) return NULL;
@ -578,7 +581,10 @@ Node *DivLNode::Ideal( PhaseGVN *phase, bool can_reshape) {

  if (l == 0) return NULL;      // Dividing by zero constant does not idealize

-  set_req(0,NULL);              // Dividing by a not-zero constant; no faulting
+  if (in(0) != NULL) {
+    phase->igvn_rehash_node_delayed(this);
+    set_req(0, NULL);           // Dividing by a not-zero constant; no faulting
+  }

  // Dividing by MINLONG does not optimize as a power-of-2 shift.
  if( l == min_jlong ) return NULL;
@ -1274,7 +1280,7 @@ DivModNode::DivModNode( Node *c, Node *dividend, Node *divisor ) : MultiNode(3)
 }

 //------------------------------make------------------------------------------
-DivModINode* DivModINode::make(Compile* C, Node* div_or_mod) {
+DivModINode* DivModINode::make(Node* div_or_mod) {
  Node* n = div_or_mod;
  assert(n->Opcode() == Op_DivI || n->Opcode() == Op_ModI,
         "only div or mod input pattern accepted");
@ -1286,7 +1292,7 @@ DivModINode* DivModINode::make(Compile* C, Node* div_or_mod) {
 }

 //------------------------------make------------------------------------------
-DivModLNode* DivModLNode::make(Compile* C, Node* div_or_mod) {
+DivModLNode* DivModLNode::make(Node* div_or_mod) {
  Node* n = div_or_mod;
  assert(n->Opcode() == Op_DivL || n->Opcode() == Op_ModL,
         "only div or mod input pattern accepted");
--- a/hotspot/src/share/vm/opto/divnode.hpp
+++ b/hotspot/src/share/vm/opto/divnode.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -168,7 +168,7 @@ public:
  virtual Node *match( const ProjNode *proj, const Matcher *m );

  // Make a divmod and associated projections from a div or mod.
-  static DivModINode* make(Compile* C, Node* div_or_mod);
+  static DivModINode* make(Node* div_or_mod);
 };

 //------------------------------DivModLNode---------------------------------------
@ -181,7 +181,7 @@ public:
  virtual Node *match( const ProjNode *proj, const Matcher *m );

  // Make a divmod and associated projections from a div or mod.
-  static DivModLNode* make(Compile* C, Node* div_or_mod);
+  static DivModLNode* make(Node* div_or_mod);
 };

 #endif // SHARE_VM_OPTO_DIVNODE_HPP
--- a/hotspot/src/share/vm/opto/escape.cpp
+++ b/hotspot/src/share/vm/opto/escape.cpp
@ -1452,7 +1452,6 @@ int ConnectionGraph::find_init_values(JavaObjectNode* pta, PointsToNode* init_va
    return 0;

  InitializeNode* ini = alloc->as_Allocate()->initialization();
-  Compile* C = _compile;
  bool visited_bottom_offset = false;
  GrowableArray<int> offsets_worklist;

--- a/hotspot/src/share/vm/opto/graphKit.cpp
+++ b/hotspot/src/share/vm/opto/graphKit.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -591,7 +591,7 @@ void GraphKit::builtin_throw(Deoptimization::DeoptReason reason, Node* arg) {
        C->log()->elem("hot_throw preallocated='1' reason='%s'",
                       Deoptimization::trap_reason_name(reason));
      const TypeInstPtr* ex_con  = TypeInstPtr::make(ex_obj);
-      Node*              ex_node = _gvn.transform( ConNode::make(C, ex_con) );
+      Node*              ex_node = _gvn.transform(ConNode::make(ex_con));

      // Clear the detail message of the preallocated exception object.
      // Weblogic sometimes mutates the detail message of exceptions
@ -706,7 +706,7 @@ SafePointNode* GraphKit::clone_map() {
  if (map() == NULL)  return NULL;

  // Clone the memory edge first
-  Node* mem = MergeMemNode::make(C, map()->memory());
+  Node* mem = MergeMemNode::make(map()->memory());
  gvn().set_type_bottom(mem);

  SafePointNode *clonemap = (SafePointNode*)map()->clone();
@ -1135,7 +1135,7 @@ Node* GraphKit::ConvI2UL(Node* offset) {
    return longcon((julong) offset_con);
  }
  Node* conv = _gvn.transform( new ConvI2LNode(offset));
-  Node* mask = _gvn.transform( ConLNode::make(C, (julong) max_juint) );
+  Node* mask = _gvn.transform(ConLNode::make((julong) max_juint));
  return _gvn.transform( new AndLNode(conv, mask) );
 }

@ -1435,7 +1435,7 @@ Node* GraphKit::reset_memory() {

 //------------------------------set_all_memory---------------------------------
 void GraphKit::set_all_memory(Node* newmem) {
-  Node* mergemem = MergeMemNode::make(C, newmem);
+  Node* mergemem = MergeMemNode::make(newmem);
  gvn().set_type_bottom(mergemem);
  map()->set_memory(mergemem);
 }
@ -1464,9 +1464,9 @@ Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
  Node* mem = memory(adr_idx);
  Node* ld;
  if (require_atomic_access && bt == T_LONG) {
-    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t, mo);
+    ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, mo);
  } else if (require_atomic_access && bt == T_DOUBLE) {
-    ld = LoadDNode::make_atomic(C, ctl, mem, adr, adr_type, t, mo);
+    ld = LoadDNode::make_atomic(ctl, mem, adr, adr_type, t, mo);
  } else {
    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, mo);
  }
@ -1488,9 +1488,9 @@ Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
  Node *mem = memory(adr_idx);
  Node* st;
  if (require_atomic_access && bt == T_LONG) {
-    st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val, mo);
+    st = StoreLNode::make_atomic(ctl, mem, adr, adr_type, val, mo);
  } else if (require_atomic_access && bt == T_DOUBLE) {
-    st = StoreDNode::make_atomic(C, ctl, mem, adr, adr_type, val, mo);
+    st = StoreDNode::make_atomic(ctl, mem, adr, adr_type, val, mo);
  } else {
    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo);
  }
@ -2084,9 +2084,9 @@ Node* GraphKit::just_allocated_object(Node* current_control) {
 void GraphKit::round_double_arguments(ciMethod* dest_method) {
  // (Note:  TypeFunc::make has a cache that makes this fast.)
  const TypeFunc* tf    = TypeFunc::make(dest_method);
-  int             nargs = tf->_domain->_cnt - TypeFunc::Parms;
+  int             nargs = tf->domain()->cnt() - TypeFunc::Parms;
  for (int j = 0; j < nargs; j++) {
-    const Type *targ = tf->_domain->field_at(j + TypeFunc::Parms);
+    const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms);
    if( targ->basic_type() == T_DOUBLE ) {
      // If any parameters are doubles, they must be rounded before
      // the call, dstore_rounding does gvn.transform
@ -2188,10 +2188,10 @@ void GraphKit::record_profiled_arguments_for_speculation(ciMethod* dest_method,
    return;
  }
  const TypeFunc* tf    = TypeFunc::make(dest_method);
-  int             nargs = tf->_domain->_cnt - TypeFunc::Parms;
+  int             nargs = tf->domain()->cnt() - TypeFunc::Parms;
  int skip = Bytecodes::has_receiver(bc) ? 1 : 0;
  for (int j = skip, i = 0; j < nargs && i < TypeProfileArgsLimit; j++) {
-    const Type *targ = tf->_domain->field_at(j + TypeFunc::Parms);
+    const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms);
    if (targ->basic_type() == T_OBJECT || targ->basic_type() == T_ARRAY) {
      bool maybe_null = true;
      ciKlass* better_type = NULL;
@ -3364,7 +3364,7 @@ Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
    // This will allow us to observe initializations when they occur,
    // and link them properly (as a group) to the InitializeNode.
    assert(init->in(InitializeNode::Memory) == malloc, "");
-    MergeMemNode* minit_in = MergeMemNode::make(C, malloc);
+    MergeMemNode* minit_in = MergeMemNode::make(malloc);
    init->set_req(InitializeNode::Memory, minit_in);
    record_for_igvn(minit_in); // fold it up later, if possible
    Node* minit_out = memory(rawidx);
--- a/hotspot/src/share/vm/opto/idealKit.cpp
+++ b/hotspot/src/share/vm/opto/idealKit.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -320,7 +320,7 @@ Node* IdealKit::copy_cvstate() {
  Node* ns = new_cvstate();
  for (uint i = 0; i < ns->req(); i++) ns->init_req(i, _cvstate->in(i));
  // We must clone memory since it will be updated as we do stores.
-  ns->set_req(TypeFunc::Memory, MergeMemNode::make(C, ns->in(TypeFunc::Memory)));
+  ns->set_req(TypeFunc::Memory, MergeMemNode::make(ns->in(TypeFunc::Memory)));
  return ns;
 }

@ -359,7 +359,7 @@ Node* IdealKit::load(Node* ctl,
  Node* mem = memory(adr_idx);
  Node* ld;
  if (require_atomic_access && bt == T_LONG) {
-    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t, MemNode::unordered);
+    ld = LoadLNode::make_atomic(ctl, mem, adr, adr_type, t, MemNode::unordered);
  } else {
    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, MemNode::unordered);
  }
@ -375,7 +375,7 @@ Node* IdealKit::store(Node* ctl, Node* adr, Node *val, BasicType bt,
  Node *mem = memory(adr_idx);
  Node* st;
  if (require_atomic_access && bt == T_LONG) {
-    st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val, mo);
+    st = StoreLNode::make_atomic(ctl, mem, adr, adr_type, val, mo);
  } else {
    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, mo);
  }
--- a/hotspot/src/share/vm/opto/lcm.cpp
+++ b/hotspot/src/share/vm/opto/lcm.cpp
@ -464,7 +464,9 @@ Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &re
        iop == Op_CreateEx ||   // Create-exception must start block
        iop == Op_CheckCastPP
        ) {
-      worklist.map(i,worklist.pop());
+      // select the node n
+      // remove n from worklist and retain the order of remaining nodes
+      worklist.remove((uint)i);
      return n;
    }

@ -550,7 +552,9 @@ Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &re
  assert(idx >= 0, "index should be set");
  Node *n = worklist[(uint)idx];      // Get the winner

-  worklist.map((uint)idx, worklist.pop());     // Compress worklist
+  // select the node n
+  // remove n from worklist and retain the order of remaining nodes
+  worklist.remove((uint)idx);
  return n;
 }

--- a/hotspot/src/share/vm/opto/library_call.cpp
+++ b/hotspot/src/share/vm/opto/library_call.cpp
@ -1905,7 +1905,7 @@ bool LibraryCallKit::inline_pow() {
    Node *bolyplus1 = _gvn.transform(new BoolNode( cmpyplus1, BoolTest::eq ));
    Node* correctedsign = NULL;
    if (ConditionalMoveLimit != 0) {
-      correctedsign = _gvn.transform( CMoveNode::make(C, NULL, bolyplus1, signnode, longcon(0), TypeLong::LONG));
+      correctedsign = _gvn.transform(CMoveNode::make(NULL, bolyplus1, signnode, longcon(0), TypeLong::LONG));
    } else {
      IfNode *ifyplus1 = create_and_xform_if(ylong_path,bolyplus1, PROB_FAIR, COUNT_UNKNOWN);
      RegionNode *r = new RegionNode(3);
@ -1934,7 +1934,7 @@ bool LibraryCallKit::inline_pow() {
    // (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
    Node *signresult = NULL;
    if (ConditionalMoveLimit != 0) {
-      signresult = _gvn.transform( CMoveNode::make(C, NULL, bol3, absxpowy, negabsxpowy, Type::DOUBLE));
+      signresult = _gvn.transform(CMoveNode::make(NULL, bol3, absxpowy, negabsxpowy, Type::DOUBLE));
    } else {
      IfNode *ifyeven = create_and_xform_if(ylong_path,bol3, PROB_FAIR, COUNT_UNKNOWN);
      RegionNode *r = new RegionNode(3);
@ -2268,7 +2268,7 @@ LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
  // which could hinder other optimizations.
  // Since Math.min/max is often used with arraycopy, we want
  // tightly_coupled_allocation to be able to see beyond min/max expressions.
-  Node* cmov = CMoveNode::make(C, NULL, best_bol,
+  Node* cmov = CMoveNode::make(NULL, best_bol,
                               answer_if_false, answer_if_true,
                               TypeInt::make(lo, hi, widen));

--- a/hotspot/src/share/vm/opto/loopPredicate.cpp
+++ b/hotspot/src/share/vm/opto/loopPredicate.cpp
@ -107,8 +107,7 @@ ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node*
    rgn = new RegionNode(1);
    rgn->add_req(uncommon_proj);
    register_control(rgn, loop, uncommon_proj);
-    _igvn.hash_delete(call);
-    call->set_req(0, rgn);
+    _igvn.replace_input_of(call, 0, rgn);
    // When called from beautify_loops() idom is not constructed yet.
    if (_idom != NULL) {
      set_idom(call, rgn, dom_depth(rgn));
@ -166,8 +165,7 @@ ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node*

  if (new_entry == NULL) {
    // Attach if_cont to iff
-    _igvn.hash_delete(iff);
-    iff->set_req(0, if_cont);
+    _igvn.replace_input_of(iff, 0, if_cont);
    if (_idom != NULL) {
      set_idom(iff, if_cont, dom_depth(iff));
    }
@ -194,8 +192,7 @@ ProjNode* PhaseIterGVN::create_new_if_for_predicate(ProjNode* cont_proj, Node* n
    rgn = new RegionNode(1);
    register_new_node_with_optimizer(rgn);
    rgn->add_req(uncommon_proj);
-    hash_delete(call);
-    call->set_req(0, rgn);
+    replace_input_of(call, 0, rgn);
  } else {
    // Find region's edge corresponding to uncommon_proj
    for (; proj_index < rgn->req(); proj_index++)
--- a/hotspot/src/share/vm/opto/loopTransform.cpp
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp
@ -924,15 +924,13 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
  if( bol->outcnt() != 1 ) {
    bol = bol->clone();
    register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
-    _igvn.hash_delete(main_end);
-    main_end->set_req(CountedLoopEndNode::TestValue, bol);
+    _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, bol);
  }
  // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
  if( cmp->outcnt() != 1 ) {
    cmp = cmp->clone();
    register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
-    _igvn.hash_delete(bol);
-    bol->set_req(1, cmp);
+    _igvn.replace_input_of(bol, 1, cmp);
  }

  //------------------------------
@ -1118,8 +1116,7 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
    Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
    BoolNode* new_bol0 = new BoolNode(pre_bol->in(1), new_test);
    register_new_node( new_bol0, pre_head->in(0) );
-    _igvn.hash_delete(pre_end);
-    pre_end->set_req(CountedLoopEndNode::TestValue, new_bol0);
+    _igvn.replace_input_of(pre_end, CountedLoopEndNode::TestValue, new_bol0);
    // Modify main loop guard condition
    assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
    BoolNode* new_bol1 = new BoolNode(min_bol->in(1), new_test);
@ -1130,8 +1127,7 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
    BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
    BoolNode* new_bol2 = new BoolNode(main_bol->in(1), new_test);
    register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
-    _igvn.hash_delete(main_end);
-    main_end->set_req(CountedLoopEndNode::TestValue, new_bol2);
+    _igvn.replace_input_of(main_end, CountedLoopEndNode::TestValue, new_bol2);
  }

  // Flag main loop
@ -1346,8 +1342,7 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
        Node* bol2 = loop_end->in(1)->clone();
        bol2->set_req(1, cmp2);
        register_new_node(bol2, ctrl2);
-        _igvn.hash_delete(loop_end);
-        loop_end->set_req(1, bol2);
+        _igvn.replace_input_of(loop_end, 1, bol2);
      }
      // Step 3: Find the min-trip test guaranteed before a 'main' loop.
      // Make it a 1-trip test (means at least 2 trips).
@ -1356,8 +1351,7 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
      // can edit it's inputs directly.  Hammer in the new limit for the
      // minimum-trip guard.
      assert(opaq->outcnt() == 1, "");
-      _igvn.hash_delete(opaq);
-      opaq->set_req(1, new_limit);
+      _igvn.replace_input_of(opaq, 1, new_limit);
    }

    // Adjust max trip count. The trip count is intentionally rounded
@ -1407,8 +1401,7 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
    register_new_node( cmp2, ctrl2 );
    Node *bol2 = new BoolNode( cmp2, loop_end->test_trip() );
    register_new_node( bol2, ctrl2 );
-    _igvn.hash_delete(loop_end);
-    loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
+    _igvn.replace_input_of(loop_end, CountedLoopEndNode::TestValue, bol2);

    // Step 3: Find the min-trip test guaranteed before a 'main' loop.
    // Make it a 1-trip test (means at least 2 trips).
@ -1997,8 +1990,7 @@ void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
                                 : (Node*)new MaxINode(pre_limit, orig_limit);
    register_new_node(pre_limit, pre_ctrl);
  }
-  _igvn.hash_delete(pre_opaq);
-  pre_opaq->set_req(1, pre_limit);
+  _igvn.replace_input_of(pre_opaq, 1, pre_limit);

  // Note:: we are making the main loop limit no longer precise;
  // need to round up based on stride.
@ -2027,10 +2019,9 @@ void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
  Node *main_bol = main_cle->in(1);
  // Hacking loop bounds; need private copies of exit test
  if( main_bol->outcnt() > 1 ) {// BoolNode shared?
-    _igvn.hash_delete(main_cle);
    main_bol = main_bol->clone();// Clone a private BoolNode
    register_new_node( main_bol, main_cle->in(0) );
-    main_cle->set_req(1,main_bol);
+    _igvn.replace_input_of(main_cle, 1, main_bol);
  }
  Node *main_cmp = main_bol->in(1);
  if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
--- a/hotspot/src/share/vm/opto/loopnode.cpp
+++ b/hotspot/src/share/vm/opto/loopnode.cpp
@ -133,7 +133,7 @@ Node *PhaseIdealLoop::get_early_ctrl( Node *n ) {
  // Return earliest legal location
  assert(early == find_non_split_ctrl(early), "unexpected early control");

-  if (n->is_expensive()) {
+  if (n->is_expensive() && !_verify_only && !_verify_me) {
    assert(n->in(0), "should have control input");
    early = get_early_ctrl_for_expensive(n, early);
  }
@ -226,8 +226,7 @@ Node *PhaseIdealLoop::get_early_ctrl_for_expensive(Node *n, Node* earliest) {
  }

  if (ctl != n->in(0)) {
-    _igvn.hash_delete(n);
-    n->set_req(0, ctl);
+    _igvn.replace_input_of(n, 0, ctl);
    _igvn.hash_insert(n);
  }

@ -521,8 +520,7 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
    assert(check_iff->in(1)->Opcode() == Op_Conv2B &&
           check_iff->in(1)->in(1)->Opcode() == Op_Opaque1, "");
    Node* opq = check_iff->in(1)->in(1);
-    _igvn.hash_delete(opq);
-    opq->set_req(1, bol);
+    _igvn.replace_input_of(opq, 1, bol);
    // Update ctrl.
    set_ctrl(opq, check_iff->in(0));
    set_ctrl(check_iff->in(1), check_iff->in(0));
@ -690,7 +688,7 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
  incr->set_req(2,stride);
  incr = _igvn.register_new_node_with_optimizer(incr);
  set_early_ctrl( incr );
-  _igvn.hash_delete(phi);
+  _igvn.rehash_node_delayed(phi);
  phi->set_req_X( LoopNode::LoopBackControl, incr, &_igvn );

  // If phi type is more restrictive than Int, raise to
@ -743,8 +741,8 @@ bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
    iffalse = iff2;
    iftrue  = ift2;
  } else {
-    _igvn.hash_delete(iffalse);
-    _igvn.hash_delete(iftrue);
+    _igvn.rehash_node_delayed(iffalse);
+    _igvn.rehash_node_delayed(iftrue);
    iffalse->set_req_X( 0, le, &_igvn );
    iftrue ->set_req_X( 0, le, &_igvn );
  }
@ -1257,6 +1255,7 @@ void IdealLoopTree::split_fall_in( PhaseIdealLoop *phase, int fall_in_cnt ) {
      _head->del_req(i);
    }
  }
+  igvn.rehash_node_delayed(_head);
  // Transform landing pad
  igvn.register_new_node_with_optimizer(landing_pad, _head);
  // Insert landing pad into the header
@ -1397,7 +1396,7 @@ void IdealLoopTree::merge_many_backedges( PhaseIdealLoop *phase ) {
  igvn.register_new_node_with_optimizer(r, _head);
  // Plug region into end of loop _head, followed by hot_tail
  while( _head->req() > 3 ) _head->del_req( _head->req()-1 );
-  _head->set_req(2, r);
+  igvn.replace_input_of(_head, 2, r);
  if( hot_idx ) _head->add_req(hot_tail);

  // Split all the Phis up between '_head' loop and the Region 'r'
@ -1419,7 +1418,7 @@ void IdealLoopTree::merge_many_backedges( PhaseIdealLoop *phase ) {
      igvn.register_new_node_with_optimizer(phi, n);
      // Add the merge phi to the old Phi
      while( n->req() > 3 ) n->del_req( n->req()-1 );
-      n->set_req(2, phi);
+      igvn.replace_input_of(n, 2, phi);
      if( hot_idx ) n->add_req(hot_phi);
    }
  }
@ -1495,13 +1494,14 @@ bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) {
  if( fall_in_cnt > 1 ) {
    // Since I am just swapping inputs I do not need to update def-use info
    Node *tmp = _head->in(1);
+    igvn.rehash_node_delayed(_head);
    _head->set_req( 1, _head->in(fall_in_cnt) );
    _head->set_req( fall_in_cnt, tmp );
    // Swap also all Phis
    for (DUIterator_Fast imax, i = _head->fast_outs(imax); i < imax; i++) {
      Node* phi = _head->fast_out(i);
      if( phi->is_Phi() ) {
-        igvn.hash_delete(phi); // Yank from hash before hacking edges
+        igvn.rehash_node_delayed(phi); // Yank from hash before hacking edges
        tmp = phi->in(1);
        phi->set_req( 1, phi->in(fall_in_cnt) );
        phi->set_req( fall_in_cnt, tmp );
@ -2905,6 +2905,7 @@ int PhaseIdealLoop::build_loop_tree_impl( Node *n, int pre_order ) {
          uint k = 0;             // Probably cfg->in(0)
          while( cfg->in(k) != m ) k++; // But check incase cfg is a Region
          cfg->set_req( k, if_t ); // Now point to NeverBranch
+          _igvn._worklist.push(cfg);

          // Now create the never-taken loop exit
          Node *if_f = new CProjNode( iff, 1 );
--- a/hotspot/src/share/vm/opto/loopopts.cpp
+++ b/hotspot/src/share/vm/opto/loopopts.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -625,7 +625,7 @@ Node *PhaseIdealLoop::conditional_move( Node *region ) {
        set_ctrl(inp, cmov_ctrl);
      }
    }
-    Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
+    Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
    register_new_node( cmov, cmov_ctrl );
    _igvn.replace_node( phi, cmov );
 #ifndef PRODUCT
@ -2574,7 +2574,7 @@ bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
  new_head->set_unswitch_count(head->unswitch_count()); // Preserve
  _igvn.register_new_node_with_optimizer(new_head);
  assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
-  first_not_peeled->set_req(0, new_head);
+  _igvn.replace_input_of(first_not_peeled, 0, new_head);
  set_loop(new_head, loop);
  loop->_body.push(new_head);
  not_peel.set(new_head->_idx);
--- a/hotspot/src/share/vm/opto/machnode.cpp
+++ b/hotspot/src/share/vm/opto/machnode.cpp
@ -178,7 +178,7 @@ uint MachNode::cmp( const Node &node ) const {
 }

 // Return an equivalent instruction using memory for cisc_operand position
-MachNode *MachNode::cisc_version(int offset, Compile* C) {
+MachNode *MachNode::cisc_version(int offset) {
  ShouldNotCallThis();
  return NULL;
 }
@ -411,7 +411,7 @@ int MachNode::operand_index(const MachOper *oper) const {

 //------------------------------peephole---------------------------------------
 // Apply peephole rule(s) to this instruction
-MachNode *MachNode::peephole( Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted, Compile* C ) {
+MachNode *MachNode::peephole(Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted) {
  return NULL;
 }

--- a/hotspot/src/share/vm/opto/machnode.hpp
+++ b/hotspot/src/share/vm/opto/machnode.hpp
@ -152,7 +152,7 @@ public:
  virtual uint  cmp( const MachOper &oper ) const;

  // Virtual clone, since I do not know how big the MachOper is.
-  virtual MachOper *clone(Compile* C) const = 0;
+  virtual MachOper *clone() const = 0;

  // Return ideal Type from simple operands.  Fail for complex operands.
  virtual const Type *type() const;
@ -202,10 +202,10 @@ public:
  // Copy inputs and operands to new node of instruction.
  // Called from cisc_version() and short_branch_version().
  // !!!! The method's body is defined in ad_<arch>.cpp file.
-  void fill_new_machnode(MachNode *n, Compile* C) const;
+  void fill_new_machnode(MachNode *n) const;

  // Return an equivalent instruction using memory for cisc_operand position
-  virtual MachNode *cisc_version(int offset, Compile* C);
+  virtual MachNode *cisc_version(int offset);
  // Modify this instruction's register mask to use stack version for cisc_operand
  virtual void use_cisc_RegMask();

@ -317,7 +317,7 @@ public:
  virtual const class TypePtr *adr_type() const;

  // Apply peephole rule(s) to this instruction
-  virtual MachNode *peephole( Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted, Compile* C );
+  virtual MachNode *peephole(Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted);

  // Top-level ideal Opcode matched
  virtual int ideal_Opcode()     const { return Op_Node; }
@ -627,7 +627,7 @@ public:
  virtual void save_label(Label** label, uint* block_num) = 0;

  // Support for short branches
-  virtual MachNode *short_branch_version(Compile* C) { return NULL; }
+  virtual MachNode *short_branch_version() { return NULL; }

  virtual bool pinned() const { return true; };
 };
@ -985,7 +985,7 @@ public:

  labelOper(labelOper* l) : _label(l->_label) , _block_num(l->_block_num) {}

-  virtual MachOper *clone(Compile* C) const;
+  virtual MachOper *clone() const;

  virtual Label *label() const { assert(_label != NULL, "need Label"); return _label; }

@ -1012,7 +1012,7 @@ public:
  methodOper() :   _method(0) {}
  methodOper(intptr_t method) : _method(method)  {}

-  virtual MachOper *clone(Compile* C) const;
+  virtual MachOper *clone() const;

  virtual intptr_t method() const { return _method; }

--- a/hotspot/src/share/vm/opto/macro.cpp
+++ b/hotspot/src/share/vm/opto/macro.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -702,6 +702,7 @@ bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray <Sa
  ciType* elem_type;

  Node* res = alloc->result_cast();
+  assert(res == NULL || res->is_CheckCastPP(), "unexpected AllocateNode result");
  const TypeOopPtr* res_type = NULL;
  if (res != NULL) { // Could be NULL when there are no users
    res_type = _igvn.type(res)->isa_oopptr();
@ -791,6 +792,7 @@ bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray <Sa
        for (int k = 0;  k < j; k++) {
          sfpt->del_req(last--);
        }
+        _igvn._worklist.push(sfpt);
        // rollback processed safepoints
        while (safepoints_done.length() > 0) {
          SafePointNode* sfpt_done = safepoints_done.pop();
@ -815,6 +817,7 @@ bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray <Sa
              }
            }
          }
+          _igvn._worklist.push(sfpt_done);
        }
 #ifndef PRODUCT
        if (PrintEliminateAllocations) {
@ -855,6 +858,7 @@ bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray <Sa
    int start = jvms->debug_start();
    int end   = jvms->debug_end();
    sfpt->replace_edges_in_range(res, sobj, start, end);
+    _igvn._worklist.push(sfpt);
    safepoints_done.append_if_missing(sfpt); // keep it for rollback
  }
  return true;
@ -1034,6 +1038,8 @@ bool PhaseMacroExpand::eliminate_boxing_node(CallStaticJavaNode *boxing) {
    return false;
  }

+  assert(boxing->result_cast() == NULL, "unexpected boxing node result");
+
  extract_call_projections(boxing);

  const TypeTuple* r = boxing->tf()->range();
@ -1775,6 +1781,7 @@ Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
      Node *pf_region = new RegionNode(3);
      Node *pf_phi_rawmem = new PhiNode( pf_region, Type::MEMORY,
                                             TypeRawPtr::BOTTOM );
+      transform_later(pf_region);

      // Generate several prefetch instructions.
      uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
--- a/hotspot/src/share/vm/opto/matcher.cpp
+++ b/hotspot/src/share/vm/opto/matcher.cpp
@ -305,7 +305,7 @@ void Matcher::match( ) {
  // to avoid false sharing if the corresponding mach node is not used.
  // The corresponding mach node is only used in rare cases for derived
  // pointers.
-  Node* new_ideal_null = ConNode::make(C, TypePtr::NULL_PTR);
+  Node* new_ideal_null = ConNode::make(TypePtr::NULL_PTR);

  // Swap out to old-space; emptying new-space
  Arena *old = C->node_arena()->move_contents(C->old_arena());
@ -1643,8 +1643,8 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
  }

  // Build the object to represent this state & prepare for recursive calls
-  MachNode *mach = s->MachNodeGenerator( rule, C );
-  mach->_opnds[0] = s->MachOperGenerator( _reduceOp[rule], C );
+  MachNode *mach = s->MachNodeGenerator(rule);
+  mach->_opnds[0] = s->MachOperGenerator(_reduceOp[rule]);
  assert( mach->_opnds[0] != NULL, "Missing result operand" );
  Node *leaf = s->_leaf;
  // Check for instruction or instruction chain rule
@ -1756,13 +1756,13 @@ void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *m
    assert( 0 <= opnd_class_instance && opnd_class_instance < NUM_OPERANDS,
            "Bad AD file: Instruction chain rule must chain from operand");
    // Insert operand into array of operands for this instruction
-    mach->_opnds[1] = s->MachOperGenerator( opnd_class_instance, C );
+    mach->_opnds[1] = s->MachOperGenerator(opnd_class_instance);

    ReduceOper( s, newrule, mem, mach );
  } else {
    // Chain from the result of an instruction
    assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
-    mach->_opnds[1] = s->MachOperGenerator( _reduceOp[catch_op], C );
+    mach->_opnds[1] = s->MachOperGenerator(_reduceOp[catch_op]);
    Node *mem1 = (Node*)1;
    debug_only(Node *save_mem_node = _mem_node;)
    mach->add_req( ReduceInst(s, newrule, mem1) );
@ -1807,7 +1807,7 @@ uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mac
    if( newrule < NUM_OPERANDS ) { // Operand/operandClass or internalOp/instruction?
      // Operand/operandClass
      // Insert operand into array of operands for this instruction
-      mach->_opnds[num_opnds++] = newstate->MachOperGenerator( opnd_class_instance, C );
+      mach->_opnds[num_opnds++] = newstate->MachOperGenerator(opnd_class_instance);
      ReduceOper( newstate, newrule, mem, mach );

    } else {                    // Child is internal operand or new instruction
@ -1818,7 +1818,7 @@ uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mac
      } else {
        // instruction --> call build operand(  ) to catch result
        //             --> ReduceInst( newrule )
-        mach->_opnds[num_opnds++] = s->MachOperGenerator( _reduceOp[catch_op], C );
+        mach->_opnds[num_opnds++] = s->MachOperGenerator(_reduceOp[catch_op]);
        Node *mem1 = (Node*)1;
        debug_only(Node *save_mem_node = _mem_node;)
        mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
--- a/hotspot/src/share/vm/opto/mathexactnode.cpp
+++ b/hotspot/src/share/vm/opto/mathexactnode.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -191,7 +191,7 @@ struct IdealHelper {
      NativeType val1 = TypeClass::as_self(type1)->get_con();
      NativeType val2 = TypeClass::as_self(type2)->get_con();
      if (node->will_overflow(val1, val2) == false) {
-        Node* con_result = ConINode::make(phase->C, 0);
+        Node* con_result = ConINode::make(0);
        return con_result;
      }
      return NULL;
--- a/hotspot/src/share/vm/opto/memnode.cpp
+++ b/hotspot/src/share/vm/opto/memnode.cpp
@ -933,12 +933,12 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP
  return (LoadNode*)NULL;
 }

-LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo) {
+LoadLNode* LoadLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo) {
  bool require_atomic = true;
  return new LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), mo, require_atomic);
 }

-LoadDNode* LoadDNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo) {
+LoadDNode* LoadDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, MemOrd mo) {
  bool require_atomic = true;
  return new LoadDNode(ctl, mem, adr, adr_type, rt, mo, require_atomic);
 }
@ -1471,6 +1471,7 @@ Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {

  Node* ctrl    = in(MemNode::Control);
  Node* address = in(MemNode::Address);
+  bool progress = false;

  // Skip up past a SafePoint control.  Cannot do this for Stores because
  // pointer stores & cardmarks must stay on the same side of a SafePoint.
@ -1478,6 +1479,7 @@ Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {
      phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw ) {
    ctrl = ctrl->in(0);
    set_req(MemNode::Control,ctrl);
+    progress = true;
  }

  intptr_t ignore = 0;
@ -1490,6 +1492,7 @@ Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {
        && all_controls_dominate(base, phase->C->start())) {
      // A method-invariant, non-null address (constant or 'this' argument).
      set_req(MemNode::Control, NULL);
+      progress = true;
    }
  }

@ -1550,7 +1553,7 @@ Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {
    }
  }

-  return NULL;                  // No further progress
+  return progress ? this : NULL;
 }

 // Helper to recognize certain Klass fields which are invariant across
@ -2014,7 +2017,6 @@ const Type* LoadSNode::Value(PhaseTransform *phase) const {
 //----------------------------LoadKlassNode::make------------------------------
 // Polymorphic factory method:
 Node *LoadKlassNode::make( PhaseGVN& gvn, Node *mem, Node *adr, const TypePtr* at, const TypeKlassPtr *tk ) {
-  Compile* C = gvn.C;
  Node *ctl = NULL;
  // sanity check the alias category against the created node type
  const TypePtr *adr_type = adr->bottom_type()->isa_ptr();
@ -2379,12 +2381,12 @@ StoreNode* StoreNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const
  return (StoreNode*)NULL;
 }

-StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) {
+StoreLNode* StoreLNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) {
  bool require_atomic = true;
  return new StoreLNode(ctl, mem, adr, adr_type, val, mo, require_atomic);
 }

-StoreDNode* StoreDNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) {
+StoreDNode* StoreDNode::make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo) {
  bool require_atomic = true;
  return new StoreDNode(ctl, mem, adr, adr_type, val, mo, require_atomic);
 }
@ -2460,7 +2462,7 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
      // and I need to disappear.
      if (moved != NULL) {
        // %%% hack to ensure that Ideal returns a new node:
-        mem = MergeMemNode::make(phase->C, mem);
+        mem = MergeMemNode::make(mem);
        return mem;             // fold me away
      }
    }
@ -2820,7 +2822,6 @@ Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
                                   intptr_t start_offset,
                                   Node* end_offset,
                                   PhaseGVN* phase) {
-  Compile* C = phase->C;
  intptr_t offset = start_offset;

  int unit = BytesPerLong;
@ -2847,7 +2848,6 @@ Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
    return mem;
  }

-  Compile* C = phase->C;
  int unit = BytesPerLong;
  Node* zbase = start_offset;
  Node* zend  = end_offset;
@ -2875,7 +2875,6 @@ Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
    return mem;
  }

-  Compile* C = phase->C;
  assert((end_offset % BytesPerInt) == 0, "odd end offset");
  intptr_t done_offset = end_offset;
  if ((done_offset % BytesPerLong) != 0) {
@ -2944,6 +2943,7 @@ Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) {
    return NULL;
  }

+  bool progress = false;
  // Eliminate volatile MemBars for scalar replaced objects.
  if (can_reshape && req() == (Precedent+1)) {
    bool eliminate = false;
@ -2966,6 +2966,7 @@ Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) {
          phase->is_IterGVN()->_worklist.push(my_mem); // remove dead node later
          my_mem = NULL;
        }
+        progress = true;
      }
      if (my_mem != NULL && my_mem->is_Mem()) {
        const TypeOopPtr* t_oop = my_mem->in(MemNode::Address)->bottom_type()->isa_oopptr();
@ -2995,7 +2996,7 @@ Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) {
      return new ConINode(TypeInt::ZERO);
    }
  }
-  return NULL;
+  return progress ? this : NULL;
 }

 //------------------------------Value------------------------------------------
@ -3497,6 +3498,7 @@ Node* InitializeNode::capture_store(StoreNode* st, intptr_t start,
  // if it redundantly stored the same value (or zero to fresh memory).

  // In any case, wire it in:
+  phase->igvn_rehash_node_delayed(this);
  set_req(i, new_st);

  // The caller may now kill the old guy.
@ -4126,7 +4128,7 @@ MergeMemNode::MergeMemNode(Node *new_base) : Node(1+Compile::AliasIdxRaw) {

 // Make a new, untransformed MergeMem with the same base as 'mem'.
 // If mem is itself a MergeMem, populate the result with the same edges.
-MergeMemNode* MergeMemNode::make(Compile* C, Node* mem) {
+MergeMemNode* MergeMemNode::make(Node* mem) {
  return new MergeMemNode(mem);
 }

--- a/hotspot/src/share/vm/opto/memnode.hpp
+++ b/hotspot/src/share/vm/opto/memnode.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -333,7 +333,7 @@ public:
  virtual int store_Opcode() const { return Op_StoreL; }
  virtual BasicType memory_type() const { return T_LONG; }
  bool require_atomic_access() const { return _require_atomic_access; }
-  static LoadLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type,
+  static LoadLNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type,
                                const Type* rt, MemOrd mo);
 #ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const {
@ -384,7 +384,7 @@ public:
  virtual int store_Opcode() const { return Op_StoreD; }
  virtual BasicType memory_type() const { return T_DOUBLE; }
  bool require_atomic_access() const { return _require_atomic_access; }
-  static LoadDNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type,
+  static LoadDNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type,
                                const Type* rt, MemOrd mo);
 #ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const {
@ -593,7 +593,7 @@ public:
  virtual int Opcode() const;
  virtual BasicType memory_type() const { return T_LONG; }
  bool require_atomic_access() const { return _require_atomic_access; }
-  static StoreLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo);
+  static StoreLNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo);
 #ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const {
    StoreNode::dump_spec(st);
@ -629,7 +629,7 @@ public:
  virtual int Opcode() const;
  virtual BasicType memory_type() const { return T_DOUBLE; }
  bool require_atomic_access() const { return _require_atomic_access; }
-  static StoreDNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo);
+  static StoreDNode* make_atomic(Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, MemOrd mo);
 #ifndef PRODUCT
  virtual void dump_spec(outputStream *st) const {
    StoreNode::dump_spec(st);
@ -1138,7 +1138,7 @@ public:
  // If the input is a whole memory state, clone it with all its slices intact.
  // Otherwise, make a new memory state with just that base memory input.
  // In either case, the result is a newly created MergeMem.
-  static MergeMemNode* make(Compile* C, Node* base_memory);
+  static MergeMemNode* make(Node* base_memory);

  virtual int Opcode() const;
  virtual Node *Identity( PhaseTransform *phase );
--- a/hotspot/src/share/vm/opto/movenode.cpp
+++ b/hotspot/src/share/vm/opto/movenode.cpp
@ -88,7 +88,7 @@ Node *CMoveNode::Ideal(PhaseGVN *phase, bool can_reshape) {
    if( in(Condition)->is_Bool() ) {
      BoolNode* b  = in(Condition)->as_Bool();
      BoolNode* b2 = b->negate(phase);
-      return make( phase->C, in(Control), phase->transform(b2), in(IfTrue), in(IfFalse), _type );
+      return make(in(Control), phase->transform(b2), in(IfTrue), in(IfFalse), _type);
    }
  }
  return NULL;
@ -158,7 +158,7 @@ const Type *CMoveNode::Value( PhaseTransform *phase ) const {
 //------------------------------make-------------------------------------------
 // Make a correctly-flavored CMove.  Since _type is directly determined
 // from the inputs we do not need to specify it here.
-CMoveNode *CMoveNode::make( Compile *C, Node *c, Node *bol, Node *left, Node *right, const Type *t ) {
+CMoveNode *CMoveNode::make(Node *c, Node *bol, Node *left, Node *right, const Type *t) {
  switch( t->basic_type() ) {
    case T_INT:     return new CMoveINode( bol, left, right, t->is_int() );
    case T_FLOAT:   return new CMoveFNode( bol, left, right, t );
@ -196,7 +196,7 @@ Node *CMoveINode::Ideal(PhaseGVN *phase, bool can_reshape) {
    if( in(Condition)->is_Bool() ) {
      BoolNode* b  = in(Condition)->as_Bool();
      BoolNode* b2 = b->negate(phase);
-      return make( phase->C, in(Control), phase->transform(b2), in(IfTrue), in(IfFalse), _type );
+      return make(in(Control), phase->transform(b2), in(IfTrue), in(IfFalse), _type);
    }
  }

--- a/hotspot/src/share/vm/opto/movenode.hpp
+++ b/hotspot/src/share/vm/opto/movenode.hpp
@ -47,7 +47,7 @@ class CMoveNode : public TypeNode {
  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
  virtual const Type *Value( PhaseTransform *phase ) const;
  virtual Node *Identity( PhaseTransform *phase );
-  static CMoveNode *make( Compile *C, Node *c, Node *bol, Node *left, Node *right, const Type *t );
+  static CMoveNode *make(Node *c, Node *bol, Node *left, Node *right, const Type *t);
  // Helper function to spot cmove graph shapes
  static Node *is_cmove_id( PhaseTransform *phase, Node *cmp, Node *t, Node *f, BoolNode *b );
 };
--- a/hotspot/src/share/vm/opto/node.cpp
+++ b/hotspot/src/share/vm/opto/node.cpp
@ -507,7 +507,7 @@ Node *Node::clone() const {
                                  (const void*)(&mthis->_opnds), 1));
    mach->_opnds = to;
    for ( uint i = 0; i < nopnds; ++i ) {
-      to[i] = from[i]->clone(C);
+      to[i] = from[i]->clone();
    }
  }
  // cloning CallNode may need to clone JVMState
@ -620,6 +620,7 @@ void Node::destruct() {
  *(address*)this = badAddress;  // smash the C++ vtbl, probably
  _in = _out = (Node**) badAddress;
  _max = _cnt = _outmax = _outcnt = 0;
+  compile->remove_modified_node(this);
 #endif
 }

@ -765,6 +766,7 @@ void Node::del_req( uint idx ) {
  if (n != NULL) n->del_out((Node *)this);
  _in[idx] = in(--_cnt);  // Compact the array
  _in[_cnt] = NULL;       // NULL out emptied slot
+  Compile::current()->record_modified_node(this);
 }

 //------------------------------del_req_ordered--------------------------------
@ -780,6 +782,7 @@ void Node::del_req_ordered( uint idx ) {
    Copy::conjoint_words_to_lower((HeapWord*)&_in[idx+1], (HeapWord*)&_in[idx], ((_cnt-idx-1)*sizeof(Node*)));
  }
  _in[--_cnt] = NULL;   // NULL out emptied slot
+  Compile::current()->record_modified_node(this);
 }

 //------------------------------ins_req----------------------------------------
@ -1297,6 +1300,7 @@ static void kill_dead_code( Node *dead, PhaseIterGVN *igvn ) {
      // Done with outputs.
      igvn->hash_delete(dead);
      igvn->_worklist.remove(dead);
+      igvn->C->remove_modified_node(dead);
      igvn->set_type(dead, Type::TOP);
      if (dead->is_macro()) {
        igvn->C->remove_macro_node(dead);
--- a/hotspot/src/share/vm/opto/node.hpp
+++ b/hotspot/src/share/vm/opto/node.hpp
@ -398,6 +398,7 @@ protected:
    if (*p != NULL)  (*p)->del_out((Node *)this);
    (*p) = n;
    if (n != NULL)      n->add_out((Node *)this);
+    Compile::current()->record_modified_node(this);
  }
  // Light version of set_req() to init inputs after node creation.
  void init_req( uint i, Node *n ) {
@ -409,6 +410,7 @@ protected:
    assert( _in[i] == NULL, "sanity");
    _in[i] = n;
    if (n != NULL)      n->add_out((Node *)this);
+    Compile::current()->record_modified_node(this);
  }
  // Find first occurrence of n among my edges:
  int find_edge(Node* n);
--- a/hotspot/src/share/vm/opto/output.cpp
+++ b/hotspot/src/share/vm/opto/output.cpp
@ -526,7 +526,7 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size

        if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
          // We've got a winner.  Replace this branch.
-          MachNode* replacement = mach->as_MachBranch()->short_branch_version(this);
+          MachNode* replacement = mach->as_MachBranch()->short_branch_version();

          // Update the jmp_size.
          int new_size = replacement->size(_regalloc);
@ -785,9 +785,10 @@ void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
    // grow downwards in all implementations.
    // (If, on some machine, the interpreter's Java locals or stack
    // were to grow upwards, the embedded doubles would be word-swapped.)
-    jint   *dp = (jint*)&d;
-    array->append(new ConstantIntValue(dp[1]));
-    array->append(new ConstantIntValue(dp[0]));
+    jlong_accessor acc;
+    acc.long_value = jlong_cast(d);
+    array->append(new ConstantIntValue(acc.words[1]));
+    array->append(new ConstantIntValue(acc.words[0]));
 #endif
    break;
  }
@ -804,9 +805,10 @@ void Compile::FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
    // grow downwards in all implementations.
    // (If, on some machine, the interpreter's Java locals or stack
    // were to grow upwards, the embedded doubles would be word-swapped.)
-    jint *dp = (jint*)&d;
-    array->append(new ConstantIntValue(dp[1]));
-    array->append(new ConstantIntValue(dp[0]));
+    jlong_accessor acc;
+    acc.long_value = d;
+    array->append(new ConstantIntValue(acc.words[1]));
+    array->append(new ConstantIntValue(acc.words[0]));
 #endif
    break;
  }
@ -1174,7 +1176,7 @@ CodeBuffer* Compile::init_buffer(uint* blk_starts) {

  // fill in the nop array for bundling computations
  MachNode *_nop_list[Bundle::_nop_count];
-  Bundle::initialize_nops(_nop_list, this);
+  Bundle::initialize_nops(_nop_list);

  return cb;
 }
@ -1408,7 +1410,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {

            if (_matcher->is_short_branch_offset(mach->rule(), br_size, offset)) {
              // We've got a winner.  Replace this branch.
-              MachNode* replacement = mach->as_MachBranch()->short_branch_version(this);
+              MachNode* replacement = mach->as_MachBranch()->short_branch_version();

              // Update the jmp_size.
              int new_size = replacement->size(_regalloc);
--- a/hotspot/src/share/vm/opto/parse1.cpp
+++ b/hotspot/src/share/vm/opto/parse1.cpp
@ -575,12 +575,13 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses)
      decrement_age();
    }
  }
-  if (depth() == 1) {
+
+  if (depth() == 1 && !failing()) {
    // Add check to deoptimize the nmethod if RTM state was changed
    rtm_deopt();
  }

-  // Check for bailouts during method entry.
+  // Check for bailouts during method entry or RTM state check setup.
  if (failing()) {
    if (log)  log->done("parse");
    C->set_default_node_notes(caller_nn);
@ -1756,7 +1757,7 @@ void Parse::merge_memory_edges(MergeMemNode* n, int pnum, bool nophi) {
      if (remerge == NULL) {
        assert(base != NULL, "");
        assert(base->in(0) != NULL, "should not be xformed away");
-        remerge = MergeMemNode::make(C, base->in(pnum));
+        remerge = MergeMemNode::make(base->in(pnum));
        gvn().set_type(remerge, Type::MEMORY);
        base->set_req(pnum, remerge);
      }
@ -2199,7 +2200,7 @@ void Parse::add_safepoint() {
  // down below a SafePoint.

  // Clone the current memory state
-  Node* mem = MergeMemNode::make(C, map()->memory());
+  Node* mem = MergeMemNode::make(map()->memory());

  mem = _gvn.transform(mem);

@ -2213,7 +2214,7 @@ void Parse::add_safepoint() {

  // Create a node for the polling address
  if( add_poll_param ) {
-    Node *polladr = ConPNode::make(C, (address)os::get_polling_page());
+    Node *polladr = ConPNode::make((address)os::get_polling_page());
    sfpnt->init_req(TypeFunc::Parms+0, _gvn.transform(polladr));
  }

--- a/hotspot/src/share/vm/opto/parseHelper.cpp
+++ b/hotspot/src/share/vm/opto/parseHelper.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -47,7 +47,7 @@ void GraphKit::make_dtrace_method_entry_exit(ciMethod* method, bool is_entry) {

  // Get method
  const TypePtr* method_type = TypeMetadataPtr::make(method);
-  Node *method_node = _gvn.transform( ConNode::make(C, method_type) );
+  Node *method_node = _gvn.transform(ConNode::make(method_type));

  kill_dead_locals();

--- a/hotspot/src/share/vm/opto/phaseX.cpp
+++ b/hotspot/src/share/vm/opto/phaseX.cpp
@ -615,7 +615,7 @@ ConNode* PhaseTransform::makecon(const Type *t) {
 // Make an idealized constant - one of ConINode, ConPNode, etc.
 ConNode* PhaseValues::uncached_makecon(const Type *t) {
  assert(t->singleton(), "must be a constant");
-  ConNode* x = ConNode::make(C, t);
+  ConNode* x = ConNode::make(t);
  ConNode* k = (ConNode*)hash_find_insert(x); // Value numbering
  if (k == NULL) {
    set_type(x, t);             // Missed, provide type mapping
@ -933,9 +933,32 @@ void PhaseIterGVN::init_verifyPhaseIterGVN() {
  for (int i = 0; i < _verify_window_size; i++) {
    _verify_window[i] = NULL;
  }
+#ifdef ASSERT
+  // Verify that all modified nodes are on _worklist
+  Unique_Node_List* modified_list = C->modified_nodes();
+  while (modified_list != NULL && modified_list->size()) {
+    Node* n = modified_list->pop();
+    if (n->outcnt() != 0 && !n->is_Con() && !_worklist.member(n)) {
+      n->dump();
+      assert(false, "modified node is not on IGVN._worklist");
+    }
+  }
+#endif
 }

 void PhaseIterGVN::verify_PhaseIterGVN() {
+#ifdef ASSERT
+  // Verify nodes with changed inputs.
+  Unique_Node_List* modified_list = C->modified_nodes();
+  while (modified_list != NULL && modified_list->size()) {
+    Node* n = modified_list->pop();
+    if (n->outcnt() != 0 && !n->is_Con()) { // skip dead and Con nodes
+      n->dump();
+      assert(false, "modified node was not processed by IGVN.transform_old()");
+    }
+  }
+#endif
+
  C->verify_graph_edges();
  if( VerifyOpto && allow_progress() ) {
    // Must turn off allow_progress to enable assert and break recursion
@ -964,6 +987,14 @@ void PhaseIterGVN::verify_PhaseIterGVN() {
                  (int) _verify_counter, (int) _verify_full_passes);
    }
  }
+
+#ifdef ASSERT
+  while (modified_list->size()) {
+    Node* n = modified_list->pop();
+    n->dump();
+    assert(false, "VerifyIterativeGVN: new modified node was added");
+  }
+#endif
 }
 #endif /* PRODUCT */

@ -1066,6 +1097,7 @@ Node *PhaseIterGVN::transform_old(Node* n) {
  Node* k = n;
  DEBUG_ONLY(dead_loop_check(k);)
  DEBUG_ONLY(bool is_new = (k->outcnt() == 0);)
+  C->remove_modified_node(k);
  Node* i = k->Ideal(this, /*can_reshape=*/true);
  assert(i != k || is_new || i->outcnt() > 0, "don't return dead nodes");
 #ifndef PRODUCT
@ -1107,6 +1139,7 @@ Node *PhaseIterGVN::transform_old(Node* n) {
    DEBUG_ONLY(dead_loop_check(k);)
    // Try idealizing again
    DEBUG_ONLY(is_new = (k->outcnt() == 0);)
+    C->remove_modified_node(k);
    i = k->Ideal(this, /*can_reshape=*/true);
    assert(i != k || is_new || (i->outcnt() > 0), "don't return dead nodes");
 #ifndef PRODUCT
@ -1259,6 +1292,7 @@ void PhaseIterGVN::remove_globally_dead_node( Node *dead ) {
      _stack.pop();
      // Remove dead node from iterative worklist
      _worklist.remove(dead);
+      C->remove_modified_node(dead);
      // Constant node that has no out-edges and has only one in-edge from
      // root is usually dead. However, sometimes reshaping walk makes
      // it reachable by adding use edges. So, we will NOT count Con nodes
@ -1288,7 +1322,7 @@ void PhaseIterGVN::subsume_node( Node *old, Node *nn ) {
  for (DUIterator_Last imin, i = old->last_outs(imin); i >= imin; ) {
    Node* use = old->last_out(i);  // for each use...
    // use might need re-hashing (but it won't if it's a new node)
-    bool is_in_table = _table.hash_delete( use );
+    rehash_node_delayed(use);
    // Update use-def info as well
    // We remove all occurrences of old within use->in,
    // so as to avoid rehashing any node more than once.
@ -1300,11 +1334,6 @@ void PhaseIterGVN::subsume_node( Node *old, Node *nn ) {
        ++num_edges;
      }
    }
-    // Insert into GVN hash table if unique
-    // If a duplicate, 'use' will be cleaned up when pulled off worklist
-    if( is_in_table ) {
-      hash_find_insert(use);
-    }
    i -= num_edges;    // we deleted 1 or more copies of this edge
  }

@ -1599,7 +1628,7 @@ Node *PhaseCCP::transform_once( Node *n ) {
    if( t == Type::TOP ) {
      // cache my top node on the Compile instance
      if( C->cached_top_node() == NULL || C->cached_top_node()->in(0) == NULL ) {
-        C->set_cached_top_node( ConNode::make(C, Type::TOP) );
+        C->set_cached_top_node(ConNode::make(Type::TOP));
        set_type(C->top(), Type::TOP);
      }
      nn = C->top();
@ -1725,7 +1754,7 @@ void PhasePeephole::do_transform() {
        MachNode *m = n->as_Mach();
        int deleted_count = 0;
        // check for peephole opportunities
-        MachNode *m2 = m->peephole( block, instruction_index, _regalloc, deleted_count, C );
+        MachNode *m2 = m->peephole(block, instruction_index, _regalloc, deleted_count);
        if( m2 != NULL ) {
 #ifndef PRODUCT
          if( PrintOptoPeephole ) {
--- a/hotspot/src/share/vm/opto/phaseX.hpp
+++ b/hotspot/src/share/vm/opto/phaseX.hpp
@ -311,6 +311,9 @@ public:
                               const Type* limit_type) const
  { ShouldNotCallThis(); return NULL; }

+  // Delayed node rehash if this is an IGVN phase
+  virtual void igvn_rehash_node_delayed(Node* n) {}
+
 #ifndef PRODUCT
  void dump_old2new_map() const;
  void dump_new( uint new_lidx ) const;
@ -488,6 +491,10 @@ public:
    _worklist.push(n);
  }

+  void igvn_rehash_node_delayed(Node* n) {
+    rehash_node_delayed(n);
+  }
+
  // Replace ith edge of "n" with "in"
  void replace_input_of(Node* n, int i, Node* in) {
    rehash_node_delayed(n);
--- a/hotspot/src/share/vm/opto/rootnode.cpp
+++ b/hotspot/src/share/vm/opto/rootnode.cpp
@ -35,10 +35,12 @@
 //------------------------------Ideal------------------------------------------
 // Remove dead inputs
 Node *RootNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  bool modified = false;
  for( uint i = 1; i < req(); i++ ) { // For all inputs
    // Check for and remove dead inputs
    if( phase->type(in(i)) == Type::TOP ) {
      del_req(i--);             // Delete TOP inputs
+      modified = true;
    }
  }

@ -56,7 +58,7 @@ Node *RootNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  // If we want to get the rest of the win later, we should pattern match
  // simple recursive call trees to closed-form solutions.

-  return NULL;                  // No further opportunities exposed
+  return modified ? this : NULL;
 }

 //=============================================================================
--- a/hotspot/src/share/vm/opto/stringopts.cpp
+++ b/hotspot/src/share/vm/opto/stringopts.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2009, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -1438,7 +1438,7 @@ void PhaseStringOpts::replace_string_concat(StringConcat* sc) {
  }
  // Make sure the memory state is a MergeMem for parsing.
  if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
-    map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory)));
+    map->set_req(TypeFunc::Memory, MergeMemNode::make(map->in(TypeFunc::Memory)));
  }

  jvms->set_map(map);
--- a/hotspot/src/share/vm/opto/subnode.cpp
+++ b/hotspot/src/share/vm/opto/subnode.cpp
@ -1168,7 +1168,6 @@ uint BoolNode::cmp( const Node &n ) const {
 Node* BoolNode::make_predicate(Node* test_value, PhaseGVN* phase) {
  if (test_value->is_Con())   return test_value;
  if (test_value->is_Bool())  return test_value;
-  Compile* C = phase->C;
  if (test_value->is_CMove() &&
      test_value->in(CMoveNode::Condition)->is_Bool()) {
    BoolNode*   bol   = test_value->in(CMoveNode::Condition)->as_Bool();
@ -1191,7 +1190,7 @@ Node* BoolNode::make_predicate(Node* test_value, PhaseGVN* phase) {
 //--------------------------------as_int_value---------------------------------
 Node* BoolNode::as_int_value(PhaseGVN* phase) {
  // Inverse to make_predicate.  The CMove probably boils down to a Conv2B.
-  Node* cmov = CMoveNode::make(phase->C, NULL, this,
+  Node* cmov = CMoveNode::make(NULL, this,
                               phase->intcon(0), phase->intcon(1),
                               TypeInt::BOOL);
  return phase->transform(cmov);
@ -1199,7 +1198,6 @@ Node* BoolNode::as_int_value(PhaseGVN* phase) {

 //----------------------------------negate-------------------------------------
 BoolNode* BoolNode::negate(PhaseGVN* phase) {
-  Compile* C = phase->C;
  return new BoolNode(in(1), _test.negate());
 }

--- a/hotspot/src/share/vm/opto/superword.cpp
+++ b/hotspot/src/share/vm/opto/superword.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -1378,9 +1378,23 @@ void SuperWord::output() {
      if (n->is_Load()) {
        Node* ctl = n->in(MemNode::Control);
        Node* mem = first->in(MemNode::Memory);
+        SWPointer p1(n->as_Mem(), this);
+        // Identify the memory dependency for the new loadVector node by
+        // walking up through memory chain.
+        // This is done to give flexibility to the new loadVector node so that
+        // it can move above independent storeVector nodes.
+        while (mem->is_StoreVector()) {
+          SWPointer p2(mem->as_Mem(), this);
+          int cmp = p1.cmp(p2);
+          if (SWPointer::not_equal(cmp) || !SWPointer::comparable(cmp)) {
+            mem = mem->in(MemNode::Memory);
+          } else {
+            break; // dependent memory
+          }
+        }
        Node* adr = low_adr->in(MemNode::Address);
        const TypePtr* atyp = n->adr_type();
-        vn = LoadVectorNode::make(C, opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
+        vn = LoadVectorNode::make(opc, ctl, mem, adr, atyp, vlen, velt_basic_type(n));
        vlen_in_bytes = vn->as_LoadVector()->memory_size();
      } else if (n->is_Store()) {
        // Promote value to be stored to vector
@ -1389,7 +1403,7 @@ void SuperWord::output() {
        Node* mem = first->in(MemNode::Memory);
        Node* adr = low_adr->in(MemNode::Address);
        const TypePtr* atyp = n->adr_type();
-        vn = StoreVectorNode::make(C, opc, ctl, mem, adr, atyp, val, vlen);
+        vn = StoreVectorNode::make(opc, ctl, mem, adr, atyp, val, vlen);
        vlen_in_bytes = vn->as_StoreVector()->memory_size();
      } else if (n->req() == 3) {
        // Promote operands to vector
@ -1401,7 +1415,7 @@ void SuperWord::output() {
          in1 = in2;
          in2 = tmp;
        }
-        vn = VectorNode::make(C, opc, in1, in2, vlen, velt_basic_type(n));
+        vn = VectorNode::make(opc, in1, in2, vlen, velt_basic_type(n));
        vlen_in_bytes = vn->as_Vector()->length_in_bytes();
      } else {
        ShouldNotReachHere();
@ -1450,11 +1464,11 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
      if (t != NULL && t->is_con()) {
        juint shift = t->get_con();
        if (shift > mask) { // Unsigned cmp
-          cnt = ConNode::make(C, TypeInt::make(shift & mask));
+          cnt = ConNode::make(TypeInt::make(shift & mask));
        }
      } else {
        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
-          cnt = ConNode::make(C, TypeInt::make(mask));
+          cnt = ConNode::make(TypeInt::make(mask));
          _igvn.register_new_node_with_optimizer(cnt);
          cnt = new AndINode(opd, cnt);
          _igvn.register_new_node_with_optimizer(cnt);
@ -1462,7 +1476,7 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
        }
        assert(opd->bottom_type()->isa_int(), "int type only");
        // Move non constant shift count into vector register.
-        cnt = VectorNode::shift_count(C, p0, cnt, vlen, velt_basic_type(p0));
+        cnt = VectorNode::shift_count(p0, cnt, vlen, velt_basic_type(p0));
      }
      if (cnt != opd) {
        _igvn.register_new_node_with_optimizer(cnt);
@ -1475,7 +1489,7 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {
    // p0's vector. Use p0's type because size of operand's container in
    // vector should match p0's size regardless operand's size.
    const Type* p0_t = velt_type(p0);
-    VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, p0_t);
+    VectorNode* vn = VectorNode::scalar2vector(opd, vlen, p0_t);

    _igvn.register_new_node_with_optimizer(vn);
    _phase->set_ctrl(vn, _phase->get_ctrl(opd));
@ -1490,7 +1504,7 @@ Node* SuperWord::vector_opd(Node_List* p, int opd_idx) {

  // Insert pack operation
  BasicType bt = velt_basic_type(p0);
-  PackNode* pk = PackNode::make(_phase->C, opd, vlen, bt);
+  PackNode* pk = PackNode::make(opd, vlen, bt);
  DEBUG_ONLY( const BasicType opd_bt = opd->bottom_type()->basic_type(); )

  for (uint i = 1; i < vlen; i++) {
@ -1546,7 +1560,7 @@ void SuperWord::insert_extracts(Node_List* p) {
    _igvn.hash_delete(def);
    int def_pos = alignment(def) / data_size(def);

-    Node* ex = ExtractNode::make(_phase->C, def, def_pos, velt_basic_type(def));
+    Node* ex = ExtractNode::make(def, def_pos, velt_basic_type(def));
    _igvn.register_new_node_with_optimizer(ex);
    _phase->set_ctrl(ex, _phase->get_ctrl(def));
    _igvn.replace_input_of(use, idx, ex);
--- a/Show More
+++ b/Show More