Calvin Cheung 2015-04-17 14:37:20 -07:00
commit 7bb3af5d16
276 changed files with 9876 additions and 4862 deletions

View File

@ -29,9 +29,9 @@ import java.util.Observable;
import java.util.Observer;
import sun.jvm.hotspot.debugger.Address;
import sun.jvm.hotspot.gc_interface.CollectedHeap;
import sun.jvm.hotspot.gc_interface.CollectedHeapName;
import sun.jvm.hotspot.memory.MemRegion;
import sun.jvm.hotspot.memory.SharedHeap;
import sun.jvm.hotspot.memory.SpaceClosure;
import sun.jvm.hotspot.runtime.VM;
import sun.jvm.hotspot.runtime.VMObjectFactory;
@ -41,7 +41,7 @@ import sun.jvm.hotspot.types.TypeDataBase;
// Mirror class for G1CollectedHeap.
public class G1CollectedHeap extends SharedHeap {
public class G1CollectedHeap extends CollectedHeap {
// HeapRegionManager _hrm;
static private long hrmFieldOffset;
// MemRegion _g1_reserved;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2005, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -32,7 +32,7 @@ import sun.jvm.hotspot.memory.*;
import sun.jvm.hotspot.runtime.*;
import sun.jvm.hotspot.types.*;
public class CollectedHeap extends VMObject {
public abstract class CollectedHeap extends VMObject {
private static long reservedFieldOffset;
static {
@ -73,9 +73,7 @@ public class CollectedHeap extends VMObject {
return reservedRegion().contains(a);
}
public CollectedHeapName kind() {
return CollectedHeapName.ABSTRACT;
}
public abstract CollectedHeapName kind();
public void print() { printOn(System.out); }
public void printOn(PrintStream tty) {
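
With kind() now abstract and the catch-all ABSTRACT name gone (see the CollectedHeapName change below), every concrete heap wrapper has to report its own CollectedHeapName. A minimal sketch of the resulting pattern, using hypothetical trimmed-down classes rather than the actual SA sources:

    // Illustrative only: the real wrappers live under sun.jvm.hotspot and
    // also carry field offsets, constructors and printing code.
    abstract class CollectedHeap {
        public abstract CollectedHeapName kind();   // no ABSTRACT fallback anymore
    }

    class G1CollectedHeap extends CollectedHeap {   // previously: extends SharedHeap
        @Override
        public CollectedHeapName kind() {
            return CollectedHeapName.G1_COLLECTED_HEAP;
        }
    }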

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -31,8 +31,6 @@ public class CollectedHeapName {
private CollectedHeapName(String name) { this.name = name; }
public static final CollectedHeapName ABSTRACT = new CollectedHeapName("abstract");
public static final CollectedHeapName SHARED_HEAP = new CollectedHeapName("SharedHeap");
public static final CollectedHeapName GEN_COLLECTED_HEAP = new CollectedHeapName("GenCollectedHeap");
public static final CollectedHeapName G1_COLLECTED_HEAP = new CollectedHeapName("G1CollectedHeap");
public static final CollectedHeapName PARALLEL_SCAVENGE_HEAP = new CollectedHeapName("ParallelScavengeHeap");

View File

@ -33,8 +33,7 @@ import sun.jvm.hotspot.runtime.*;
import sun.jvm.hotspot.types.*;
import sun.jvm.hotspot.utilities.*;
public class GenCollectedHeap extends SharedHeap {
private static CIntegerField nGensField;
public class GenCollectedHeap extends CollectedHeap {
private static AddressField youngGenField;
private static AddressField oldGenField;
@ -54,7 +53,6 @@ public class GenCollectedHeap extends SharedHeap {
private static synchronized void initialize(TypeDataBase db) {
Type type = db.lookupType("GenCollectedHeap");
nGensField = type.getCIntegerField("_n_gens");
youngGenField = type.getAddressField("_young_gen");
oldGenField = type.getAddressField("_old_gen");
@ -70,7 +68,7 @@ public class GenCollectedHeap extends SharedHeap {
}
public int nGens() {
return (int) nGensField.getValue(addr);
return 2; // Young + Old
}
public Generation getGen(int i) {
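
The VM no longer exposes an _n_gens field, so the SA hard-codes the generation count to two (young plus old); callers keep iterating exactly as before. A small sketch of the caller pattern, with the attach-to-VM plumbing omitted (the same loop appears in HeapSummary further down):

    import sun.jvm.hotspot.memory.GenCollectedHeap;
    import sun.jvm.hotspot.memory.Generation;

    static void printGenerations(GenCollectedHeap genHeap) {
        for (int n = 0; n < genHeap.nGens(); n++) {      // nGens() is now simply 2
            Generation gen = genHeap.getGen(n);
            // name(), used() and capacity() are assumed from the existing SA Generation API
            System.out.println(gen.name() + ": used=" + gen.used()
                               + ", capacity=" + gen.capacity());
        }
    }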

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -112,11 +112,7 @@ public class Universe {
return "";
}
public CollectedHeap heap() {
try {
return (CollectedHeap) heapConstructor.instantiateWrapperFor(collectedHeapField.getValue());
} catch (WrongTypeException e) {
return new CollectedHeap(collectedHeapField.getValue());
}
return (CollectedHeap) heapConstructor.instantiateWrapperFor(collectedHeapField.getValue());
}
public static long getNarrowOopBase() {

View File

@ -81,53 +81,48 @@ public class HeapSummary extends Tool {
System.out.println();
System.out.println("Heap Usage:");
if (heap instanceof SharedHeap) {
SharedHeap sharedHeap = (SharedHeap) heap;
if (sharedHeap instanceof GenCollectedHeap) {
GenCollectedHeap genHeap = (GenCollectedHeap) sharedHeap;
for (int n = 0; n < genHeap.nGens(); n++) {
Generation gen = genHeap.getGen(n);
if (gen instanceof sun.jvm.hotspot.memory.DefNewGeneration) {
System.out.println("New Generation (Eden + 1 Survivor Space):");
printGen(gen);
if (heap instanceof GenCollectedHeap) {
GenCollectedHeap genHeap = (GenCollectedHeap) heap;
for (int n = 0; n < genHeap.nGens(); n++) {
Generation gen = genHeap.getGen(n);
if (gen instanceof sun.jvm.hotspot.memory.DefNewGeneration) {
System.out.println("New Generation (Eden + 1 Survivor Space):");
printGen(gen);
ContiguousSpace eden = ((DefNewGeneration)gen).eden();
System.out.println("Eden Space:");
printSpace(eden);
ContiguousSpace eden = ((DefNewGeneration)gen).eden();
System.out.println("Eden Space:");
printSpace(eden);
ContiguousSpace from = ((DefNewGeneration)gen).from();
System.out.println("From Space:");
printSpace(from);
ContiguousSpace from = ((DefNewGeneration)gen).from();
System.out.println("From Space:");
printSpace(from);
ContiguousSpace to = ((DefNewGeneration)gen).to();
System.out.println("To Space:");
printSpace(to);
} else {
System.out.println(gen.name() + ":");
printGen(gen);
}
ContiguousSpace to = ((DefNewGeneration)gen).to();
System.out.println("To Space:");
printSpace(to);
} else {
System.out.println(gen.name() + ":");
printGen(gen);
}
} else if (sharedHeap instanceof G1CollectedHeap) {
G1CollectedHeap g1h = (G1CollectedHeap) sharedHeap;
G1MonitoringSupport g1mm = g1h.g1mm();
long edenRegionNum = g1mm.edenRegionNum();
long survivorRegionNum = g1mm.survivorRegionNum();
HeapRegionSetBase oldSet = g1h.oldSet();
HeapRegionSetBase humongousSet = g1h.humongousSet();
long oldRegionNum = oldSet.count().length()
+ humongousSet.count().capacity() / HeapRegion.grainBytes();
printG1Space("G1 Heap:", g1h.n_regions(),
g1h.used(), g1h.capacity());
System.out.println("G1 Young Generation:");
printG1Space("Eden Space:", edenRegionNum,
g1mm.edenUsed(), g1mm.edenCommitted());
printG1Space("Survivor Space:", survivorRegionNum,
g1mm.survivorUsed(), g1mm.survivorCommitted());
printG1Space("G1 Old Generation:", oldRegionNum,
g1mm.oldUsed(), g1mm.oldCommitted());
} else {
throw new RuntimeException("unknown SharedHeap type : " + heap.getClass());
}
} else if (heap instanceof G1CollectedHeap) {
G1CollectedHeap g1h = (G1CollectedHeap) heap;
G1MonitoringSupport g1mm = g1h.g1mm();
long edenRegionNum = g1mm.edenRegionNum();
long survivorRegionNum = g1mm.survivorRegionNum();
HeapRegionSetBase oldSet = g1h.oldSet();
HeapRegionSetBase humongousSet = g1h.humongousSet();
long oldRegionNum = oldSet.count().length()
+ humongousSet.count().capacity() / HeapRegion.grainBytes();
printG1Space("G1 Heap:", g1h.n_regions(),
g1h.used(), g1h.capacity());
System.out.println("G1 Young Generation:");
printG1Space("Eden Space:", edenRegionNum,
g1mm.edenUsed(), g1mm.edenCommitted());
printG1Space("Survivor Space:", survivorRegionNum,
g1mm.survivorUsed(), g1mm.survivorCommitted());
printG1Space("G1 Old Generation:", oldRegionNum,
g1mm.oldUsed(), g1mm.oldCommitted());
} else if (heap instanceof ParallelScavengeHeap) {
ParallelScavengeHeap psh = (ParallelScavengeHeap) heap;
PSYoungGen youngGen = psh.youngGen();

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -140,13 +140,7 @@ ADLCFLAGS += $(SYSDEFS)
# Note "+="; it is a hook so flags.make can add more flags, like -g or -DFOO.
ADLCFLAGS += -q -T
# Normally, debugging is done directly on the ad_<arch>*.cpp files.
# But -g will put #line directives in those files pointing back to <arch>.ad.
# Some builds of gcc 3.2 have a bug that gets tickled by the extra #line directives
# so skip it for 3.2 and earlier.
ifneq "$(shell expr \( $(CC_VER_MAJOR) \> 3 \) \| \( \( $(CC_VER_MAJOR) = 3 \) \& \( $(CC_VER_MINOR) \>= 3 \) \))" "0"
ADLCFLAGS += -g
endif
ifdef LP64
ADLCFLAGS += -D_LP64

View File

@ -19,7 +19,7 @@
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#
#
# Define public interface.
@ -107,6 +107,7 @@ SUNWprivate_1.1 {
JVM_GetClassTypeAnnotations;
JVM_GetDeclaredClasses;
JVM_GetDeclaringClass;
JVM_GetSimpleBinaryName;
JVM_GetEnclosingMethodInfo;
JVM_GetFieldIxModifiers;
JVM_GetFieldTypeAnnotations;

View File

@ -19,7 +19,7 @@
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#
#
# Define public interface.
@ -107,6 +107,7 @@ SUNWprivate_1.1 {
JVM_GetClassTypeAnnotations;
JVM_GetDeclaredClasses;
JVM_GetDeclaringClass;
JVM_GetSimpleBinaryName;
JVM_GetEnclosingMethodInfo;
JVM_GetFieldIxModifiers;
JVM_GetInheritedAccessControlContext;

View File

@ -1,6 +1,6 @@
#
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
# Copyright 2012, 2013 SAP AG. All rights reserved.
# Copyright (c) 2004, 2015, Oracle and/or its affiliates. All rights reserved.
# Copyright 2012, 2015 SAP AG. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -71,9 +71,6 @@ OPT_CFLAGS += -qstrict
OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT)
# xlc 10.01 parameters for ipa compile.
QIPA_COMPILE=$(if $(CXX_IS_V10),-qipa)
# Xlc 10.1 parameters for aggressive optimization:
# - qhot=level=1: Most aggressive loop optimizations.
# - qignerrno: Assume errno is not modified by system calls.
@ -88,7 +85,7 @@ QV10_OPT_CONSERVATIVE=$(if $(CXX_IS_V10),-qhot=level=1 -qignerrno -qinline)
OPT_CFLAGS/synchronizer.o = $(OPT_CFLAGS) -qnoinline
# Set all the xlC V10.1 options here.
OPT_CFLAGS += $(QIPA_COMPILE) $(QV10_OPT) $(QV10_OPT_AGGRESSIVE)
OPT_CFLAGS += $(QV10_OPT) $(QV10_OPT_AGGRESSIVE)
export OBJECT_MODE=64

View File

@ -1,6 +1,6 @@
#
# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012, 2013 SAP. All rights reserved.
# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012, 2015 SAP. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -34,13 +34,17 @@ HOSTCC = $(CC)
AS = $(CC) -c
# get xlc version
CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | sed -n 's/.*Version: \([0-9.]*\)/\1/p')
# get xlc version which comes as VV.RR.MMMM.LLLL where 'VV' is the version,
# 'RR' is the release, 'MMMM' is the modification and 'LLLL' is the level.
# We only use 'VV.RR.LLLL' to avoid integer overflows in bash when comparing
# the version numbers (some shells only support 32-bit integer compares!).
CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | \
sed -n 's/.*Version: \([0-9]\{2\}\).\([0-9]\{2\}\).[0-9]\{4\}.\([0-9]\{4\}\)/\1\2\3/p')
# xlc 08.00.0000.0023 and higher supports -qtune=balanced
CXX_SUPPORTS_BALANCED_TUNING=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 080000000023 ] ; then echo "true" ; fi)
CXX_SUPPORTS_BALANCED_TUNING := $(shell if [ $(CXX_VERSION) -ge 08000023 ] ; then echo "true" ; fi)
# xlc 10.01 is used with aggressive optimizations to boost performance
CXX_IS_V10=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 100100000000 ] ; then echo "true" ; fi)
CXX_IS_V10 := $(shell if [ $(CXX_VERSION) -ge 10010000 ] ; then echo "true" ; fi)
# check for precompiled headers support

View File

@ -105,6 +105,7 @@
_JVM_GetClassTypeAnnotations
_JVM_GetDeclaredClasses
_JVM_GetDeclaringClass
_JVM_GetSimpleBinaryName
_JVM_GetEnclosingMethodInfo
_JVM_GetFieldIxModifiers
_JVM_GetFieldTypeAnnotations

View File

@ -105,6 +105,7 @@
_JVM_GetClassTypeAnnotations
_JVM_GetDeclaredClasses
_JVM_GetDeclaringClass
_JVM_GetSimpleBinaryName
_JVM_GetEnclosingMethodInfo
_JVM_GetFieldIxModifiers
_JVM_GetFieldTypeAnnotations

View File

@ -19,7 +19,7 @@
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#
#
# Define public interface.
@ -107,6 +107,7 @@ SUNWprivate_1.1 {
JVM_GetClassTypeAnnotations;
JVM_GetDeclaredClasses;
JVM_GetDeclaringClass;
JVM_GetSimpleBinaryName;
JVM_GetEnclosingMethodInfo;
JVM_GetFieldIxModifiers;
JVM_GetFieldTypeAnnotations;

View File

@ -19,7 +19,7 @@
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#
#
# Define public interface.
@ -107,6 +107,7 @@ SUNWprivate_1.1 {
JVM_GetClassTypeAnnotations;
JVM_GetDeclaredClasses;
JVM_GetDeclaringClass;
JVM_GetSimpleBinaryName;
JVM_GetEnclosingMethodInfo;
JVM_GetFieldIxModifiers;
JVM_GetFieldTypeAnnotations;

View File

@ -40,7 +40,7 @@ if [ $# -lt 1 ]; then
exit 1
fi
if [ "${JAVA_HOME-}" = "" -o ! -d "${JAVA_HOME-}" -o ! -d ${JAVA_HOME-}/jre/lib/ ]; then
if [ "${JAVA_HOME-}" = "" -o ! -d "${JAVA_HOME-}" ]; then
echo "JAVA_HOME needs to be set to a valid JDK path"
echo "JAVA_HOME: ${JAVA_HOME-}"
exit 1

View File

@ -19,7 +19,7 @@
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#
#
# Define public interface.
@ -107,6 +107,7 @@ SUNWprivate_1.1 {
JVM_GetClassTypeAnnotations;
JVM_GetDeclaredClasses;
JVM_GetDeclaringClass;
JVM_GetSimpleBinaryName;
JVM_GetEnclosingMethodInfo;
JVM_GetFieldIxModifiers;
JVM_GetFieldTypeAnnotations;

View File

@ -19,7 +19,7 @@
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#
#
# Define public interface.
@ -107,6 +107,7 @@ SUNWprivate_1.1 {
JVM_GetClassTypeAnnotations;
JVM_GetDeclaredClasses;
JVM_GetDeclaringClass;
JVM_GetSimpleBinaryName;
JVM_GetEnclosingMethodInfo;
JVM_GetFieldIxModifiers;
JVM_GetFieldTypeAnnotations;

View File

@ -19,7 +19,7 @@
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#
#
# Define public interface.
@ -30,7 +30,7 @@ SUNWprivate_1.1 {
JNI_CreateJavaVM;
JNI_GetCreatedJavaVMs;
JNI_GetDefaultJavaVMInitArgs;
# JVM
JVM_ActiveProcessorCount;
JVM_ArrayCopy;
@ -107,6 +107,7 @@ SUNWprivate_1.1 {
JVM_GetClassTypeAnnotations;
JVM_GetDeclaredClasses;
JVM_GetDeclaringClass;
JVM_GetSimpleBinaryName;
JVM_GetEnclosingMethodInfo;
JVM_GetFieldIxModifiers;
JVM_GetFieldTypeAnnotations;

View File

@ -37,6 +37,11 @@ ifndef USE_GCC
OPT_CFLAGS/ciEnv.o = $(OPT_CFLAGS) -xinline=no%__1cFciEnvbFpost_compiled_method_load_event6MpnHnmethod__v_
endif
# Need extra inlining to get oop_ps_push_contents functions to perform well enough.
ifndef USE_GCC
OPT_CFLAGS/psPromotionManager.o = $(OPT_CFLAGS) -W2,-Ainline:inc=1000
endif
# (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files)
ifeq ("${Platform_compiler}", "sparcWorks")

View File

@ -1818,6 +1818,8 @@ typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address
case INDINDEXSCALEDI2L:
case INDINDEXSCALEDOFFSETI2LN:
case INDINDEXSCALEDI2LN:
case INDINDEXOFFSETI2L:
case INDINDEXOFFSETI2LN:
scale = Address::sxtw(size);
break;
default:
@ -4264,6 +4266,20 @@ operand indIndexScaledOffsetL(iRegP reg, iRegL lreg, immIScale scale, immLU12 of
%}
%}
operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP reg (ConvI2L ireg)) off);
op_cost(INSN_COST);
format %{ "$reg, $ireg, $off I2L" %}
interface(MEMORY_INTER) %{
base($reg);
index($ireg);
scale(0x0);
disp($off);
%}
%}
operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
@ -4324,7 +4340,7 @@ operand indOffI(iRegP reg, immIOffset off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP reg off);
op_cost(INSN_COST);
op_cost(0);
format %{ "[$reg, $off]" %}
interface(MEMORY_INTER) %{
base($reg);
@ -4394,6 +4410,21 @@ operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLU12 o
%}
%}
operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
op_cost(INSN_COST);
format %{ "$reg, $ireg, $off I2L\t# narrow" %}
interface(MEMORY_INTER) %{
base($reg);
index($ireg);
scale(0x0);
disp($off);
%}
%}
operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
%{
predicate(Universe::narrow_oop_shift() == 0);
@ -4656,8 +4687,8 @@ operand iRegL2I(iRegL reg) %{
// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address
opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
@ -7523,7 +7554,7 @@ instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
match(Set dst (AddP src1 (ConvI2L src2)));
ins_cost(INSN_COST);
ins_cost(1.9 * INSN_COST);
format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
ins_encode %{

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -1469,7 +1469,7 @@ public:
f(op, 31, 29);
f(0b11010000, 28, 21);
f(0b000000, 15, 10);
rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);
zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);
}
#define INSN(NAME, op) \

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -40,6 +40,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
protected:
// Interpreter specific version of call_VM_base
using MacroAssembler::call_VM_leaf_base;
virtual void call_VM_leaf_base(address entry_point,
int number_of_arguments);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -2245,6 +2245,341 @@ void MacroAssembler::pop_CPU_state() {
pop(0x3fffffff, sp); // integer registers except lr & sp
}
/**
* Helpers for multiply_to_len().
*/
void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
Register src1, Register src2) {
adds(dest_lo, dest_lo, src1);
adc(dest_hi, dest_hi, zr);
adds(dest_lo, dest_lo, src2);
adc(final_dest_hi, dest_hi, zr);
}
// Generate an address from (r + r1 extend offset). "size" is the
// size of the operand. The result may be in rscratch2.
Address MacroAssembler::offsetted_address(Register r, Register r1,
Address::extend ext, int offset, int size) {
if (offset || (ext.shift() % size != 0)) {
lea(rscratch2, Address(r, r1, ext));
return Address(rscratch2, offset);
} else {
return Address(r, r1, ext);
}
}
/**
* Multiply 64 bit by 64 bit first loop.
*/
void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
Register y, Register y_idx, Register z,
Register carry, Register product,
Register idx, Register kdx) {
//
// jlong carry, x[], y[], z[];
// for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
// huge_128 product = y[idx] * x[xstart] + carry;
// z[kdx] = (jlong)product;
// carry = (jlong)(product >>> 64);
// }
// z[xstart] = carry;
//
Label L_first_loop, L_first_loop_exit;
Label L_one_x, L_one_y, L_multiply;
subsw(xstart, xstart, 1);
br(Assembler::MI, L_one_x);
lea(rscratch1, Address(x, xstart, Address::lsl(LogBytesPerInt)));
ldr(x_xstart, Address(rscratch1));
ror(x_xstart, x_xstart, 32); // convert big-endian to little-endian
bind(L_first_loop);
subsw(idx, idx, 1);
br(Assembler::MI, L_first_loop_exit);
subsw(idx, idx, 1);
br(Assembler::MI, L_one_y);
lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt)));
ldr(y_idx, Address(rscratch1));
ror(y_idx, y_idx, 32); // convert big-endian to little-endian
bind(L_multiply);
// AArch64 has a multiply-accumulate instruction that we can't use
// here because it has no way to process carries, so we have to use
// separate add and adc instructions. Bah.
umulh(rscratch1, x_xstart, y_idx); // x_xstart * y_idx -> rscratch1:product
mul(product, x_xstart, y_idx);
adds(product, product, carry);
adc(carry, rscratch1, zr); // x_xstart * y_idx + carry -> carry:product
subw(kdx, kdx, 2);
ror(product, product, 32); // back to big-endian
str(product, offsetted_address(z, kdx, Address::uxtw(LogBytesPerInt), 0, BytesPerLong));
b(L_first_loop);
bind(L_one_y);
ldrw(y_idx, Address(y, 0));
b(L_multiply);
bind(L_one_x);
ldrw(x_xstart, Address(x, 0));
b(L_first_loop);
bind(L_first_loop_exit);
}
/**
* Multiply 128 bit by 128. Unrolled inner loop.
*
*/
void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
Register carry, Register carry2,
Register idx, Register jdx,
Register yz_idx1, Register yz_idx2,
Register tmp, Register tmp3, Register tmp4,
Register tmp6, Register product_hi) {
// jlong carry, x[], y[], z[];
// int kdx = ystart+1;
// for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
// huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
// jlong carry2 = (jlong)(tmp3 >>> 64);
// huge_128 tmp4 = (y[idx] * product_hi) + z[kdx+idx] + carry2;
// carry = (jlong)(tmp4 >>> 64);
// z[kdx+idx+1] = (jlong)tmp3;
// z[kdx+idx] = (jlong)tmp4;
// }
// idx += 2;
// if (idx > 0) {
// yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
// z[kdx+idx] = (jlong)yz_idx1;
// carry = (jlong)(yz_idx1 >>> 64);
// }
//
Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
lsrw(jdx, idx, 2);
bind(L_third_loop);
subsw(jdx, jdx, 1);
br(Assembler::MI, L_third_loop_exit);
subw(idx, idx, 4);
lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt)));
ldp(yz_idx2, yz_idx1, Address(rscratch1, 0));
lea(tmp6, Address(z, idx, Address::uxtw(LogBytesPerInt)));
ror(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
ror(yz_idx2, yz_idx2, 32);
ldp(rscratch2, rscratch1, Address(tmp6, 0));
mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3
umulh(tmp4, product_hi, yz_idx1);
ror(rscratch1, rscratch1, 32); // convert big-endian to little-endian
ror(rscratch2, rscratch2, 32);
mul(tmp, product_hi, yz_idx2); // yz_idx2 * product_hi -> carry2:tmp
umulh(carry2, product_hi, yz_idx2);
// propagate sum of both multiplications into carry:tmp4:tmp3
adds(tmp3, tmp3, carry);
adc(tmp4, tmp4, zr);
adds(tmp3, tmp3, rscratch1);
adcs(tmp4, tmp4, tmp);
adc(carry, carry2, zr);
adds(tmp4, tmp4, rscratch2);
adc(carry, carry, zr);
ror(tmp3, tmp3, 32); // convert little-endian to big-endian
ror(tmp4, tmp4, 32);
stp(tmp4, tmp3, Address(tmp6, 0));
b(L_third_loop);
bind (L_third_loop_exit);
andw (idx, idx, 0x3);
cbz(idx, L_post_third_loop_done);
Label L_check_1;
subsw(idx, idx, 2);
br(Assembler::MI, L_check_1);
lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt)));
ldr(yz_idx1, Address(rscratch1, 0));
ror(yz_idx1, yz_idx1, 32);
mul(tmp3, product_hi, yz_idx1); // yz_idx1 * product_hi -> tmp4:tmp3
umulh(tmp4, product_hi, yz_idx1);
lea(rscratch1, Address(z, idx, Address::uxtw(LogBytesPerInt)));
ldr(yz_idx2, Address(rscratch1, 0));
ror(yz_idx2, yz_idx2, 32);
add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2);
ror(tmp3, tmp3, 32);
str(tmp3, Address(rscratch1, 0));
bind (L_check_1);
andw (idx, idx, 0x1);
subsw(idx, idx, 1);
br(Assembler::MI, L_post_third_loop_done);
ldrw(tmp4, Address(y, idx, Address::uxtw(LogBytesPerInt)));
mul(tmp3, tmp4, product_hi); // tmp4 * product_hi -> carry2:tmp3
umulh(carry2, tmp4, product_hi);
ldrw(tmp4, Address(z, idx, Address::uxtw(LogBytesPerInt)));
add2_with_carry(carry2, tmp3, tmp4, carry);
strw(tmp3, Address(z, idx, Address::uxtw(LogBytesPerInt)));
extr(carry, carry2, tmp3, 32);
bind(L_post_third_loop_done);
}
/**
* Code for BigInteger::multiplyToLen() intrinsic.
*
* r0: x
* r1: xlen
* r2: y
* r3: ylen
* r4: z
* r5: zlen
* r10: tmp1
* r11: tmp2
* r12: tmp3
* r13: tmp4
* r14: tmp5
* r15: tmp6
* r16: tmp7
*
*/
void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
Register z, Register zlen,
Register tmp1, Register tmp2, Register tmp3, Register tmp4,
Register tmp5, Register tmp6, Register product_hi) {
assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
const Register idx = tmp1;
const Register kdx = tmp2;
const Register xstart = tmp3;
const Register y_idx = tmp4;
const Register carry = tmp5;
const Register product = xlen;
const Register x_xstart = zlen; // reuse register
// First Loop.
//
// final static long LONG_MASK = 0xffffffffL;
// int xstart = xlen - 1;
// int ystart = ylen - 1;
// long carry = 0;
// for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
// long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
// z[kdx] = (int)product;
// carry = product >>> 32;
// }
// z[xstart] = (int)carry;
//
movw(idx, ylen); // idx = ylen;
movw(kdx, zlen); // kdx = xlen+ylen;
mov(carry, zr); // carry = 0;
Label L_done;
movw(xstart, xlen);
subsw(xstart, xstart, 1);
br(Assembler::MI, L_done);
multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
Label L_second_loop;
cbzw(kdx, L_second_loop);
Label L_carry;
subw(kdx, kdx, 1);
cbzw(kdx, L_carry);
strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
lsr(carry, carry, 32);
subw(kdx, kdx, 1);
bind(L_carry);
strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
// Second and third (nested) loops.
//
// for (int i = xstart-1; i >= 0; i--) { // Second loop
// carry = 0;
// for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
// long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
// (z[k] & LONG_MASK) + carry;
// z[k] = (int)product;
// carry = product >>> 32;
// }
// z[i] = (int)carry;
// }
//
// i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
const Register jdx = tmp1;
bind(L_second_loop);
mov(carry, zr); // carry = 0;
movw(jdx, ylen); // j = ystart+1
subsw(xstart, xstart, 1); // i = xstart-1;
br(Assembler::MI, L_done);
str(z, Address(pre(sp, -4 * wordSize)));
Label L_last_x;
lea(z, offsetted_address(z, xstart, Address::uxtw(LogBytesPerInt), 4, BytesPerInt)); // z = z + k - j
subsw(xstart, xstart, 1); // i = xstart-1;
br(Assembler::MI, L_last_x);
lea(rscratch1, Address(x, xstart, Address::uxtw(LogBytesPerInt)));
ldr(product_hi, Address(rscratch1));
ror(product_hi, product_hi, 32); // convert big-endian to little-endian
Label L_third_loop_prologue;
bind(L_third_loop_prologue);
str(ylen, Address(sp, wordSize));
stp(x, xstart, Address(sp, 2 * wordSize));
multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
ldp(z, ylen, Address(post(sp, 2 * wordSize)));
ldp(x, xlen, Address(post(sp, 2 * wordSize))); // copy old xstart -> xlen
addw(tmp3, xlen, 1);
strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt)));
subsw(tmp3, tmp3, 1);
br(Assembler::MI, L_done);
lsr(carry, carry, 32);
strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt)));
b(L_second_loop);
// Next infrequent code is moved outside loops.
bind(L_last_x);
ldrw(product_hi, Address(x, 0));
b(L_third_loop_prologue);
bind(L_done);
}
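
The block comments above spell the algorithm out in Java-like pseudocode. For reference, here is that pseudocode collected into plain, runnable Java — essentially the scalar shape of BigInteger.multiplyToLen that the assembly implements (the assembly additionally processes two 32-bit words per iteration and byte-swaps for endianness, which this sketch does not):

    // Schoolbook multi-word multiply: z = x * y, arrays big-endian,
    // 32 bits per int element, as in the pseudocode comments above.
    private static final long LONG_MASK = 0xffffffffL;

    static int[] multiplyToLen(int[] x, int xlen, int[] y, int ylen, int[] z) {
        int xstart = xlen - 1;
        int ystart = ylen - 1;
        if (z == null || z.length < xlen + ylen)
            z = new int[xlen + ylen];

        // First loop: multiply y by the lowest word of x.
        long carry = 0;
        for (int idx = ystart, kdx = ystart + 1 + xstart; idx >= 0; idx--, kdx--) {
            long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
            z[kdx] = (int) product;
            carry = product >>> 32;
        }
        z[xstart] = (int) carry;

        // Second loop over the remaining words of x; third (inner) loop over y.
        for (int i = xstart - 1; i >= 0; i--) {
            carry = 0;
            for (int jdx = ystart, k = ystart + 1 + i; jdx >= 0; jdx--, k--) {
                long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK)
                               + (z[k] & LONG_MASK) + carry;
                z[k] = (int) product;
                carry = product >>> 32;
            }
            z[i] = (int) carry;
        }
        return z;
    }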
/**
* Emits code to update CRC-32 with a byte value according to constants in table
*

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -167,9 +167,8 @@ class MacroAssembler: public Assembler {
// aliases defined in AARCH64 spec
template<class T>
inline void cmpw(Register Rd, T imm) { subsw(zr, Rd, imm); }
inline void cmpw(Register Rd, T imm) { subsw(zr, Rd, imm); }
inline void cmp(Register Rd, unsigned imm) { subs(zr, Rd, imm); }
inline void cmnw(Register Rd, unsigned imm) { addsw(zr, Rd, imm); }
@ -1121,9 +1120,34 @@ public:
Register tmp1, Register tmp2,
Register tmp3, Register tmp4,
int int_cnt1, Register result);
private:
void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
Register src1, Register src2);
void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) {
add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2);
}
void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
Register y, Register y_idx, Register z,
Register carry, Register product,
Register idx, Register kdx);
void multiply_128_x_128_loop(Register y, Register z,
Register carry, Register carry2,
Register idx, Register jdx,
Register yz_idx1, Register yz_idx2,
Register tmp, Register tmp3, Register tmp4,
Register tmp7, Register product_hi);
public:
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
Register zlen, Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6, Register tmp7);
// ISB may be needed because of a safepoint
void maybe_isb() { isb(); }
private:
// Return the effective address r + (r1 << ext) + offset.
// Uses rscratch2.
Address offsetted_address(Register r, Register r1, Address::extend ext,
int offset, int size);
};
// Used by aarch64.ad to control code generation

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -2356,8 +2356,45 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
#undef __
#define __ masm->
/**
* Arguments:
*
* Input:
* c_rarg0 - x address
* c_rarg1 - x length
* c_rarg2 - y address
* c_rarg3 - y length
* c_rarg4 - z address
* c_rarg5 - z length
*/
address generate_multiplyToLen() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
address start = __ pc();
const Register x = r0;
const Register xlen = r1;
const Register y = r2;
const Register ylen = r3;
const Register z = r4;
const Register zlen = r5;
const Register tmp1 = r10;
const Register tmp2 = r11;
const Register tmp3 = r12;
const Register tmp4 = r13;
const Register tmp5 = r14;
const Register tmp6 = r15;
const Register tmp7 = r16;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(lr);
return start;
}
// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
@ -2375,6 +2412,9 @@ class StubGenerator: public StubCodeGenerator {
// otherwise assume that stack unwinding will be initiated, so
// caller saved registers were assumed volatile in the compiler.
#undef __
#define __ masm->
address generate_throw_exception(const char* name,
address runtime_entry,
Register arg1 = noreg,
@ -2518,6 +2558,10 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
#ifndef BUILTIN_SIM
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2015, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -193,6 +193,15 @@ void VM_Version::get_processor_features() {
}
}
// This machine allows unaligned memory accesses
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
}
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
UseMultiplyToLenIntrinsic = true;
}
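
For context, the path this speeds up is ordinary BigInteger multiplication, which bottoms out in BigInteger.multiplyToLen; with the flag defaulting to true, C2 can replace that call with the new stub. An illustrative snippet (nothing here is specific to this port):

    import java.math.BigInteger;
    import java.util.Random;

    public class MultiplyToLenDemo {
        public static void main(String[] args) {
            Random rnd = new Random(42);
            BigInteger a = new BigInteger(2000, rnd);   // roughly 63 32-bit words
            BigInteger b = new BigInteger(2000, rnd);
            // The multiply below reaches BigInteger.multiplyToLen, which C2 can
            // replace with the generate_multiplyToLen() stub once the intrinsic
            // is enabled and the code is JIT-compiled.
            System.out.println(a.multiply(b).bitLength());
        }
    }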
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(OptoScheduling)) {
OptoScheduling = true;

View File

@ -466,7 +466,7 @@ void trace_method_handle_stub(const char* adaptername,
strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH
const char* mh_reg_name = has_mh ? "R23_method_handle" : "G23";
tty->print_cr("MH %s %s="INTPTR_FORMAT " sp=" INTPTR_FORMAT,
adaptername, mh_reg_name, (intptr_t) mh, entry_sp);
adaptername, mh_reg_name, p2i(mh), p2i(entry_sp));
if (Verbose) {
tty->print_cr("Registers:");

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -172,6 +172,12 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
// This machine does not allow unaligned memory accesses
if (UseUnalignedAccesses) {
if (!FLAG_IS_DEFAULT(UseUnalignedAccesses))
warning("Unaligned memory access is not available on this CPU");
FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
}
}
void VM_Version::print_features() {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -32,7 +32,7 @@
int VM_Version::_features = VM_Version::unknown_m;
const char* VM_Version::_features_str = "";
unsigned int VM_Version::_L2_cache_line_size = 0;
unsigned int VM_Version::_L2_data_cache_line_size = 0;
void VM_Version::initialize() {
_features = determine_features();
@ -356,10 +356,17 @@ void VM_Version::initialize() {
(cache_line_size > ContendedPaddingWidth))
ContendedPaddingWidth = cache_line_size;
// This machine does not allow unaligned memory accesses
if (UseUnalignedAccesses) {
if (!FLAG_IS_DEFAULT(UseUnalignedAccesses))
warning("Unaligned memory access is not available on this CPU");
FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
}
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose) {
tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
tty->print_cr("L2 cache line size: %u", L2_cache_line_size());
tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size());
tty->print("Allocation");
if (AllocatePrefetchStyle <= 0) {
tty->print_cr(": no prefetching");

View File

@ -96,8 +96,8 @@ protected:
static int _features;
static const char* _features_str;
static unsigned int _L2_cache_line_size;
static unsigned int L2_cache_line_size() { return _L2_cache_line_size; }
static unsigned int _L2_data_cache_line_size;
static unsigned int L2_data_cache_line_size() { return _L2_data_cache_line_size; }
static void print_features();
static int determine_features();
@ -171,7 +171,7 @@ public:
static const char* cpu_features() { return _features_str; }
// default prefetch block size on sparc
static intx prefetch_data_size() { return L2_cache_line_size(); }
static intx prefetch_data_size() { return L2_data_cache_line_size(); }
// Prefetch
static intx prefetch_copy_interval_in_bytes() {

View File

@ -3359,6 +3359,20 @@ void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vecto
// Integer vector arithmetic
void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
emit_int8(0x01);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
emit_int8(0x02);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::paddb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
@ -3379,6 +3393,20 @@ void Assembler::paddq(XMMRegister dst, XMMRegister src) {
emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
}
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
emit_int8(0x01);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
emit_int8(0x02);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
@ -3804,6 +3832,17 @@ void Assembler::vinsertf128h(XMMRegister dst, Address src) {
emit_int8(0x01);
}
void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
bool vector256 = true;
int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
emit_int8(0x19);
emit_int8((unsigned char)(0xC0 | encode));
// 0x00 - extract from lower 128 bits
// 0x01 - extract from upper 128 bits
emit_int8(0x01);
}
void Assembler::vextractf128h(Address dst, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);

View File

@ -1777,6 +1777,12 @@ private:
void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
// Add horizontal packed integers
void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
void phaddw(XMMRegister dst, XMMRegister src);
void phaddd(XMMRegister dst, XMMRegister src);
// Add packed integers
void paddb(XMMRegister dst, XMMRegister src);
void paddw(XMMRegister dst, XMMRegister src);
@ -1869,6 +1875,7 @@ private:
// Copy low 128bit into high 128bit of YMM registers.
void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
void vextractf128h(XMMRegister dst, XMMRegister src);
// Load/store high 128bit of YMM registers which does not destroy other half.
void vinsertf128h(XMMRegister dst, Address src);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -58,4 +58,9 @@ const bool CCallingConventionRequiresIntsAsLongs = false;
#endif
#endif
#if defined(COMPILER2) && !defined(JAVASE_EMBEDDED)
// Include Restricted Transactional Memory lock eliding optimization
#define INCLUDE_RTM_OPT 1
#endif
#endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP

View File

@ -969,6 +969,11 @@ void VM_Version::get_processor_features() {
(cache_line_size > ContendedPaddingWidth))
ContendedPaddingWidth = cache_line_size;
// This machine allows unaligned memory accesses
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
}
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose) {
tty->print_cr("Logical CPUs per core: %u",

View File

@ -490,7 +490,7 @@ source_hpp %{
class NativeJump;
class CallStubImpl {
//--------------------------------------------------------------
//---< Used for optimization in Compile::shorten_branches >---
//--------------------------------------------------------------
@ -500,9 +500,9 @@ class CallStubImpl {
static uint size_call_trampoline() {
return 0; // no call trampolines on this platform
}
// number of relocations needed by a call trampoline stub
static uint reloc_call_trampoline() {
static uint reloc_call_trampoline() {
return 0; // no call trampolines on this platform
}
};
@ -623,6 +623,22 @@ const bool Matcher::match_rule_supported(int opcode) {
if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
return false;
break;
case Op_AddReductionVL:
if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
return false;
case Op_AddReductionVI:
if (UseSSE < 3) // requires at least SSE3
return false;
case Op_MulReductionVI:
if (UseSSE < 4) // requires at least SSE4
return false;
case Op_AddReductionVF:
case Op_AddReductionVD:
case Op_MulReductionVF:
case Op_MulReductionVD:
if (UseSSE < 1) // requires at least SSE
return false;
break;
case Op_CompareAndSwapL:
#ifdef _LP64
case Op_CompareAndSwapP:
@ -2532,6 +2548,574 @@ instruct Repl4D_zero(vecY dst, immD0 zero) %{
ins_pipe( fpu_reg_reg );
%}
// ====================REDUCTION ARITHMETIC=======================================
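
For orientation, the rules below implement the reduction nodes referenced in match_rule_supported() above: a scalar accumulator combined with the lanes of a vector. The kind of Java loop C2's superword pass turns into such nodes looks like this — a hedged illustration; whether a given loop is actually vectorized depends on the CPU checks above and on C2's heuristics:

    // Loops of this shape are reduction candidates.
    static int sumInt(int[] a) {
        int s = 0;
        for (int i = 0; i < a.length; i++) {
            s += a[i];                 // AddReductionVI
        }
        return s;
    }

    static float sumFloat(float[] a) {
        float s = 0f;
        for (int i = 0; i < a.length; i++) {
            s += a[i];                 // AddReductionVF
        }
        return s;
    }

    static int mulInt(int[] a) {
        int p = 1;
        for (int i = 0; i < a.length; i++) {
            p *= a[i];                 // MulReductionVI
        }
        return p;
    }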
instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseSSE > 2 && UseAVX == 0);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp2, TEMP tmp);
format %{ "movdqu $tmp2,$src2\n\t"
"phaddd $tmp2,$tmp2\n\t"
"movd $tmp,$src1\n\t"
"paddd $tmp,$tmp2\n\t"
"movd $dst,$tmp\t! add reduction2I" %}
ins_encode %{
__ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
__ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
__ movdl($tmp$$XMMRegister, $src1$$Register);
__ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ movdl($dst$$Register, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp2,$tmp\n\t"
"movd $dst,$tmp2\t! add reduction2I" %}
ins_encode %{
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
predicate(UseSSE > 2 && UseAVX == 0);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp2, TEMP tmp);
format %{ "movdqu $tmp2,$src2\n\t"
"phaddd $tmp2,$tmp2\n\t"
"phaddd $tmp2,$tmp2\n\t"
"movd $tmp,$src1\n\t"
"paddd $tmp,$tmp2\n\t"
"movd $dst,$tmp\t! add reduction4I" %}
ins_encode %{
__ movdqu($tmp2$$XMMRegister, $src2$$XMMRegister);
__ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
__ phaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister);
__ movdl($tmp$$XMMRegister, $src1$$Register);
__ paddd($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ movdl($dst$$Register, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"vphaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp2,$tmp\n\t"
"movd $dst,$tmp2\t! add reduction4I" %}
ins_encode %{
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, false);
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vphaddd $tmp,$src2,$src2\n\t"
"vphaddd $tmp,$tmp,$tmp2\n\t"
"vextractf128 $tmp2,$tmp\n\t"
"vpaddd $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpaddd $tmp2,$tmp2,$tmp\n\t"
"movd $dst,$tmp2\t! add reduction8I" %}
ins_encode %{
__ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, true);
__ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, true);
__ vextractf128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
__ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
__ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, false);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rsadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVF src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "movdqu $tmp,$src1\n\t"
"addss $tmp,$src2\n\t"
"pshufd $tmp2,$src2,0x01\n\t"
"addss $tmp,$tmp2\n\t"
"movdqu $dst,$tmp\t! add reduction2F" %}
ins_encode %{
__ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
__ addss($tmp$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
__ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvadd2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVF src1 src2));
effect(TEMP tmp2, TEMP tmp);
format %{ "vaddss $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
"vaddss $dst,$tmp2,$tmp\t! add reduction2F" %}
ins_encode %{
__ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
__ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rsadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVF src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "movdqu $tmp,$src1\n\t"
"addss $tmp,$src2\n\t"
"pshufd $tmp2,$src2,0x01\n\t"
"addss $tmp,$tmp2\n\t"
"pshufd $tmp2,$src2,0x02\n\t"
"addss $tmp,$tmp2\n\t"
"pshufd $tmp2,$src2,0x03\n\t"
"addss $tmp,$tmp2\n\t"
"movdqu $dst,$tmp\t! add reduction4F" %}
ins_encode %{
__ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
__ addss($tmp$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
__ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
__ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
__ addss($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvadd4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVF src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vaddss $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
"vaddss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vaddss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vaddss $dst,$tmp2,$tmp\t! add reduction4F" %}
ins_encode %{
__ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
__ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct radd8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVF src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
format %{ "vaddss $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
"vaddss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vaddss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vaddss $tmp2,$tmp2,$tmp\n\t"
"vextractf128 $tmp3,$src2\n\t"
"vaddss $tmp2,$tmp2,$tmp3\n\t"
"pshufd $tmp,$tmp3,0x01\n\t"
"vaddss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp3,0x02\n\t"
"vaddss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp3,0x03\n\t"
"vaddss $dst,$tmp2,$tmp\t! add reduction8F" %}
ins_encode %{
__ vaddss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
__ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
__ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
__ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
__ vaddss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
__ vaddss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rsadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (AddReductionVD src1 src2));
effect(TEMP tmp, TEMP dst);
format %{ "movdqu $tmp,$src1\n\t"
"addsd $tmp,$src2\n\t"
"pshufd $dst,$src2,0xE\n\t"
"addsd $dst,$tmp\t! add reduction2D" %}
ins_encode %{
__ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
__ addsd($tmp$$XMMRegister, $src2$$XMMRegister);
__ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
__ addsd($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvadd2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVD src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vaddsd $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vaddsd $dst,$tmp2,$tmp\t! add reduction2D" %}
ins_encode %{
__ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvadd4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
predicate(UseAVX > 0);
match(Set dst (AddReductionVD src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
format %{ "vaddsd $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vaddsd $tmp2,$tmp2,$tmp\n\t"
"vextractf128 $tmp3,$src2\n\t"
"vaddsd $tmp2,$tmp2,$tmp3\n\t"
"pshufd $tmp,$tmp3,0xE\n\t"
"vaddsd $dst,$tmp2,$tmp\t! add reduction4D" %}
ins_encode %{
__ vaddsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
__ vaddsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
__ vaddsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseSSE > 3 && UseAVX == 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0x1\n\t"
"pmulld $tmp2,$src2\n\t"
"movd $tmp,$src1\n\t"
"pmulld $tmp2,$tmp\n\t"
"movd $dst,$tmp2\t! mul reduction2I" %}
ins_encode %{
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
__ movdl($tmp$$XMMRegister, $src1$$Register);
__ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0x1\n\t"
"vpmulld $tmp,$src2,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpmulld $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction2I" %}
ins_encode %{
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
__ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
__ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
predicate(UseSSE > 3 && UseAVX == 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0xE\n\t"
"pmulld $tmp2,$src2\n\t"
"pshufd $tmp,$tmp2,0x1\n\t"
"pmulld $tmp2,$tmp\n\t"
"movd $tmp,$src1\n\t"
"pmulld $tmp2,$tmp\n\t"
"movd $dst,$tmp2\t! mul reduction4I" %}
ins_encode %{
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
__ pmulld($tmp2$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x1);
__ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
__ movdl($tmp$$XMMRegister, $src1$$Register);
__ pmulld($tmp2$$XMMRegister, $tmp$$XMMRegister);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "pshufd $tmp2,$src2,0xE\n\t"
"vpmulld $tmp,$src2,$tmp2\n\t"
"pshufd $tmp2,$tmp,0x1\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpmulld $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction4I" %}
ins_encode %{
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vpmulld($tmp$$XMMRegister, $src2$$XMMRegister, $tmp2$$XMMRegister, false);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
__ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVI src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vextractf128 $tmp,$src2\n\t"
"vpmulld $tmp,$tmp,$src2\n\t"
"pshufd $tmp2,$tmp,0xE\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"pshufd $tmp2,$tmp,0x1\n\t"
"vpmulld $tmp,$tmp,$tmp2\n\t"
"movd $tmp2,$src1\n\t"
"vpmulld $tmp2,$tmp,$tmp2\n\t"
"movd $dst,$tmp2\t! mul reduction8I" %}
ins_encode %{
__ vextractf128h($tmp$$XMMRegister, $src2$$XMMRegister);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, false);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
__ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0x1);
__ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
__ movdl($tmp2$$XMMRegister, $src1$$Register);
__ vpmulld($tmp2$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, false);
__ movdl($dst$$Register, $tmp2$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
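As a reading aid (an illustration, not part of the patch): the pshufd/vextractf128 sequence above only moves each lane next to lane 0 so it can be combined; the value a MulReductionVI node produces for the eight-lane case is simply the scalar product sketched below.
// Scalar sketch of rvmul8I_reduction_reg: fold the running value src1
// with every 32-bit lane of the 256-bit vector src2.
static int mul_reduction8I(int src1, const int src2[8]) {
  int dst = src1;
  for (int i = 0; i < 8; i++) {
    dst *= src2[i];  // order is irrelevant: int multiplication is commutative mod 2^32
  }
  return dst;
}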
instruct rsmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVF src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "movdqu $tmp,$src1\n\t"
"mulss $tmp,$src2\n\t"
"pshufd $tmp2,$src2,0x01\n\t"
"mulss $tmp,$tmp2\n\t"
"movdqu $dst,$tmp\t! add reduction2F" %}
ins_encode %{
__ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
__ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
__ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvmul2F_reduction_reg(regF dst, regF src1, vecD src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVF src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vmulss $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
"vmulss $dst,$tmp2,$tmp\t! add reduction2F" %}
ins_encode %{
__ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
__ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rsmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVF src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "movdqu $tmp,$src1\n\t"
"mulss $tmp,$src2\n\t"
"pshufd $tmp2,$src2,0x01\n\t"
"mulss $tmp,$tmp2\n\t"
"pshufd $tmp2,$src2,0x02\n\t"
"mulss $tmp,$tmp2\n\t"
"pshufd $tmp2,$src2,0x03\n\t"
"mulss $tmp,$tmp2\n\t"
"movdqu $dst,$tmp\t! add reduction4F" %}
ins_encode %{
__ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
__ mulss($tmp$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x01);
__ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x02);
__ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src2$$XMMRegister, 0x03);
__ mulss($tmp$$XMMRegister, $tmp2$$XMMRegister);
__ movdqu($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvmul4F_reduction_reg(regF dst, regF src1, vecX src2, regF tmp, regF tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVF src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vmulss $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
"vmulss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vmulss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vmulss $dst,$tmp2,$tmp\t! add reduction4F" %}
ins_encode %{
__ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
__ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvmul8F_reduction_reg(regF dst, regF src1, vecY src2, regF tmp, regF tmp2, regF tmp3) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVF src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
format %{ "vmulss $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0x01\n\t"
"vmulss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$src2,0x02\n\t"
"vmulss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$src2,0x03\n\t"
"vmulss $tmp2,$tmp2,$tmp\n\t"
"vextractf128 $tmp3,$src2\n\t"
"vmulss $tmp2,$tmp2,$tmp3\n\t"
"pshufd $tmp,$tmp3,0x01\n\t"
"vmulss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp3,0x02\n\t"
"vmulss $tmp2,$tmp2,$tmp\n\t"
"pshufd $tmp,$tmp3,0x03\n\t"
"vmulss $dst,$tmp2,$tmp\t! mul reduction8F" %}
ins_encode %{
__ vmulss($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x01);
__ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x02);
__ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
__ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
__ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x01);
__ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x02);
__ vmulss($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0x03);
__ vmulss($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rsmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp) %{
predicate(UseSSE >= 1 && UseAVX == 0);
match(Set dst (MulReductionVD src1 src2));
effect(TEMP tmp, TEMP dst);
format %{ "movdqu $tmp,$src1\n\t"
"mulsd $tmp,$src2\n\t"
"pshufd $dst,$src2,0xE\n\t"
"mulsd $dst,$tmp\t! add reduction2D" %}
ins_encode %{
__ movdqu($tmp$$XMMRegister, $src1$$XMMRegister);
__ mulsd($tmp$$XMMRegister, $src2$$XMMRegister);
__ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xE);
__ mulsd($dst$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvmul2D_reduction_reg(regD dst, regD src1, vecX src2, regD tmp, regD tmp2) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVD src1 src2));
effect(TEMP tmp, TEMP tmp2);
format %{ "vmulsd $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vmulsd $dst,$tmp2,$tmp\t! mul reduction2D" %}
ins_encode %{
__ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
instruct rvmul4D_reduction_reg(regD dst, regD src1, vecY src2, regD tmp, regD tmp2, regD tmp3) %{
predicate(UseAVX > 0);
match(Set dst (MulReductionVD src1 src2));
effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
format %{ "vmulsd $tmp2,$src1,$src2\n\t"
"pshufd $tmp,$src2,0xE\n\t"
"vmulsd $tmp2,$tmp2,$tmp\n\t"
"vextractf128 $tmp3,$src2\n\t"
"vmulsd $tmp2,$tmp2,$tmp3\n\t"
"pshufd $tmp,$tmp3,0xE\n\t"
"vmulsd $dst,$tmp2,$tmp\t! mul reduction4D" %}
ins_encode %{
__ vmulsd($tmp2$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
__ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
__ vextractf128h($tmp3$$XMMRegister, $src2$$XMMRegister);
__ vmulsd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister);
__ pshufd($tmp$$XMMRegister, $tmp3$$XMMRegister, 0xE);
__ vmulsd($dst$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister);
%}
ins_pipe( pipe_slow );
%}
// ====================VECTOR ARITHMETIC=======================================
// --------------------------------- ADD --------------------------------------

View File

@ -3604,6 +3604,23 @@ operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
%}
%}
// Indirect Memory Plus Positive Index Register Plus Offset Operand
operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
%{
constraint(ALLOC_IN_RC(ptr_reg));
predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
match(AddP (AddP reg (ConvI2L idx)) off);
op_cost(10);
format %{"[$reg + $off + $idx]" %}
interface(MEMORY_INTER) %{
base($reg);
index($idx);
scale(0x0);
disp($off);
%}
%}
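An illustrative note (my reading of the predicate, not text from the patch): the operand only matches when the int index is provably non-negative, so converting it to long cannot change its value and the whole expression can be folded into a single addressing mode. The address it encodes is, in effect:
// Hypothetical sketch of the effective address described by indPosIndexOffset.
// With idx >= 0, zero- and sign-extension of the 32-bit index agree, so
// [reg + off + idx] is safe as one memory operand (scale fixed at 1).
static char* effective_address(char* reg, long off, int idx /* known >= 0 */) {
  return reg + off + (long) idx;
}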
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
@ -3755,6 +3772,23 @@ operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale
%}
%}
// Indirect Memory Times Plus Positive Index Register Plus Offset Operand
operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
%{
constraint(ALLOC_IN_RC(ptr_reg));
predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);
op_cost(10);
format %{"[$reg + $off + $idx]" %}
interface(MEMORY_INTER) %{
base($reg);
index($idx);
scale(0x0);
disp($off);
%}
%}
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
%{
@ -3946,11 +3980,11 @@ operand cmpOpUCF2() %{
// case of this is memory operands.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
indIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
indCompressedOopOffset,
indirectNarrow, indOffset8Narrow, indOffset32Narrow,
indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
@ -4984,6 +5018,17 @@ instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
%{
match(Set dst mem);
ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr posidxoff" %}
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
match(Set dst mem);
@ -5068,6 +5113,18 @@ instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
%{
predicate(Universe::narrow_oop_shift() == 0);
match(Set dst mem);
ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr posidxoffnarrow" %}
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
predicate(Universe::narrow_oop_shift() == 0);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2009 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -30,4 +30,11 @@
#include "runtime/stubCodeGenerator.hpp"
#include "vm_version_zero.hpp"
// This file is intentionally empty
void VM_Version::initialize() {
// This machine does not allow unaligned memory accesses
if (! FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
warning("Unaligned memory access is not available on this CPU");
FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
}
}

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -144,6 +144,10 @@ class ArgumentIterator : public StackObj {
}
char* next() {
if (*_pos == '\0') {
if (_pos < _end) {
_pos += 1;
}
return NULL;
}
char* res = _pos;
@ -214,6 +218,7 @@ int AixAttachListener::init() {
// bind socket
struct sockaddr_un addr;
memset((void *)&addr, 0, sizeof(addr));
addr.sun_family = AF_UNIX;
strcpy(addr.sun_path, initial_path);
::unlink(initial_path);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -31,6 +31,10 @@
//
#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
\
/* Use 64K pages for virtual memory (shmat). */ \
product(bool, Use64KPages, true, \
"Use 64K pages if available.") \
\
/* If UseLargePages == true allow or deny usage of 16M pages. 16M pages are */ \
/* a scarce resource and there may be situations where we do not want the VM */ \
/* to run with 16M pages. (Will fall back to 64K pages). */ \
@ -55,7 +59,7 @@ define_pd_global(intx, AttachListenerTimeout, 1000);
// Defines Aix-specific default values. The flags are available on all
// platforms, but they may have different default values on other platforms.
//
define_pd_global(bool, UseLargePages, true);
define_pd_global(bool, UseLargePages, false);
define_pd_global(bool, UseLargePagesIndividualAllocation, false);
define_pd_global(bool, UseOSErrorReporting, false);
define_pd_global(bool, UseThreadPriorities, true) ;

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,8 +23,8 @@
*
*/
#ifndef OS_LINUX_VM_INTERFACESUPPORT_LINUX_HPP
#define OS_LINUX_VM_INTERFACESUPPORT_LINUX_HPP
#ifndef OS_AIX_VM_INTERFACESUPPORT_AIX_HPP
#define OS_AIX_VM_INTERFACESUPPORT_AIX_HPP
// Contains inlined functions for class InterfaceSupport
@ -32,4 +32,4 @@ static inline void serialize_memory(JavaThread *thread) {
os::write_memory_serialize_page(thread);
}
#endif // OS_LINUX_VM_INTERFACESUPPORT_LINUX_HPP
#endif // OS_AIX_VM_INTERFACESUPPORT_AIX_HPP

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -47,7 +47,7 @@ void OSThread::pd_initialize() {
_startThread_lock = new Monitor(Mutex::event, "startThread_lock", true,
Monitor::_safepoint_check_never);
assert(_startThread_lock !=NULL, "check");
assert(_startThread_lock != NULL, "check");
}
void OSThread::pd_destroy() {

File diff suppressed because it is too large

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013 SAP AG. All rights reserved.
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2013, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -35,9 +35,9 @@ class Aix {
friend class os;
// For signal-chaining
// highest so far (AIX 5.2) is SIGSAK (63)
// highest so far (AIX 5.2 - 6.1) is SIGSAK (63)
#define MAXSIGNUM 63
// length of strings included in the libperfstat structures
// Length of strings included in the libperfstat structures.
#define IDENTIFIER_LENGTH 64
static struct sigaction sigact[MAXSIGNUM]; // saved preinstalled sigactions
@ -111,22 +111,6 @@ class Aix {
// (should be LDR_CNTRL DATAPSIZE because stack is allocated on heap by pthread lib)
static int _stack_page_size;
// Default shm page size. Read: what page size shared memory will be backed
// with if no page size was set explicitly using shmctl(SHM_PAGESIZE).
// Should be LDR_CNTRL SHMPSIZE.
static size_t _shm_default_page_size;
// True if sys V shm can be used with 64K pages dynamically.
// (via shmctl(.. SHM_PAGESIZE..). Should be true for AIX 53 and
// newer / PASE V6R1 and newer. (0 or 1, -1 if not initialized)
static int _can_use_64K_pages;
// True if sys V shm can be used with 16M pages dynamically.
// (via shmctl(.. SHM_PAGESIZE..). Only true on AIX 5.3 and
// newer, if the system was set up to use 16M pages and the
// jvm has enough user rights. (0 or 1, -1 if not initialized)
static int _can_use_16M_pages;
static julong available_memory();
static julong physical_memory() { return _physical_memory; }
static void initialize_system_info();
@ -135,10 +119,6 @@ class Aix {
// one of Aix::on_pase(), Aix::os_version().
static void initialize_os_info();
static int commit_memory_impl(char* addr, size_t bytes, bool exec);
static int commit_memory_impl(char* addr, size_t bytes,
size_t alignment_hint, bool exec);
// Scan environment for important settings which might affect the
// VM. Trace out settings. Warn about invalid settings and/or
// correct them.
@ -146,10 +126,6 @@ class Aix {
// Must run after os::Aix::initialize_os_info().
static void scan_environment();
// Retrieve information about multipage size support. Will initialize
// _page_size, _stack_page_size, _can_use_64K_pages/_can_use_16M_pages
static void query_multipage_support();
// Initialize libo4 (on PASE) and libperfstat (on AIX). Call this
// before relying on functions from either lib, e.g. Aix::get_meminfo().
static void initialize_libo4();
@ -187,27 +163,8 @@ class Aix {
return _stack_page_size;
}
// default shm page size. Read: what page size shared memory
// will be backed with if no page size was set explicitly using shmctl(SHM_PAGESIZE).
// Should be LDR_CNTRL SHMPSIZE.
static int shm_default_page_size(void) {
assert(_shm_default_page_size != -1, "not initialized");
return _shm_default_page_size;
}
// Return true if sys V shm can be used with 64K pages dynamically
// (via shmctl(.. SHM_PAGESIZE..).
static bool can_use_64K_pages () {
assert(_can_use_64K_pages != -1, "not initialized");
return _can_use_64K_pages == 1 ? true : false;
}
// Return true if sys V shm can be used with 16M pages dynamically.
// (via shmctl(.. SHM_PAGESIZE..).
static bool can_use_16M_pages () {
assert(_can_use_16M_pages != -1, "not initialized");
return _can_use_16M_pages == 1 ? true : false;
}
// This is used to scale stack space (guard pages etc.). The name is somehow misleading.
static int vm_default_page_size(void ) { return 8*K; }
static address ucontext_get_pc(const ucontext_t* uc);
static intptr_t* ucontext_get_sp(ucontext_t* uc);
@ -269,6 +226,11 @@ class Aix {
return _os_version;
}
// Convenience method: returns true if running on PASE V5R4 or older.
static bool on_pase_V5R4_or_older() {
return on_pase() && os_version() <= 0x0504;
}
// Convenience method: returns true if running on AIX 5.3 or older.
static bool on_aix_53_or_older() {
return on_aix() && os_version() <= 0x0503;

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -40,7 +40,7 @@ inline void* os::thread_local_storage_at(int index) {
return pthread_getspecific((pthread_key_t)index);
}
// File names are case-sensitive on windows only
// File names are case-sensitive on windows only.
inline int os::file_name_strcmp(const char* s1, const char* s2) {
return strcmp(s1, s2);
}
@ -53,18 +53,19 @@ inline bool os::uses_stack_guard_pages() {
return true;
}
// Whether or not calling code should/can commit/uncommit stack pages
// before guarding them. Answer for AIX is definitely no, because memory
// is automatically committed on touch.
inline bool os::allocate_stack_guard_pages() {
assert(uses_stack_guard_pages(), "sanity check");
return true;
return false;
}
// On Aix, reservations are made on a page by page basis, nothing to do.
inline void os::pd_split_reserved_memory(char *base, size_t size,
size_t split, bool realloc) {
}
// Bang the shadow pages if they need to be touched to be mapped.
inline void os::bang_stack_shadow_pages() {
}
@ -75,15 +76,13 @@ inline void os::dll_unload(void *lib) {
inline const int os::default_file_open_flags() { return 0;}
inline DIR* os::opendir(const char* dirname)
{
inline DIR* os::opendir(const char* dirname) {
assert(dirname != NULL, "just checking");
return ::opendir(dirname);
}
inline int os::readdir_buf_size(const char *path)
{
// according to aix sys/limits, NAME_MAX must be retrieved at runtime. */
inline int os::readdir_buf_size(const char *path) {
// According to aix sys/limits, NAME_MAX must be retrieved at runtime.
const long my_NAME_MAX = pathconf(path, _PC_NAME_MAX);
return my_NAME_MAX + sizeof(dirent) + 1;
}
@ -104,8 +103,7 @@ inline int os::ftruncate(int fd, jlong length) {
return ::ftruncate64(fd, length);
}
inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
{
inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf) {
dirent* p;
int status;
assert(dirp != NULL, "just checking");
@ -174,11 +172,11 @@ inline int os::send(int fd, char* buf, size_t nBytes, uint flags) {
RESTARTABLE_RETURN_INT(::send(fd, buf, nBytes, flags));
}
inline int os::raw_send(int fd, char* buf, size_t nBytes, uint flags) {
inline int os::raw_send(int fd, char *buf, size_t nBytes, uint flags) {
return os::send(fd, buf, nBytes, flags);
}
inline int os::connect(int fd, struct sockaddr* him, socklen_t len) {
inline int os::connect(int fd, struct sockaddr *him, socklen_t len) {
RESTARTABLE_RETURN_INT(::connect(fd, him, len));
}

View File

@ -797,7 +797,7 @@ static void cleanup_sharedmem_resources(const char* dirname) {
// Close the directory and reset the current working directory.
close_directory_secure_cwd(dirp, saved_cwd_fd);
FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
FREE_C_HEAP_ARRAY(char, dbuf);
}
// Make the user specific temporary directory. Returns true if
@ -1164,9 +1164,9 @@ static void mmap_attach_shared(const char* user, int vmid, PerfMemory::PerfMemor
// store file, we don't follow them when attaching either.
//
if (!is_directory_secure(dirname)) {
FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
FREE_C_HEAP_ARRAY(char, dirname);
if (luser != user) {
FREE_C_HEAP_ARRAY(char, luser, mtInternal);
FREE_C_HEAP_ARRAY(char, luser);
}
THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
"Process not found");

View File

@ -1,5 +1,5 @@
/*
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -22,8 +22,18 @@
*
*/
#ifndef OS_AIX_VM_PORTING_AIX_HPP
#define OS_AIX_VM_PORTING_AIX_HPP
#include <stddef.h>
// PPC port only:
#define assert0(b) assert( (b), "" )
#define guarantee0(b) assert( (b), "" )
template <class T1, class T2> bool is_aligned_to(T1 what, T2 alignment) {
return ( ((uintx)(what)) & (((uintx)(alignment)) - 1) ) == 0 ? true : false;
}
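A minimal usage sketch (hypothetical address; assumes a power-of-two alignment, which is what the mask trick above requires):
// 0x100000 is 64K-aligned: 0x100000 & (64*K - 1) == 0.
void* p = (void*) 0x100000;
assert0(is_aligned_to(p, 64*K));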
// Header file to contain porting-relevant code which does not have a
// home anywhere else and which can not go into os_<platform>.h because
// that header is included inside the os class definition, hence all
@ -79,3 +89,62 @@ int getFuncName(
const struct tbtable** p_tb, // [out] optional: ptr to traceback table to get further information
char* p_errmsg, size_t errmsglen // [out] optional: user provided buffer for error messages
);
// -------------------------------------------------------------------------
// A simple critical section which shall be based upon OS critical
// sections (CRITICAL_SECTION resp. Posix Mutex) and nothing else.
#include <pthread.h>
namespace MiscUtils {
typedef pthread_mutex_t critsect_t;
inline void init_critsect(MiscUtils::critsect_t* cs) {
pthread_mutex_init(cs, NULL);
}
inline void free_critsect(MiscUtils::critsect_t* cs) {
pthread_mutex_destroy(cs);
}
inline void enter_critsect(MiscUtils::critsect_t* cs) {
pthread_mutex_lock(cs);
}
inline void leave_critsect(MiscUtils::critsect_t* cs) {
pthread_mutex_unlock(cs);
}
// Need to wrap this in an object because we need to dynamically initialize
// critical section (because of windows, where there is no way to initialize
// a CRITICAL_SECTION statically. On Unix, we could use
// PTHREAD_MUTEX_INITIALIZER)
// Note: The critical section does NOT get cleaned up in the destructor. That is
// by design: the CritSect class is only ever used as global objects whose
// lifetime spans the whole VM life; in that context we don't want the lock to
// be cleaned up when global C++ objects are destroyed, but to continue to work
// correctly right to the very end of the process life.
class CritSect {
critsect_t _cs;
public:
CritSect() { init_critsect(&_cs); }
//~CritSect() { free_critsect(&_cs); }
void enter() { enter_critsect(&_cs); }
void leave() { leave_critsect(&_cs); }
};
class AutoCritSect {
CritSect* const _pcsobj;
public:
AutoCritSect(CritSect* pcsobj)
: _pcsobj(pcsobj)
{
_pcsobj->enter();
}
~AutoCritSect() {
_pcsobj->leave();
}
};
}
#endif // OS_AIX_VM_PORTING_AIX_HPP
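A minimal usage sketch of the new MiscUtils helpers (the lock name and the guarded counter are hypothetical, not taken from the patch):
// Process-lifetime lock; deliberately never destroyed, per the CritSect
// comment above.
static MiscUtils::CritSect g_table_lock;
static int g_table_entries = 0;

static void add_entry() {
  MiscUtils::AutoCritSect lock(&g_table_lock);  // enter() in ctor, leave() in dtor
  g_table_entries++;
}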

View File

@ -129,7 +129,7 @@ class PICL {
bool is_inconsistent() { return _state == INCONSISTENT; }
void set_inconsistent() { _state = INCONSISTENT; }
void visit(picl_nodehdl_t nodeh, const char* name) {
bool visit(picl_nodehdl_t nodeh, const char* name) {
assert(!is_inconsistent(), "Precondition");
int curr;
if (_picl->get_int_property(nodeh, name, &curr) == PICL_SUCCESS) {
@ -138,7 +138,9 @@ class PICL {
} else if (curr != value()) { // following iterations
set_inconsistent();
}
return true;
}
return false;
}
};
@ -155,8 +157,19 @@ class PICL {
if (!l1_visitor->is_inconsistent()) {
l1_visitor->visit(nodeh, "l1-dcache-line-size");
}
if (!l2_visitor->is_inconsistent()) {
l2_visitor->visit(nodeh, "l2-cache-line-size");
static const char* l2_data_cache_line_property_name = NULL;
// On the first visit determine the name of the l2 cache line size property and memoize it.
if (l2_data_cache_line_property_name == NULL) {
assert(!l2_visitor->is_inconsistent(), "First iteration cannot be inconsistent");
l2_data_cache_line_property_name = "l2-cache-line-size";
if (!l2_visitor->visit(nodeh, l2_data_cache_line_property_name)) {
l2_data_cache_line_property_name = "l2-dcache-line-size";
l2_visitor->visit(nodeh, l2_data_cache_line_property_name);
}
} else {
if (!l2_visitor->is_inconsistent()) {
l2_visitor->visit(nodeh, l2_data_cache_line_property_name);
}
}
if (l1_visitor->is_inconsistent() && l2_visitor->is_inconsistent()) {
@ -172,13 +185,13 @@ class PICL {
UniqueValueVisitor* l2_visitor() { return &_l2_visitor; }
};
int _L1_data_cache_line_size;
int _L2_cache_line_size;
int _L2_data_cache_line_size;
public:
static int visit_cpu(picl_nodehdl_t nodeh, void *state) {
return CPUVisitor::visit(nodeh, state);
}
PICL(bool is_fujitsu) : _L1_data_cache_line_size(0), _L2_cache_line_size(0), _dl_handle(NULL) {
PICL(bool is_fujitsu) : _L1_data_cache_line_size(0), _L2_data_cache_line_size(0), _dl_handle(NULL) {
if (!open_library()) {
return;
}
@ -196,7 +209,7 @@ public:
_L1_data_cache_line_size = cpu_visitor.l1_visitor()->value();
}
if (cpu_visitor.l2_visitor()->is_assigned()) {
_L2_cache_line_size = cpu_visitor.l2_visitor()->value();
_L2_data_cache_line_size = cpu_visitor.l2_visitor()->value();
}
}
_picl_shutdown();
@ -205,7 +218,7 @@ public:
}
unsigned int L1_data_cache_line_size() const { return _L1_data_cache_line_size; }
unsigned int L2_cache_line_size() const { return _L2_cache_line_size; }
unsigned int L2_data_cache_line_size() const { return _L2_data_cache_line_size; }
};
@ -431,7 +444,7 @@ int VM_Version::platform_features(int features) {
// Figure out cache line sizes using PICL
PICL picl((features & sparc64_family_m) != 0);
_L1_data_cache_line_size = picl.L1_data_cache_line_size();
_L2_cache_line_size = picl.L2_cache_line_size();
_L2_data_cache_line_size = picl.L2_data_cache_line_size();
return features;
}

View File

@ -800,6 +800,7 @@ void ADLParser::reg_parse(void) {
}
if (strcmp(token,"reg_def")==0) { reg_def_parse(); }
else if (strcmp(token,"reg_class")==0) { reg_class_parse(); }
else if (strcmp(token, "reg_class_dynamic") == 0) { reg_class_dynamic_parse(); }
else if (strcmp(token,"alloc_class")==0) { alloc_class_parse(); }
else if (strcmp(token,"#define")==0) { preproc_define(); }
else { parse_err(SYNERR, "bad token %s inside register block.\n", token); break; }
@ -2323,11 +2324,12 @@ void ADLParser::reg_class_parse(void) {
// Debug Stuff
if (_AD._adl_debug >1) fprintf(stderr,"Register Class: %s\n", cname);
RegClass *reg_class = _AD._register->addRegClass(cname);
// Collect registers in class
skipws();
if (_curchar == '(') {
// A register list is defined for the register class.
// Collect registers into a generic RegClass register class.
RegClass* reg_class = _AD._register->addRegClass<RegClass>(cname);
next_char(); // Skip '('
skipws();
while (_curchar != ')') {
@ -2352,12 +2354,15 @@ void ADLParser::reg_class_parse(void) {
}
next_char(); // Skip closing ')'
} else if (_curchar == '%') {
// A code snippet is defined for the register class.
// Collect the code snippet into a CodeSnippetRegClass register class.
CodeSnippetRegClass* reg_class = _AD._register->addRegClass<CodeSnippetRegClass>(cname);
char *code = find_cpp_block("reg class");
if (code == NULL) {
parse_err(SYNERR, "missing code declaration for reg class.\n");
return;
}
reg_class->_user_defined = code;
reg_class->set_code_snippet(code);
return;
}
@ -2374,6 +2379,87 @@ void ADLParser::reg_class_parse(void) {
return;
}
//------------------------------reg_class_dynamic_parse------------------------
void ADLParser::reg_class_dynamic_parse(void) {
char *cname; // Name of dynamic register class being defined
// Get register class name
skipws();
cname = get_ident();
if (cname == NULL) {
parse_err(SYNERR, "missing dynamic register class name after 'reg_class_dynamic'\n");
return;
}
if (_AD._adl_debug > 1) {
fprintf(stdout, "Dynamic Register Class: %s\n", cname);
}
skipws();
if (_curchar != '(') {
parse_err(SYNERR, "missing '(' at the beginning of reg_class_dynamic definition\n");
return;
}
next_char();
skipws();
// Collect two register classes and the C++ code representing the condition code used to
// select between the two classes into a ConditionalRegClass register class.
ConditionalRegClass* reg_class = _AD._register->addRegClass<ConditionalRegClass>(cname);
int i;
for (i = 0; i < 2; i++) {
char* name = get_ident();
if (name == NULL) {
parse_err(SYNERR, "missing class identifier inside reg_class_dynamic list.\n");
return;
}
RegClass* rc = _AD._register->getRegClass(name);
if (rc == NULL) {
parse_err(SEMERR, "unknown identifier %s inside reg_class_dynamic list.\n", name);
} else {
reg_class->set_rclass_at_index(i, rc);
}
skipws();
if (_curchar == ',') {
next_char();
skipws();
} else {
parse_err(SYNERR, "missing separator ',' inside reg_class_dynamic list.\n");
}
}
// Collect the condition code.
skipws();
if (_curchar == '%') {
char* code = find_cpp_block("reg class dynamic");
if (code == NULL) {
parse_err(SYNERR, "missing code declaration for reg_class_dynamic.\n");
return;
}
reg_class->set_condition_code(code);
} else {
parse_err(SYNERR, "missing %% at the beginning of code block in reg_class_dynamic definition\n");
return;
}
skipws();
if (_curchar != ')') {
parse_err(SYNERR, "missing ')' at the end of reg_class_dynamic definition\n");
return;
}
next_char();
skipws();
if (_curchar != ';') {
parse_err(SYNERR, "missing ';' at the end of reg_class_dynamic definition.\n");
return;
}
next_char(); // Skip trailing ';'
return;
}
//------------------------------alloc_class_parse------------------------------
void ADLParser::alloc_class_parse(void) {
char *name; // Name of allocation class being defined

View File

@ -53,6 +53,8 @@ class ConstructRule;
// ***** Register Section *****
class RegDef;
class RegClass;
class CodeSnippetRegClass;
class ConditionalRegClass;
class AllocClass;
class ResourceForm;
// ***** Pipeline Section *****
@ -125,6 +127,7 @@ protected:
// Parse components of the register section
void reg_def_parse(void); // Parse register definition
void reg_class_parse(void); // Parse register class definition
void reg_class_dynamic_parse(void); // Parse dynamic register class definition
void alloc_class_parse(void); // Parse allocation class definition
// Parse components of the definition section

View File

@ -908,7 +908,7 @@ char *ArchDesc::stack_or_reg_mask(OperandForm &opForm) {
void ArchDesc::set_stack_or_reg(const char *reg_class_name) {
if( _register ) {
RegClass *reg_class = _register->getRegClass(reg_class_name);
reg_class->_stack_or_reg = true;
reg_class->set_stack_version(true);
}
}

View File

@ -68,6 +68,8 @@ class Opcode;
class InsEncode;
class RegDef;
class RegClass;
class CodeSnippetRegClass;
class ConditionalRegClass;
class AllocClass;
class ResourceForm;
class PipeClassForm;

View File

@ -47,13 +47,19 @@ void RegisterForm::addRegDef(char *name, char *callingConv, char *c_conv,
}
// record a new register class
RegClass *RegisterForm::addRegClass(const char *className) {
RegClass *regClass = new RegClass(className);
template <typename T>
T* RegisterForm::addRegClass(const char* className) {
T* regClass = new T(className);
_rclasses.addName(className);
_regClass.Insert(className,regClass);
_regClass.Insert(className, regClass);
return regClass;
}
// Explicit instantiation for all supported register classes.
template RegClass* RegisterForm::addRegClass<RegClass>(const char* className);
template CodeSnippetRegClass* RegisterForm::addRegClass<CodeSnippetRegClass>(const char* className);
template ConditionalRegClass* RegisterForm::addRegClass<ConditionalRegClass>(const char* className);
// record a new register class
AllocClass *RegisterForm::addAllocClass(char *className) {
AllocClass *allocClass = new AllocClass(className);
@ -67,9 +73,9 @@ AllocClass *RegisterForm::addAllocClass(char *className) {
void RegisterForm::addSpillRegClass() {
// Stack slots start at the next available even register number.
_reg_ctr = (_reg_ctr+7) & ~7;
const char *rc_name = "stack_slots";
RegClass *reg_class = new RegClass(rc_name);
reg_class->_stack_or_reg = true;
const char *rc_name = "stack_slots";
RegClass* reg_class = new RegClass(rc_name);
reg_class->set_stack_version(true);
_rclasses.addName(rc_name);
_regClass.Insert(rc_name,reg_class);
}
@ -224,9 +230,11 @@ void RegDef::output(FILE *fp) { // Write info to output files
//------------------------------RegClass---------------------------------------
// Construct a register class into which registers will be inserted
RegClass::RegClass(const char *classid) : _stack_or_reg(false), _classid(classid), _regDef(cmpstr,hashstr, Form::arena),
_user_defined(NULL)
{
RegClass::RegClass(const char* classid) : _stack_or_reg(false), _classid(classid), _regDef(cmpstr, hashstr, Form::arena) {
}
RegClass::~RegClass() {
delete _classid;
}
// record a register in this class
@ -305,6 +313,91 @@ void RegClass::output(FILE *fp) { // Write info to output files
fprintf(fp,"--- done with entries for reg_class %s\n\n",_classid);
}
void RegClass::declare_register_masks(FILE* fp) {
const char* prefix = "";
const char* rc_name_to_upper = toUpper(_classid);
fprintf(fp, "extern const RegMask _%s%s_mask;\n", prefix, rc_name_to_upper);
fprintf(fp, "inline const RegMask &%s%s_mask() { return _%s%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
if (_stack_or_reg) {
fprintf(fp, "extern const RegMask _%sSTACK_OR_%s_mask;\n", prefix, rc_name_to_upper);
fprintf(fp, "inline const RegMask &%sSTACK_OR_%s_mask() { return _%sSTACK_OR_%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
}
delete[] rc_name_to_upper;
}
void RegClass::build_register_masks(FILE* fp) {
int len = RegisterForm::RegMask_Size();
const char *prefix = "";
const char* rc_name_to_upper = toUpper(_classid);
fprintf(fp, "const RegMask _%s%s_mask(", prefix, rc_name_to_upper);
int i;
for(i = 0; i < len - 1; i++) {
fprintf(fp," 0x%x,", regs_in_word(i, false));
}
fprintf(fp," 0x%x );\n", regs_in_word(i, false));
if (_stack_or_reg) {
fprintf(fp, "const RegMask _%sSTACK_OR_%s_mask(", prefix, rc_name_to_upper);
for(i = 0; i < len - 1; i++) {
fprintf(fp," 0x%x,", regs_in_word(i, true));
}
fprintf(fp," 0x%x );\n", regs_in_word(i, true));
}
delete[] rc_name_to_upper;
}
//------------------------------CodeSnippetRegClass---------------------------
CodeSnippetRegClass::CodeSnippetRegClass(const char* classid) : RegClass(classid), _code_snippet(NULL) {
}
CodeSnippetRegClass::~CodeSnippetRegClass() {
delete _code_snippet;
}
void CodeSnippetRegClass::declare_register_masks(FILE* fp) {
const char* prefix = "";
const char* rc_name_to_upper = toUpper(_classid);
fprintf(fp, "inline const RegMask &%s%s_mask() { %s }\n", prefix, rc_name_to_upper, _code_snippet);
delete[] rc_name_to_upper;
}
//------------------------------ConditionalRegClass---------------------------
ConditionalRegClass::ConditionalRegClass(const char *classid) : RegClass(classid), _condition_code(NULL) {
}
ConditionalRegClass::~ConditionalRegClass() {
delete _condition_code;
}
void ConditionalRegClass::declare_register_masks(FILE* fp) {
const char* prefix = "";
const char* rc_name_to_upper = toUpper(_classid);
const char* rclass_0_to_upper = toUpper(_rclasses[0]->_classid);
const char* rclass_1_to_upper = toUpper(_rclasses[1]->_classid);
fprintf(fp, "inline const RegMask &%s%s_mask() {"
" return (%s) ?"
" %s%s_mask() :"
" %s%s_mask(); }\n",
prefix, rc_name_to_upper,
_condition_code,
prefix, rclass_0_to_upper,
prefix, rclass_1_to_upper);
if (_stack_or_reg) {
fprintf(fp, "inline const RegMask &%sSTACK_OR_%s_mask() {"
" return (%s) ?"
" %sSTACK_OR_%s_mask() :"
" %sSTACK_OR_%s_mask(); }\n",
prefix, rc_name_to_upper,
_condition_code,
prefix, rclass_0_to_upper,
prefix, rclass_1_to_upper);
}
delete[] rc_name_to_upper;
delete[] rclass_0_to_upper;
delete[] rclass_1_to_upper;
return;
}
//------------------------------AllocClass-------------------------------------
AllocClass::AllocClass(char *classid) : _classid(classid), _regDef(cmpstr,hashstr, Form::arena) {

View File

@ -60,6 +60,8 @@ class Opcode;
class InsEncode;
class RegDef;
class RegClass;
class CodeSnippetRegClass;
class ConditionalRegClass;
class AllocClass;
class ResourceForm;
class PipeClassForm;
@ -98,7 +100,8 @@ public:
void addRegDef(char *regName, char *callingConv, char *c_conv,
char * idealtype, char *encoding, char* concreteName);
RegClass *addRegClass(const char *className);
template<typename T> T* addRegClass(const char* className);
AllocClass *addAllocClass(char *allocName);
void addSpillRegClass();
@ -154,17 +157,28 @@ public:
};
//------------------------------RegClass---------------------------------------
// Generic register class. This register class is the internal representation
// for the following .ad file format:
//
// reg_class ptr(RAX, RBX, ...);
//
// where ptr is the name of the register class, RAX and RBX are registers.
//
// This register class allows registers to be spilled onto the stack. Spilling
// is allowed if field _stack_or_reg is true.
class RegClass : public Form {
public:
// Public Data
const char *_classid; // Name of class
NameList _regDefs; // List of registers in class
Dict _regDef; // Dictionary of registers in class
protected:
bool _stack_or_reg; // Allowed on any stack slot
char* _user_defined;
public:
// Public Methods
RegClass(const char *classid);// Constructor
virtual ~RegClass();
void addReg(RegDef *regDef); // Add a register to this class
@ -183,6 +197,115 @@ public:
void dump(); // Debug printer
void output(FILE *fp); // Write info to output files
virtual bool has_stack_version() {
return _stack_or_reg;
}
virtual void set_stack_version(bool flag) {
_stack_or_reg = flag;
}
virtual void declare_register_masks(FILE* fp);
virtual void build_register_masks(FILE* fp);
};
//------------------------------CodeSnippetRegClass----------------------------
// Register class that has a user-defined C++ code snippet attached to it
// to determine at runtime which register class to use. This register class is
// the internal representation for the following .ad file format:
//
// reg_class actual_dflt_reg %{
// if (VM_Version::has_vfp3_32()) {
// return DFLT_REG_mask();
// } else {
// return DFLT_LOW_REG_mask();
// }
// %}
//
// where DFLT_REG_mask() and DFLT_LOW_REG_mask() are the internal names of the
// masks of register classes dflt_reg and dflt_low_reg.
//
// The attached code snippet can also select between more than two register classes.
// This register class can, however, only be used if the register class is not
// cisc-spillable (i.e., the registers of this class are not allowed on the stack,
// which is equivalent to _stack_or_reg being false).
class CodeSnippetRegClass : public RegClass {
protected:
char* _code_snippet;
public:
CodeSnippetRegClass(const char* classid);// Constructor
~CodeSnippetRegClass();
void set_code_snippet(char* code) {
_code_snippet = code;
}
char* code_snippet() {
return _code_snippet;
}
void set_stack_version(bool flag) {
assert(false, "User defined register classes are not allowed to spill to the stack.");
}
void declare_register_masks(FILE* fp);
void build_register_masks(FILE* fp) {
// We do not need to generate register masks because we select at runtime
// between register masks generated for other register classes.
return;
}
};
//------------------------------ConditionalRegClass----------------------------
// Register class that has two register classes and a runtime condition attached
// to it. The condition is evaluated at runtime and one of the two attached
// register classes is selected. This register class is the internal
// representation for the following .ad format:
//
// reg_class_dynamic actual_dflt_reg(dflt_reg, low_reg,
// %{ VM_Version::has_vfp3_32() }%
// );
//
// This example is equivalent to the example used with the CodeSnippetRegClass
// register class. A ConditionalRegClass also works if a register class is cisc-spillable
// (i.e., _stack_or_reg is true), but it can only select between two register classes.
class ConditionalRegClass : public RegClass {
protected:
// reference to condition code
char* _condition_code; // C++ condition code to dynamically determine which register class to use.
// Example syntax (equivalent to previous example):
//
// reg_class actual_dflt_reg(dflt_reg, low_reg,
// %{ VM_Version::has_vfp3_32() }%
// );
// reference to conditional register classes
RegClass* _rclasses[2]; // 0 is the register class selected if the condition code returns true
// 1 is the register class selected if the condition code returns false
public:
ConditionalRegClass(const char* classid);// Constructor
~ConditionalRegClass();
virtual void set_stack_version(bool flag) {
RegClass::set_stack_version(flag);
assert((_rclasses[0] != NULL), "Register class NULL for condition code == true");
assert((_rclasses[1] != NULL), "Register class NULL for condition code == false");
_rclasses[0]->set_stack_version(flag);
_rclasses[1]->set_stack_version(flag);
}
void declare_register_masks(FILE* fp);
void build_register_masks(FILE* fp) {
// We do not need to generate register masks because we select at runtime
// between register masks generated for other register classes.
return;
}
void set_rclass_at_index(int index, RegClass* rclass) {
assert((0 <= index && index < 2), "Condition code can select only between two register classes");
_rclasses[index] = rclass;
}
void set_condition_code(char* code) {
_condition_code = code;
}
char* condition_code() {
return _condition_code;
}
};
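To make the two subclasses concrete, this is roughly the accessor code declare_register_masks() emits for the documented actual_dflt_reg examples (paraphrased from the fprintf calls in formsopt.cpp; the class names come from the comments above, and only one of the two forms is generated for a given register class):
// CodeSnippetRegClass: the user-supplied snippet becomes the accessor body.
inline const RegMask &ACTUAL_DFLT_REG_mask() {
  if (VM_Version::has_vfp3_32()) {
    return DFLT_REG_mask();
  } else {
    return DFLT_LOW_REG_mask();
  }
}

// ConditionalRegClass: the condition picks one of two pre-built masks at runtime.
inline const RegMask &ACTUAL_DFLT_REG_mask() {
  return (VM_Version::has_vfp3_32()) ? DFLT_REG_mask() : LOW_REG_mask();
}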
//------------------------------AllocClass-------------------------------------

View File

@ -4043,6 +4043,13 @@ int MatchRule::is_expensive() const {
strcmp(opType,"ReplicateL")==0 ||
strcmp(opType,"ReplicateF")==0 ||
strcmp(opType,"ReplicateD")==0 ||
strcmp(opType,"AddReductionVI")==0 ||
strcmp(opType,"AddReductionVL")==0 ||
strcmp(opType,"AddReductionVF")==0 ||
strcmp(opType,"AddReductionVD")==0 ||
strcmp(opType,"MulReductionVI")==0 ||
strcmp(opType,"MulReductionVF")==0 ||
strcmp(opType,"MulReductionVD")==0 ||
0 /* 0 to line up columns nicely */ )
return 1;
}
@ -4135,6 +4142,10 @@ bool MatchRule::is_vector() const {
"MulVS","MulVI","MulVF","MulVD",
"DivVF","DivVD",
"AndV" ,"XorV" ,"OrV",
"AddReductionVI", "AddReductionVL",
"AddReductionVF", "AddReductionVD",
"MulReductionVI",
"MulReductionVF", "MulReductionVD",
"LShiftCntV","RShiftCntV",
"LShiftVB","LShiftVS","LShiftVI","LShiftVL",
"RShiftVB","RShiftVS","RShiftVI","RShiftVL",

View File

@ -59,6 +59,8 @@ class Opcode;
class InsEncode;
class RegDef;
class RegClass;
class CodeSnippetRegClass;
class ConditionalRegClass;
class AllocClass;
class ResourceForm;
class PipeDesc;

View File

@ -138,26 +138,9 @@ void ArchDesc::declare_register_masks(FILE *fp_hpp) {
fprintf(fp_hpp,"// Register masks, one for each register class.\n");
_register->_rclasses.reset();
for (rc_name = NULL; (rc_name = _register->_rclasses.iter()) != NULL;) {
const char *prefix = "";
RegClass *reg_class = _register->getRegClass(rc_name);
assert(reg_class, "Using an undefined register class");
const char* rc_name_to_upper = toUpper(rc_name);
if (reg_class->_user_defined == NULL) {
fprintf(fp_hpp, "extern const RegMask _%s%s_mask;\n", prefix, rc_name_to_upper);
fprintf(fp_hpp, "inline const RegMask &%s%s_mask() { return _%s%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
} else {
fprintf(fp_hpp, "inline const RegMask &%s%s_mask() { %s }\n", prefix, rc_name_to_upper, reg_class->_user_defined);
}
if (reg_class->_stack_or_reg) {
assert(reg_class->_user_defined == NULL, "no user defined reg class here");
fprintf(fp_hpp, "extern const RegMask _%sSTACK_OR_%s_mask;\n", prefix, rc_name_to_upper);
fprintf(fp_hpp, "inline const RegMask &%sSTACK_OR_%s_mask() { return _%sSTACK_OR_%s_mask; }\n", prefix, rc_name_to_upper, prefix, rc_name_to_upper);
}
delete[] rc_name_to_upper;
reg_class->declare_register_masks(fp_hpp);
}
}
}
@ -173,35 +156,9 @@ void ArchDesc::build_register_masks(FILE *fp_cpp) {
fprintf(fp_cpp,"// Register masks, one for each register class.\n");
_register->_rclasses.reset();
for (rc_name = NULL; (rc_name = _register->_rclasses.iter()) != NULL;) {
const char *prefix = "";
RegClass *reg_class = _register->getRegClass(rc_name);
assert(reg_class, "Using an undefined register class");
if (reg_class->_user_defined != NULL) {
continue;
}
int len = RegisterForm::RegMask_Size();
const char* rc_name_to_upper = toUpper(rc_name);
fprintf(fp_cpp, "const RegMask _%s%s_mask(", prefix, rc_name_to_upper);
{
int i;
for(i = 0; i < len - 1; i++) {
fprintf(fp_cpp," 0x%x,", reg_class->regs_in_word(i, false));
}
fprintf(fp_cpp," 0x%x );\n", reg_class->regs_in_word(i, false));
}
if (reg_class->_stack_or_reg) {
int i;
fprintf(fp_cpp, "const RegMask _%sSTACK_OR_%s_mask(", prefix, rc_name_to_upper);
for(i = 0; i < len - 1; i++) {
fprintf(fp_cpp," 0x%x,",reg_class->regs_in_word(i, true));
}
fprintf(fp_cpp," 0x%x );\n",reg_class->regs_in_word(i, true));
}
delete[] rc_name_to_upper;
reg_class->build_register_masks(fp_cpp);
}
}
}

View File

@ -3462,6 +3462,24 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
case vmIntrinsics::_putFloat : return append_unsafe_put_obj(callee, T_FLOAT, false);
case vmIntrinsics::_putDouble : return append_unsafe_put_obj(callee, T_DOUBLE, false);
case vmIntrinsics::_getShortUnaligned :
return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_SHORT, false) : false;
case vmIntrinsics::_getCharUnaligned :
return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_CHAR, false) : false;
case vmIntrinsics::_getIntUnaligned :
return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_INT, false) : false;
case vmIntrinsics::_getLongUnaligned :
return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_LONG, false) : false;
case vmIntrinsics::_putShortUnaligned :
return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_SHORT, false) : false;
case vmIntrinsics::_putCharUnaligned :
return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_CHAR, false) : false;
case vmIntrinsics::_putIntUnaligned :
return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_INT, false) : false;
case vmIntrinsics::_putLongUnaligned :
return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_LONG, false) : false;
case vmIntrinsics::_getObjectVolatile : return append_unsafe_get_obj(callee, T_OBJECT, true);
case vmIntrinsics::_getBooleanVolatile: return append_unsafe_get_obj(callee, T_BOOLEAN, true);
case vmIntrinsics::_getByteVolatile : return append_unsafe_get_obj(callee, T_BYTE, true);

View File

@ -688,7 +688,8 @@ bool ciMethod::parameter_profiled_type(int i, ciKlass*& type, bool& maybe_null)
// via assert_unique_concrete_method or assert_leaf_type.
ciMethod* ciMethod::find_monomorphic_target(ciInstanceKlass* caller,
ciInstanceKlass* callee_holder,
ciInstanceKlass* actual_recv) {
ciInstanceKlass* actual_recv,
bool check_access) {
check_is_loaded();
if (actual_recv->is_interface()) {
@ -696,7 +697,7 @@ ciMethod* ciMethod::find_monomorphic_target(ciInstanceKlass* caller,
return NULL;
}
ciMethod* root_m = resolve_invoke(caller, actual_recv);
ciMethod* root_m = resolve_invoke(caller, actual_recv, check_access);
if (root_m == NULL) {
// Something went wrong looking up the actual receiver method.
return NULL;
@ -775,7 +776,7 @@ ciMethod* ciMethod::find_monomorphic_target(ciInstanceKlass* caller,
//
// Given a known receiver klass, find the target for the call.
// Return NULL if the call has no target or the target is abstract.
ciMethod* ciMethod::resolve_invoke(ciKlass* caller, ciKlass* exact_receiver) {
ciMethod* ciMethod::resolve_invoke(ciKlass* caller, ciKlass* exact_receiver, bool check_access) {
check_is_loaded();
VM_ENTRY_MARK;
@ -792,9 +793,9 @@ ciMethod* ciMethod::resolve_invoke(ciKlass* caller, ciKlass* exact_receiver) {
||
InstanceKlass::cast(h_recv())->is_linked() && !exact_receiver->is_interface()) {
if (holder()->is_interface()) {
m = LinkResolver::resolve_interface_call_or_null(h_recv, h_resolved, h_name, h_signature, caller_klass);
m = LinkResolver::resolve_interface_call_or_null(h_recv, h_resolved, h_name, h_signature, caller_klass, check_access);
} else {
m = LinkResolver::resolve_virtual_call_or_null(h_recv, h_resolved, h_name, h_signature, caller_klass);
m = LinkResolver::resolve_virtual_call_or_null(h_recv, h_resolved, h_name, h_signature, caller_klass, check_access);
}
}

View File

@ -255,11 +255,12 @@ class ciMethod : public ciMetadata {
// its calling environment.
ciMethod* find_monomorphic_target(ciInstanceKlass* caller,
ciInstanceKlass* callee_holder,
ciInstanceKlass* actual_receiver);
ciInstanceKlass* actual_receiver,
bool check_access = true);
// Given a known receiver klass, find the target for the call.
// Return NULL if the call has no target or is abstract.
ciMethod* resolve_invoke(ciKlass* caller, ciKlass* exact_receiver);
ciMethod* resolve_invoke(ciKlass* caller, ciKlass* exact_receiver, bool check_access = true);
// Find the proper vtable index to invoke this method.
int resolve_vtable_index(ciKlass* caller, ciKlass* receiver);

View File

@ -868,9 +868,12 @@
\
/* Custom branch frequencies profiling support for JSR292 */ \
do_class(java_lang_invoke_MethodHandleImpl, "java/lang/invoke/MethodHandleImpl") \
do_intrinsic(_profileBoolean, java_lang_invoke_MethodHandleImpl, profileBoolean_name, profileBoolean_signature, F_S) \
do_name( profileBoolean_name, "profileBoolean") \
do_signature(profileBoolean_signature, "(Z[I)Z") \
do_intrinsic(_profileBoolean, java_lang_invoke_MethodHandleImpl, profileBoolean_name, profileBoolean_signature, F_S) \
do_name( profileBoolean_name, "profileBoolean") \
do_signature(profileBoolean_signature, "(Z[I)Z") \
do_intrinsic(_isCompileConstant, java_lang_invoke_MethodHandleImpl, isCompileConstant_name, isCompileConstant_signature, F_S) \
do_name( isCompileConstant_name, "isCompileConstant") \
do_alias( isCompileConstant_signature, object_boolean_signature) \
\
/* unsafe memory references (there are a lot of them...) */ \
do_signature(getObject_signature, "(Ljava/lang/Object;J)Ljava/lang/Object;") \
@ -950,6 +953,20 @@
do_intrinsic(_putFloatVolatile, sun_misc_Unsafe, putFloatVolatile_name, putFloat_signature, F_RN) \
do_intrinsic(_putDoubleVolatile, sun_misc_Unsafe, putDoubleVolatile_name, putDouble_signature, F_RN) \
\
do_name(getShortUnaligned_name,"getShortUnaligned") do_name(putShortUnaligned_name,"putShortUnaligned") \
do_name(getCharUnaligned_name,"getCharUnaligned") do_name(putCharUnaligned_name,"putCharUnaligned") \
do_name(getIntUnaligned_name,"getIntUnaligned") do_name(putIntUnaligned_name,"putIntUnaligned") \
do_name(getLongUnaligned_name,"getLongUnaligned") do_name(putLongUnaligned_name,"putLongUnaligned") \
\
do_intrinsic(_getShortUnaligned, sun_misc_Unsafe, getShortUnaligned_name, getShort_signature, F_R) \
do_intrinsic(_getCharUnaligned, sun_misc_Unsafe, getCharUnaligned_name, getChar_signature, F_R) \
do_intrinsic(_getIntUnaligned, sun_misc_Unsafe, getIntUnaligned_name, getInt_signature, F_R) \
do_intrinsic(_getLongUnaligned, sun_misc_Unsafe, getLongUnaligned_name, getLong_signature, F_R) \
do_intrinsic(_putShortUnaligned, sun_misc_Unsafe, putShortUnaligned_name, putShort_signature, F_R) \
do_intrinsic(_putCharUnaligned, sun_misc_Unsafe, putCharUnaligned_name, putChar_signature, F_R) \
do_intrinsic(_putIntUnaligned, sun_misc_Unsafe, putIntUnaligned_name, putInt_signature, F_R) \
do_intrinsic(_putLongUnaligned, sun_misc_Unsafe, putLongUnaligned_name, putLong_signature, F_R) \
\
/* %%% these are redundant except perhaps for getAddress, but Unsafe has native methods for them */ \
do_signature(getByte_raw_signature, "(J)B") \
do_signature(putByte_raw_signature, "(JB)V") \

View File

@ -845,7 +845,13 @@ class ClassHierarchyWalker {
assert((uint)n <= (uint)_num_participants, "oob");
Method* fm = _found_methods[n];
assert(n == _num_participants || fm != NULL, "proper usage");
assert(fm == NULL || fm->method_holder() == _participants[n], "sanity");
if (fm != NULL && fm->method_holder() != _participants[n]) {
// Default methods from interfaces can be added to classes. In
// that case the holder of the method is not the class but the
// interface where it's defined.
assert(fm->is_default_method(), "sanity");
return NULL;
}
return fm;
}

View File

@ -504,7 +504,7 @@ nmethod* nmethod::new_native_nmethod(methodHandle method,
basic_lock_owner_sp_offset,
basic_lock_sp_offset, oop_maps);
NOT_PRODUCT(if (nm != NULL) nmethod_stats.note_native_nmethod(nm));
if (PrintAssembly && nm != NULL) {
if ((PrintAssembly || CompilerOracle::should_print(method)) && nm != NULL) {
Disassembler::decode(nm);
}
}
@ -2837,11 +2837,21 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) {
st.print(")");
return st.as_string();
}
case relocInfo::runtime_call_type: {
stringStream st;
st.print("runtime_call");
runtime_call_Relocation* r = iter.runtime_call_reloc();
address dest = r->destination();
CodeBlob* cb = CodeCache::find_blob(dest);
if (cb != NULL) {
st.print(" %s", cb->name());
}
return st.as_string();
}
case relocInfo::virtual_call_type: return "virtual_call";
case relocInfo::opt_virtual_call_type: return "optimized virtual_call";
case relocInfo::static_call_type: return "static_call";
case relocInfo::static_stub_type: return "static_stub";
case relocInfo::runtime_call_type: return "runtime_call";
case relocInfo::external_word_type: return "external_word";
case relocInfo::internal_word_type: return "internal_word";
case relocInfo::section_word_type: return "section_word";

View File

@ -54,12 +54,7 @@ void PcDesc::print(nmethod* code) {
for (ScopeDesc* sd = code->scope_desc_at(real_pc(code));
sd != NULL;
sd = sd->sender()) {
tty->print(" ");
sd->method()->print_short_name(tty);
tty->print(" @%d", sd->bci());
if (sd->should_reexecute())
tty->print(" reexecute=true");
tty->cr();
sd->print_on(tty);
}
#endif
}

View File

@ -157,14 +157,18 @@ ScopeDesc* ScopeDesc::sender() const {
#ifndef PRODUCT
void ScopeDesc::print_value_on(outputStream* st) const {
tty->print(" ");
st->print(" ");
method()->print_short_name(st);
int lineno = method()->line_number_from_bci(bci());
if (lineno != -1) {
st->print_cr("@%d (line %d)", bci(), lineno);
st->print("@%d (line %d)", bci(), lineno);
} else {
st->print_cr("@%d", bci());
st->print("@%d", bci());
}
if (should_reexecute()) {
st->print(" reexecute=true");
}
st->cr();
}
void ScopeDesc::print_on(outputStream* st) const {
@ -174,7 +178,7 @@ void ScopeDesc::print_on(outputStream* st) const {
void ScopeDesc::print_on(outputStream* st, PcDesc* pd) const {
// header
if (pd != NULL) {
tty->print_cr("ScopeDesc(pc=" PTR_FORMAT " offset=%x):", pd->real_pc(_code), pd->pc_offset());
st->print_cr("ScopeDesc(pc=" PTR_FORMAT " offset=%x):", pd->real_pc(_code), pd->pc_offset());
}
print_value_on(st);
@ -192,7 +196,7 @@ void ScopeDesc::print_on(outputStream* st, PcDesc* pd) const {
// locals
{ GrowableArray<ScopeValue*>* l = ((ScopeDesc*) this)->locals();
if (l != NULL) {
tty->print_cr(" Locals");
st->print_cr(" Locals");
for (int index = 0; index < l->length(); index++) {
st->print(" - l%d: ", index);
l->at(index)->print_on(st);
@ -205,7 +209,7 @@ void ScopeDesc::print_on(outputStream* st, PcDesc* pd) const {
if (l != NULL) {
st->print_cr(" Expression stack");
for (int index = 0; index < l->length(); index++) {
st->print(" - @%d: ", index);
st->print(" - @%d: ", index);
l->at(index)->print_on(st);
st->cr();
}
@ -225,12 +229,12 @@ void ScopeDesc::print_on(outputStream* st, PcDesc* pd) const {
#ifdef COMPILER2
if (DoEscapeAnalysis && is_top() && _objects != NULL) {
tty->print_cr("Objects");
st->print_cr(" Objects");
for (int i = 0; i < _objects->length(); i++) {
ObjectValue* sv = (ObjectValue*) _objects->at(i);
tty->print(" - %d: ", sv->id());
sv->print_fields_on(tty);
tty->cr();
st->print(" - %d: ", sv->id());
sv->print_fields_on(st);
st->cr();
}
}
#endif // COMPILER2

View File

@ -25,8 +25,8 @@
#include "precompiled.hpp"
#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
#include "gc_interface/collectedHeap.hpp"
#include "memory/freeBlockDictionary.hpp"
#include "memory/sharedHeap.hpp"
#include "runtime/globals.hpp"
#include "runtime/mutex.hpp"
#include "runtime/orderAccess.inline.hpp"

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp"
#include "memory/iterator.inline.hpp"
#include "memory/specialized_oop_closures.hpp"
// Generate CMS specialized oop_oop_iterate functions.
SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_CMS(ALL_KLASS_OOP_OOP_ITERATE_DEFN)

View File

@ -32,6 +32,7 @@
#include "gc_interface/collectedHeap.inline.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/blockOffsetTable.inline.hpp"
#include "memory/genCollectedHeap.hpp"
#include "memory/resourceArea.hpp"
#include "memory/space.inline.hpp"
#include "memory/universe.inline.hpp"
@ -673,10 +674,10 @@ void FreeListSpace_DCTOC::walk_mem_region_with_cl(MemRegion mr,
HeapWord* bottom, \
HeapWord* top, \
ClosureType* cl) { \
bool is_par = SharedHeap::heap()->n_par_threads() > 0; \
bool is_par = GenCollectedHeap::heap()->n_par_threads() > 0; \
if (is_par) { \
assert(SharedHeap::heap()->n_par_threads() == \
SharedHeap::heap()->workers()->active_workers(), "Mismatch"); \
assert(GenCollectedHeap::heap()->n_par_threads() == \
GenCollectedHeap::heap()->workers()->active_workers(), "Mismatch"); \
walk_mem_region_with_cl_par(mr, bottom, top, cl); \
} else { \
walk_mem_region_with_cl_nopar(mr, bottom, top, cl); \
@ -1907,11 +1908,11 @@ CompactibleFreeListSpace::splitChunkAndReturnRemainder(FreeChunk* chunk,
assert(chunk->is_free() && ffc->is_free(), "Error");
_bt.split_block((HeapWord*)chunk, chunk->size(), new_size);
if (rem_sz < SmallForDictionary) {
bool is_par = (SharedHeap::heap()->n_par_threads() > 0);
bool is_par = (GenCollectedHeap::heap()->n_par_threads() > 0);
if (is_par) _indexedFreeListParLocks[rem_sz]->lock();
assert(!is_par ||
(SharedHeap::heap()->n_par_threads() ==
SharedHeap::heap()->workers()->active_workers()), "Mismatch");
(GenCollectedHeap::heap()->n_par_threads() ==
GenCollectedHeap::heap()->workers()->active_workers()), "Mismatch");
returnChunkToFreeList(ffc);
split(size, rem_sz);
if (is_par) _indexedFreeListParLocks[rem_sz]->unlock();
@ -1982,7 +1983,7 @@ void CompactibleFreeListSpace::save_marks() {
bool CompactibleFreeListSpace::no_allocs_since_save_marks() {
assert(_promoInfo.tracking(), "No preceding save_marks?");
assert(SharedHeap::heap()->n_par_threads() == 0,
assert(GenCollectedHeap::heap()->n_par_threads() == 0,
"Shouldn't be called if using parallel gc.");
return _promoInfo.noPromotions();
}
@ -1991,7 +1992,7 @@ bool CompactibleFreeListSpace::no_allocs_since_save_marks() {
\
void CompactibleFreeListSpace:: \
oop_since_save_marks_iterate##nv_suffix(OopClosureType* blk) { \
assert(SharedHeap::heap()->n_par_threads() == 0, \
assert(GenCollectedHeap::heap()->n_par_threads() == 0, \
"Shouldn't be called (yet) during parallel part of gc."); \
_promoInfo.promoted_oops_iterate##nv_suffix(blk); \
/* \
@ -2442,11 +2443,10 @@ void CompactibleFreeListSpace::verify() const {
{
VerifyAllOopsClosure cl(_collector, this, span, past_remark,
_collector->markBitMap());
CollectedHeap* ch = Universe::heap();
// Iterate over all oops in the heap. Uses the _no_header version
// since we are not interested in following the klass pointers.
ch->oop_iterate_no_header(&cl);
GenCollectedHeap::heap()->oop_iterate_no_header(&cl);
}
if (VerifyObjectStartArray) {

View File

@ -28,7 +28,7 @@
#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
#include "gc_implementation/concurrentMarkSweep/promotionInfo.hpp"
#include "memory/binaryTreeDictionary.hpp"
#include "memory/blockOffsetTable.inline.hpp"
#include "memory/blockOffsetTable.hpp"
#include "memory/freeList.hpp"
#include "memory/space.hpp"

View File

@ -53,6 +53,7 @@
#include "memory/padded.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "memory/strongRootsScope.hpp"
#include "memory/tenuredGeneration.hpp"
#include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp"
@ -64,6 +65,7 @@
#include "runtime/vmThread.hpp"
#include "services/memoryService.hpp"
#include "services/runtimeService.hpp"
#include "utilities/stack.inline.hpp"
// statics
CMSCollector* ConcurrentMarkSweepGeneration::_collector = NULL;
@ -208,10 +210,6 @@ ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
use_adaptive_freelists,
dictionaryChoice);
NOT_PRODUCT(debug_cms_space = _cmsSpace;)
if (_cmsSpace == NULL) {
vm_exit_during_initialization(
"CompactibleFreeListSpace allocation failure");
}
_cmsSpace->_gen = this;
_gc_stats = new CMSGCStats();
@ -230,14 +228,8 @@ ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
_par_gc_thread_states =
NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads, mtGC);
if (_par_gc_thread_states == NULL) {
vm_exit_during_initialization("Could not allocate par gc structs");
}
for (uint i = 0; i < ParallelGCThreads; i++) {
_par_gc_thread_states[i] = new CMSParGCThreadState(cmsSpace());
if (_par_gc_thread_states[i] == NULL) {
vm_exit_during_initialization("Could not allocate par gc structs");
}
}
} else {
_par_gc_thread_states = NULL;
@ -308,8 +300,6 @@ void CMSCollector::ref_processor_init() {
AdaptiveSizePolicy* CMSCollector::size_policy() {
GenCollectedHeap* gch = GenCollectedHeap::heap();
assert(gch->kind() == CollectedHeap::GenCollectedHeap,
"Wrong type of heap");
return gch->gen_policy()->size_policy();
}
@ -586,11 +576,6 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
return;
}
_hash_seed = NEW_C_HEAP_ARRAY(int, num_queues, mtGC);
if (_hash_seed == NULL) {
warning("_hash_seed array allocation failure");
return;
}
typedef Padded<OopTaskQueue> PaddedOopTaskQueue;
for (i = 0; i < num_queues; i++) {
PaddedOopTaskQueue *q = new PaddedOopTaskQueue();
@ -633,12 +618,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
_eden_chunk_index = 0;
_eden_chunk_capacity = (_young_gen->max_capacity()+CMSSamplingGrain)/CMSSamplingGrain;
_eden_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, _eden_chunk_capacity, mtGC);
if (_eden_chunk_array == NULL) {
_eden_chunk_capacity = 0;
warning("GC/CMS: _eden_chunk_array allocation failure");
}
}
assert(_eden_chunk_array != NULL || _eden_chunk_capacity == 0, "Error");
// Support for parallelizing survivor space rescan
if ((CMSParallelRemarkEnabled && CMSParallelSurvivorRemarkEnabled) || CMSParallelInitialMarkEnabled) {
@ -648,52 +628,15 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
_survivor_plab_array = NEW_C_HEAP_ARRAY(ChunkArray, ParallelGCThreads, mtGC);
_survivor_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, 2*max_plab_samples, mtGC);
_cursor = NEW_C_HEAP_ARRAY(size_t, ParallelGCThreads, mtGC);
if (_survivor_plab_array == NULL || _survivor_chunk_array == NULL
|| _cursor == NULL) {
warning("Failed to allocate survivor plab/chunk array");
if (_survivor_plab_array != NULL) {
FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array);
_survivor_plab_array = NULL;
}
if (_survivor_chunk_array != NULL) {
FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array);
_survivor_chunk_array = NULL;
}
if (_cursor != NULL) {
FREE_C_HEAP_ARRAY(size_t, _cursor);
_cursor = NULL;
}
} else {
_survivor_chunk_capacity = 2*max_plab_samples;
for (uint i = 0; i < ParallelGCThreads; i++) {
HeapWord** vec = NEW_C_HEAP_ARRAY(HeapWord*, max_plab_samples, mtGC);
if (vec == NULL) {
warning("Failed to allocate survivor plab array");
for (int j = i; j > 0; j--) {
FREE_C_HEAP_ARRAY(HeapWord*, _survivor_plab_array[j-1].array());
}
FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array);
FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array);
_survivor_plab_array = NULL;
_survivor_chunk_array = NULL;
_survivor_chunk_capacity = 0;
break;
} else {
ChunkArray* cur =
::new (&_survivor_plab_array[i]) ChunkArray(vec,
max_plab_samples);
assert(cur->end() == 0, "Should be 0");
assert(cur->array() == vec, "Should be vec");
assert(cur->capacity() == max_plab_samples, "Error");
}
}
_survivor_chunk_capacity = 2*max_plab_samples;
for (uint i = 0; i < ParallelGCThreads; i++) {
HeapWord** vec = NEW_C_HEAP_ARRAY(HeapWord*, max_plab_samples, mtGC);
ChunkArray* cur = ::new (&_survivor_plab_array[i]) ChunkArray(vec, max_plab_samples);
assert(cur->end() == 0, "Should be 0");
assert(cur->array() == vec, "Should be vec");
assert(cur->capacity() == max_plab_samples, "Error");
}
}
assert( ( _survivor_plab_array != NULL
&& _survivor_chunk_array != NULL)
|| ( _survivor_chunk_capacity == 0
&& _survivor_chunk_index == 0),
"Error");
NOT_PRODUCT(_overflow_counter = CMSMarkStackOverflowInterval;)
_gc_counters = new CollectorCounters("CMS", 1);
@ -1037,7 +980,7 @@ oop ConcurrentMarkSweepGeneration::promote(oop obj, size_t obj_size) {
assert_lock_strong(freelistLock());
#ifndef PRODUCT
if (Universe::heap()->promotion_should_fail()) {
if (GenCollectedHeap::heap()->promotion_should_fail()) {
return NULL;
}
#endif // #ifndef PRODUCT
@ -1114,7 +1057,7 @@ ConcurrentMarkSweepGeneration::par_promote(int thread_num,
oop old, markOop m,
size_t word_sz) {
#ifndef PRODUCT
if (Universe::heap()->promotion_should_fail()) {
if (GenCollectedHeap::heap()->promotion_should_fail()) {
return NULL;
}
#endif // #ifndef PRODUCT
@ -2524,7 +2467,7 @@ void CMSCollector::verify_after_remark_work_1() {
verification_mark_bm()->iterate(&vcl);
if (vcl.failed()) {
gclog_or_tty->print("Verification failed");
Universe::heap()->print_on(gclog_or_tty);
gch->print_on(gclog_or_tty);
fatal("CMS: failed marking verification after remark");
}
}
@ -3071,10 +3014,10 @@ void CMSCollector::checkpointRootsInitialWork() {
gch->set_par_threads(n_workers);
initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
if (n_workers > 1) {
GenCollectedHeap::StrongRootsScope srs(gch);
StrongRootsScope srs;
workers->run_task(&tsk);
} else {
GenCollectedHeap::StrongRootsScope srs(gch);
StrongRootsScope srs;
tsk.work(0);
}
gch->set_par_threads(0);
@ -5169,11 +5112,11 @@ void CMSCollector::do_remark_parallel() {
// necessarily be so, since it's possible that we are doing
// ST marking.
ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true);
GenCollectedHeap::StrongRootsScope srs(gch);
StrongRootsScope srs;
workers->run_task(&tsk);
} else {
ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
GenCollectedHeap::StrongRootsScope srs(gch);
StrongRootsScope srs;
tsk.work(0);
}
@ -5241,7 +5184,7 @@ void CMSCollector::do_remark_non_parallel() {
verify_work_stacks_empty();
gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
GenCollectedHeap::StrongRootsScope srs(gch);
StrongRootsScope srs;
gch->gen_process_roots(_cmsGen->level(),
true, // younger gens as roots

View File

@ -38,8 +38,8 @@
#include "runtime/mutexLocker.hpp"
#include "runtime/virtualspace.hpp"
#include "services/memoryService.hpp"
#include "utilities/bitMap.inline.hpp"
#include "utilities/stack.inline.hpp"
#include "utilities/bitMap.hpp"
#include "utilities/stack.hpp"
#include "utilities/taskqueue.hpp"
#include "utilities/yieldingWorkgroup.hpp"

View File

@ -27,7 +27,7 @@
#include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp"
#include "gc_implementation/shared/concurrentGCThread.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/thread.hpp"
class ConcurrentMarkSweepGeneration;
class CMSCollector;

View File

@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
#include "memory/genOopClosures.hpp"
#include "gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp"
#include "gc_implementation/concurrentMarkSweep/promotionInfo.hpp"
#include "oops/markOop.inline.hpp"

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -62,7 +62,7 @@ void VM_CMS_Operation::verify_before_gc() {
HandleMark hm;
FreelistLocker x(_collector);
MutexLockerEx y(_collector->bitMapLock(), Mutex::_no_safepoint_check_flag);
Universe::heap()->prepare_for_verify();
GenCollectedHeap::heap()->prepare_for_verify();
Universe::verify();
}
}

View File

@ -34,6 +34,7 @@
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/g1StringDedup.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionManager.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
@ -46,6 +47,7 @@
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "memory/strongRootsScope.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
@ -115,7 +117,7 @@ void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
}
size_t CMBitMap::compute_size(size_t heap_size) {
return heap_size / mark_distance();
return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
}
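The compute_size() change above rounds the bitmap size up to the reservation granularity before the space is reserved. A minimal sketch of that rounding, assuming a power-of-two granule (the constant here is illustrative, not G1's actual value):

#include <cstddef>
#include <cstdio>

// Round size up to the next multiple of alignment (alignment must be a power of two).
static size_t align_size_up(size_t size, size_t alignment) {
  return (size + alignment - 1) & ~(alignment - 1);
}

int main() {
  const size_t granule = 4096;                           // assumed allocation granularity
  std::printf("%zu\n", align_size_up(100000, granule));  // prints 102400
  return 0;
}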
size_t CMBitMap::mark_distance() {
@ -1325,7 +1327,7 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
if (VerifyDuringGC) {
HandleMark hm; // handle scope
Universe::heap()->prepare_for_verify();
g1h->prepare_for_verify();
Universe::verify(VerifyOption_G1UsePrevMarking,
" VerifyDuringGC:(before)");
}
@ -1352,7 +1354,7 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
// Verify the heap w.r.t. the previous marking bitmap.
if (VerifyDuringGC) {
HandleMark hm; // handle scope
Universe::heap()->prepare_for_verify();
g1h->prepare_for_verify();
Universe::verify(VerifyOption_G1UsePrevMarking,
" VerifyDuringGC:(overflow)");
}
@ -1378,7 +1380,7 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
if (VerifyDuringGC) {
HandleMark hm; // handle scope
Universe::heap()->prepare_for_verify();
g1h->prepare_for_verify();
Universe::verify(VerifyOption_G1UseNextMarking,
" VerifyDuringGC:(after)");
}
@ -1986,13 +1988,13 @@ void ConcurrentMark::cleanup() {
if (VerifyDuringGC) {
HandleMark hm; // handle scope
Universe::heap()->prepare_for_verify();
g1h->prepare_for_verify();
Universe::verify(VerifyOption_G1UsePrevMarking,
" VerifyDuringGC:(before)");
}
g1h->check_bitmaps("Cleanup Start");
G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
G1CollectorPolicy* g1p = g1h->g1_policy();
g1p->record_concurrent_mark_cleanup_start();
double start = os::elapsedTime();
@ -2097,7 +2099,7 @@ void ConcurrentMark::cleanup() {
if (VerifyDuringGC) {
HandleMark hm; // handle scope
Universe::heap()->prepare_for_verify();
g1h->prepare_for_verify();
Universe::verify(VerifyOption_G1UsePrevMarking,
" VerifyDuringGC:(after)");
}
@ -2650,7 +2652,7 @@ void ConcurrentMark::checkpointRootsFinalWork() {
g1h->ensure_parsability(false);
G1CollectedHeap::StrongRootsScope srs(g1h);
StrongRootsScope srs;
// this is remark, so we'll use up all active threads
uint active_workers = g1h->workers()->active_workers();
if (active_workers == 0) {
@ -3392,22 +3394,29 @@ void ConcurrentMark::print_finger() {
}
#endif
void CMTask::scan_object(oop obj) {
template<bool scan>
inline void CMTask::process_grey_object(oop obj) {
assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
if (_cm->verbose_high()) {
gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
gclog_or_tty->print_cr("[%u] processing grey object " PTR_FORMAT,
_worker_id, p2i((void*) obj));
}
size_t obj_size = obj->size();
_words_scanned += obj_size;
obj->oop_iterate(_cm_oop_closure);
if (scan) {
obj->oop_iterate(_cm_oop_closure);
}
statsOnly( ++_objs_scanned );
check_limits();
}
template void CMTask::process_grey_object<true>(oop);
template void CMTask::process_grey_object<false>(oop);
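To make the template split above concrete, here is a small, hypothetical stand-alone analogue (plain C++ with a FakeObject stand-in, not HotSpot types): the bookkeeping path is shared, while the reference scan is selected at compile time, so the <false> instantiation used for typeArrays does no field iteration at all.

#include <cstddef>
#include <cstdio>

struct FakeObject {
  size_t size_in_words;   // stand-in for obj->size()
};

template <bool scan>
void process_grey_object(const FakeObject& obj, size_t& words_scanned) {
  words_scanned += obj.size_in_words;      // bookkeeping happens for both instantiations
  if (scan) {
    // Stand-in for obj->oop_iterate(closure): only the scanning
    // instantiation visits the object's reference fields.
    std::printf("scanning %zu words\n", obj.size_in_words);
  }
}

int main() {
  size_t scanned = 0;
  process_grey_object<true>({8}, scanned);       // ordinary object: scanned
  process_grey_object<false>({1024}, scanned);   // typeArray: bookkeeping only
  std::printf("total: %zu words\n", scanned);
  return 0;
}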
// Closure for iteration over bitmaps
class CMBitMapClosure : public BitMapClosure {
private:

View File

@ -1100,6 +1100,12 @@ private:
void regular_clock_call();
bool concurrent() { return _concurrent; }
// Test whether objAddr might have already been passed over by the
// mark bitmap scan, and so needs to be pushed onto the mark stack.
bool is_below_finger(HeapWord* objAddr, HeapWord* global_finger) const;
template<bool scan> void process_grey_object(oop obj);
public:
// It resets the task; it should be called right at the beginning of
// a marking phase.
@ -1152,7 +1158,7 @@ public:
inline void deal_with_reference(oop obj);
// It scans an object and visits its children.
void scan_object(oop obj);
void scan_object(oop obj) { process_grey_object<true>(obj); }
// It pushes an object on the local queue.
inline void push(oop obj);

View File

@ -259,14 +259,35 @@ inline void CMTask::push(oop obj) {
++_local_pushes );
}
// This determines whether the method below will check both the local
// and global fingers when determining whether to push on the stack a
// gray object (value 1) or whether it will only check the global one
// (value 0). The tradeoffs are that the former will be a bit more
// accurate and possibly push less on the stack, but it might also be
// a little bit slower.
inline bool CMTask::is_below_finger(HeapWord* objAddr,
HeapWord* global_finger) const {
// If objAddr is above the global finger, then the mark bitmap scan
// will find it later, and no push is needed. Similarly, if we have
// a current region and objAddr is between the local finger and the
// end of the current region, then no push is needed. The tradeoff
// of checking both vs only checking the global finger is that the
// local check will be more accurate and so result in fewer pushes,
// but may also be a little slower.
if (_finger != NULL) {
// We have a current region.
#define _CHECK_BOTH_FINGERS_ 1
// Finger and region values are all NULL or all non-NULL. We
// use _finger to check since we immediately use its value.
assert(_curr_region != NULL, "invariant");
assert(_region_limit != NULL, "invariant");
assert(_region_limit <= global_finger, "invariant");
// True if objAddr is less than the local finger, or is between
// the region limit and the global finger.
if (objAddr < _finger) {
return true;
} else if (objAddr < _region_limit) {
return false;
} // Else check global finger.
}
// Check global finger.
return objAddr < global_finger;
}
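A hypothetical stand-alone sketch of the push-decision predicate defined above, using plain char* in place of HeapWord*: an address only needs to be pushed on the mark stack if the bitmap scan has already passed it, i.e. it is below the task's local finger, or, when it lies outside the current region, below the global finger.

#include <cassert>
#include <cstdio>

static bool is_below_finger(const char* addr,
                            const char* local_finger,   // may be null
                            const char* region_limit,   // meaningful only if local_finger is set
                            const char* global_finger) {
  if (local_finger != nullptr) {
    if (addr < local_finger) return true;   // local scan already passed this address
    if (addr < region_limit) return false;  // still ahead of the local scan in this region
  }
  return addr < global_finger;              // otherwise defer to the global finger
}

int main() {
  char heap[64];
  // Local finger at heap+16, region ends at heap+32, global finger at heap+48.
  assert( is_below_finger(heap +  8, heap + 16, heap + 32, heap + 48)); // behind local scan
  assert(!is_below_finger(heap + 24, heap + 16, heap + 32, heap + 48)); // ahead in current region
  assert( is_below_finger(heap + 40, heap + 16, heap + 32, heap + 48)); // behind global scan
  std::puts("ok");
  return 0;
}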
inline void CMTask::deal_with_reference(oop obj) {
if (_cm->verbose_high()) {
@ -297,50 +318,43 @@ inline void CMTask::deal_with_reference(oop obj) {
// CAS done in CMBitMap::parMark() call in the routine above.
HeapWord* global_finger = _cm->finger();
#if _CHECK_BOTH_FINGERS_
// we will check both the local and global fingers
if (_finger != NULL && objAddr < _finger) {
if (_cm->verbose_high()) {
gclog_or_tty->print_cr("[%u] below the local finger ("PTR_FORMAT"), "
"pushing it", _worker_id, p2i(_finger));
// We only need to push a newly grey object on the mark
// stack if it is in a section of memory the mark bitmap
// scan has already examined. Mark bitmap scanning
// maintains progress "fingers" for determining that.
//
// Notice that the global finger might be moving forward
// concurrently. This is not a problem. In the worst case, we
// mark the object while it is above the global finger and, by
// the time we read the global finger, it has moved forward
// past this object. In this case, the object will probably
// be visited when a task is scanning the region and will also
// be pushed on the stack. So, some duplicate work, but no
// correctness problems.
if (is_below_finger(objAddr, global_finger)) {
if (obj->is_typeArray()) {
// Immediately process arrays of primitive types, rather
// than pushing on the mark stack. This keeps us from
// adding humongous objects to the mark stack that might
// be reclaimed before the entry is processed - see
// selection of candidates for eager reclaim of humongous
// objects. The cost of the additional type test is
// mitigated by avoiding a trip through the mark stack,
// by only doing a bookkeeping update and avoiding the
// actual scan of the object - a typeArray contains no
// references, and the metadata is built-in.
process_grey_object<false>(obj);
} else {
if (_cm->verbose_high()) {
gclog_or_tty->print_cr("[%u] below a finger (local: " PTR_FORMAT
", global: " PTR_FORMAT ") pushing "
PTR_FORMAT " on mark stack",
_worker_id, p2i(_finger),
p2i(global_finger), p2i(objAddr));
}
push(obj);
}
push(obj);
} else if (_curr_region != NULL && objAddr < _region_limit) {
// do nothing
} else if (objAddr < global_finger) {
// Notice that the global finger might be moving forward
// concurrently. This is not a problem. In the worst case, we
// mark the object while it is above the global finger and, by
// the time we read the global finger, it has moved forward
// passed this object. In this case, the object will probably
// be visited when a task is scanning the region and will also
// be pushed on the stack. So, some duplicate work, but no
// correctness problems.
if (_cm->verbose_high()) {
gclog_or_tty->print_cr("[%u] below the global finger "
"("PTR_FORMAT"), pushing it",
_worker_id, p2i(global_finger));
}
push(obj);
} else {
// do nothing
}
#else // _CHECK_BOTH_FINGERS_
// we will only check the global finger
if (objAddr < global_finger) {
// see long comment above
if (_cm->verbose_high()) {
gclog_or_tty->print_cr("[%u] below the global finger "
"("PTR_FORMAT"), pushing it",
_worker_id, p2i(global_finger));
}
push(obj);
}
#endif // _CHECK_BOTH_FINGERS_
}
}
}
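Condensing the policy described in the comments above into a hypothetical decision function (plain C++, names and enum are illustrative only): a newly marked object is acted on only if the scan has already passed it, and a typeArray is then processed in place, bookkeeping only, rather than pushed, so it can never linger on the mark stack.

#include <cstdio>

enum class Action { Ignore, Push, ProcessInPlace };

static Action grey_object_action(bool below_finger, bool is_type_array) {
  if (!below_finger) return Action::Ignore;        // the bitmap scan will reach it anyway
  return is_type_array ? Action::ProcessInPlace    // no references to follow
                       : Action::Push;             // defer the full scan to the mark stack
}

int main() {
  std::printf("%d %d %d\n",
              (int)grey_object_action(false, false),   // 0: Ignore
              (int)grey_object_action(true,  true),    // 2: ProcessInPlace
              (int)grey_object_action(true,  false));  // 1: Push
  return 0;
}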

View File

@ -29,6 +29,9 @@
#include "gc_implementation/g1/g1AllocRegion.hpp"
#include "gc_implementation/g1/g1InCSetState.hpp"
#include "gc_implementation/shared/parGCAllocBuffer.hpp"
#include "gc_interface/collectedHeap.hpp"
class EvacuationInfo;
// Base class for G1 allocators.
class G1Allocator : public CHeapObj<mtGC> {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
#include "gc_implementation/g1/g1CollectedHeap.hpp"
#include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#include "memory/space.hpp"
@ -303,9 +304,9 @@ void G1BlockOffsetArray::alloc_block_work2(HeapWord** threshold_, size_t* index_
assert(blk_start <= threshold, "blk_start should be at or before threshold");
assert(pointer_delta(threshold, blk_start) <= N_words,
"offset should be <= BlockOffsetSharedArray::N");
assert(Universe::heap()->is_in_reserved(blk_start),
assert(G1CollectedHeap::heap()->is_in_reserved(blk_start),
"reference must be into the heap");
assert(Universe::heap()->is_in_reserved(blk_end-1),
assert(G1CollectedHeap::heap()->is_in_reserved(blk_end-1),
"limit must be within the heap");
assert(threshold == _array->_reserved.start() + index*N_words,
"index must agree with threshold");
@ -458,7 +459,7 @@ G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array,
}
HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold_raw() {
assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
assert(!G1CollectedHeap::heap()->is_in_reserved(_array->_offset_array),
"just checking");
_next_offset_index = _array->index_for_raw(_bottom);
_next_offset_index++;
@ -468,7 +469,7 @@ HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold_raw() {
}
void G1BlockOffsetArrayContigSpace::zero_bottom_entry_raw() {
assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
assert(!G1CollectedHeap::heap()->is_in_reserved(_array->_offset_array),
"just checking");
size_t bottom_index = _array->index_for_raw(_bottom);
assert(_array->address_for_index_raw(bottom_index) == _bottom,
@ -477,7 +478,7 @@ void G1BlockOffsetArrayContigSpace::zero_bottom_entry_raw() {
}
HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold() {
assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
assert(!G1CollectedHeap::heap()->is_in_reserved(_array->_offset_array),
"just checking");
_next_offset_index = _array->index_for(_bottom);
_next_offset_index++;

View File

@ -70,6 +70,7 @@
#include "runtime/orderAccess.inline.hpp"
#include "runtime/vmThread.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/stack.inline.hpp"
size_t G1CollectedHeap::_humongous_object_threshold_in_words = 0;
@ -1728,7 +1729,7 @@ void G1CollectedHeap::shrink(size_t shrink_bytes) {
G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
SharedHeap(),
CollectedHeap(),
_g1_policy(policy_),
_dirty_card_queue_set(false),
_into_cset_dirty_card_queue_set(false),
@ -1746,7 +1747,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
_secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
_old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
_humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
_humongous_is_live(),
_humongous_reclaim_candidates(),
_has_humongous_reclaim_candidates(false),
_free_regions_coming(false),
_young_list(new YoungList(this)),
@ -1770,6 +1771,11 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
_g1h = this;
_workers = new FlexibleWorkGang("GC Thread", ParallelGCThreads,
/* are_GC_task_threads */true,
/* are_ConcurrentGC_threads */false);
_workers->initialize_workers();
_allocator = G1Allocator::create_allocator(_g1h);
_humongous_object_threshold_in_words = HeapRegion::GrainWords / 2;
@ -1797,6 +1803,26 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
guarantee(_task_queues != NULL, "task_queues allocation failure.");
}
G1RegionToSpaceMapper* G1CollectedHeap::create_aux_memory_mapper(const char* description,
size_t size,
size_t translation_factor) {
size_t preferred_page_size = os::page_size_for_region_unaligned(size, 1);
// Allocate a new reserved space, preferring to use large pages.
ReservedSpace rs(size, preferred_page_size);
G1RegionToSpaceMapper* result =
G1RegionToSpaceMapper::create_mapper(rs,
size,
rs.alignment(),
HeapRegion::GrainBytes,
translation_factor,
mtGC);
if (TracePageSizes) {
gclog_or_tty->print_cr("G1 '%s': pg_sz=" SIZE_FORMAT " base=" PTR_FORMAT " size=" SIZE_FORMAT " alignment=" SIZE_FORMAT " reqsize=" SIZE_FORMAT,
description, preferred_page_size, p2i(rs.base()), rs.size(), rs.alignment(), size);
}
return result;
}
jint G1CollectedHeap::initialize() {
CollectedHeap::pre_initialize();
os::enable_vtime();
@ -1864,57 +1890,35 @@ jint G1CollectedHeap::initialize() {
ReservedSpace g1_rs = heap_rs.first_part(max_byte_size);
G1RegionToSpaceMapper* heap_storage =
G1RegionToSpaceMapper::create_mapper(g1_rs,
g1_rs.size(),
UseLargePages ? os::large_page_size() : os::vm_page_size(),
HeapRegion::GrainBytes,
1,
mtJavaHeap);
heap_storage->set_mapping_changed_listener(&_listener);
// Reserve space for the block offset table. We do not support automatic uncommit
// for the card table at this time. BOT only.
ReservedSpace bot_rs(G1BlockOffsetSharedArray::compute_size(g1_rs.size() / HeapWordSize));
// Create storage for the BOT, card table, card counts table (hot card cache) and the bitmaps.
G1RegionToSpaceMapper* bot_storage =
G1RegionToSpaceMapper::create_mapper(bot_rs,
os::vm_page_size(),
HeapRegion::GrainBytes,
G1BlockOffsetSharedArray::N_bytes,
mtGC);
create_aux_memory_mapper("Block offset table",
G1BlockOffsetSharedArray::compute_size(g1_rs.size() / HeapWordSize),
G1BlockOffsetSharedArray::N_bytes);
ReservedSpace cardtable_rs(G1SATBCardTableLoggingModRefBS::compute_size(g1_rs.size() / HeapWordSize));
G1RegionToSpaceMapper* cardtable_storage =
G1RegionToSpaceMapper::create_mapper(cardtable_rs,
os::vm_page_size(),
HeapRegion::GrainBytes,
G1BlockOffsetSharedArray::N_bytes,
mtGC);
create_aux_memory_mapper("Card table",
G1SATBCardTableLoggingModRefBS::compute_size(g1_rs.size() / HeapWordSize),
G1BlockOffsetSharedArray::N_bytes);
// Reserve space for the card counts table.
ReservedSpace card_counts_rs(G1BlockOffsetSharedArray::compute_size(g1_rs.size() / HeapWordSize));
G1RegionToSpaceMapper* card_counts_storage =
G1RegionToSpaceMapper::create_mapper(card_counts_rs,
os::vm_page_size(),
HeapRegion::GrainBytes,
G1BlockOffsetSharedArray::N_bytes,
mtGC);
create_aux_memory_mapper("Card counts table",
G1BlockOffsetSharedArray::compute_size(g1_rs.size() / HeapWordSize),
G1BlockOffsetSharedArray::N_bytes);
// Reserve space for prev and next bitmap.
size_t bitmap_size = CMBitMap::compute_size(g1_rs.size());
ReservedSpace prev_bitmap_rs(ReservedSpace::allocation_align_size_up(bitmap_size));
G1RegionToSpaceMapper* prev_bitmap_storage =
G1RegionToSpaceMapper::create_mapper(prev_bitmap_rs,
os::vm_page_size(),
HeapRegion::GrainBytes,
CMBitMap::mark_distance(),
mtGC);
ReservedSpace next_bitmap_rs(ReservedSpace::allocation_align_size_up(bitmap_size));
create_aux_memory_mapper("Prev Bitmap", bitmap_size, CMBitMap::mark_distance());
G1RegionToSpaceMapper* next_bitmap_storage =
G1RegionToSpaceMapper::create_mapper(next_bitmap_rs,
os::vm_page_size(),
HeapRegion::GrainBytes,
CMBitMap::mark_distance(),
mtGC);
create_aux_memory_mapper("Next Bitmap", bitmap_size, CMBitMap::mark_distance());
_hrm.initialize(heap_storage, prev_bitmap_storage, next_bitmap_storage, bot_storage, cardtable_storage, card_counts_storage);
g1_barrier_set()->initialize(cardtable_storage);
@ -1937,8 +1941,14 @@ jint G1CollectedHeap::initialize() {
_g1h = this;
_in_cset_fast_test.initialize(_hrm.reserved().start(), _hrm.reserved().end(), HeapRegion::GrainBytes);
_humongous_is_live.initialize(_hrm.reserved().start(), _hrm.reserved().end(), HeapRegion::GrainBytes);
{
HeapWord* start = _hrm.reserved().start();
HeapWord* end = _hrm.reserved().end();
size_t granularity = HeapRegion::GrainBytes;
_in_cset_fast_test.initialize(start, end, granularity);
_humongous_reclaim_candidates.initialize(start, end, granularity);
}
// Create the ConcurrentMark data structure and thread.
// (Must do this late, so that "max_regions" is defined.)
@ -2026,15 +2036,15 @@ void G1CollectedHeap::stop() {
}
}
void G1CollectedHeap::clear_humongous_is_live_table() {
guarantee(G1EagerReclaimHumongousObjects, "Should only be called if true");
_humongous_is_live.clear();
}
size_t G1CollectedHeap::conservative_max_heap_alignment() {
return HeapRegion::max_region_size();
}
void G1CollectedHeap::post_initialize() {
CollectedHeap::post_initialize();
ref_processing_init();
}
void G1CollectedHeap::ref_processing_init() {
// Reference processing in G1 currently works as follows:
//
@ -2071,7 +2081,6 @@ void G1CollectedHeap::ref_processing_init() {
// * Discovery is atomic - i.e. not concurrent.
// * Reference discovery will not need a barrier.
SharedHeap::ref_processing_init();
MemRegion mr = reserved_region();
// Concurrent Mark ref processor
@ -2128,6 +2137,7 @@ void G1CollectedHeap::reset_gc_time_stamps(HeapRegion* hr) {
}
#ifndef PRODUCT
class CheckGCTimeStampsHRClosure : public HeapRegionClosure {
private:
unsigned _gc_time_stamp;
@ -2462,11 +2472,6 @@ public:
}
};
void G1CollectedHeap::oop_iterate(ExtendedOopClosure* cl) {
IterateOopClosureRegionClosure blk(cl);
heap_region_iterate(&blk);
}
// Iterates an ObjectClosure over all objects within a HeapRegion.
class IterateObjectClosureRegionClosure: public HeapRegionClosure {
@ -2486,23 +2491,6 @@ void G1CollectedHeap::object_iterate(ObjectClosure* cl) {
heap_region_iterate(&blk);
}
// Calls a SpaceClosure on a HeapRegion.
class SpaceClosureRegionClosure: public HeapRegionClosure {
SpaceClosure* _cl;
public:
SpaceClosureRegionClosure(SpaceClosure* cl) : _cl(cl) {}
bool doHeapRegion(HeapRegion* r) {
_cl->do_space(r);
return false;
}
};
void G1CollectedHeap::space_iterate(SpaceClosure* cl) {
SpaceClosureRegionClosure blk(cl);
heap_region_iterate(&blk);
}
void G1CollectedHeap::heap_region_iterate(HeapRegionClosure* cl) const {
_hrm.iterate(cl);
}
@ -2639,23 +2627,19 @@ HeapRegion* G1CollectedHeap::next_compaction_region(const HeapRegion* from) cons
return result;
}
Space* G1CollectedHeap::space_containing(const void* addr) const {
return heap_region_containing(addr);
}
HeapWord* G1CollectedHeap::block_start(const void* addr) const {
Space* sp = space_containing(addr);
return sp->block_start(addr);
HeapRegion* hr = heap_region_containing(addr);
return hr->block_start(addr);
}
size_t G1CollectedHeap::block_size(const HeapWord* addr) const {
Space* sp = space_containing(addr);
return sp->block_size(addr);
HeapRegion* hr = heap_region_containing(addr);
return hr->block_size(addr);
}
bool G1CollectedHeap::block_is_obj(const HeapWord* addr) const {
Space* sp = space_containing(addr);
return sp->block_is_obj(addr);
HeapRegion* hr = heap_region_containing(addr);
return hr->block_is_obj(addr);
}
bool G1CollectedHeap::supports_tlab_allocation() const {
@ -3336,8 +3320,8 @@ void G1CollectedHeap::print_all_rsets() {
#endif // PRODUCT
G1CollectedHeap* G1CollectedHeap::heap() {
assert(_sh->kind() == CollectedHeap::G1CollectedHeap,
"not a garbage-first heap");
assert(_g1h != NULL, "Uninitialized access to G1CollectedHeap::heap()");
assert(_g1h->kind() == CollectedHeap::G1CollectedHeap, "Not a G1 heap");
return _g1h;
}
@ -3434,12 +3418,6 @@ size_t G1CollectedHeap::cards_scanned() {
return g1_rem_set()->cardsScanned();
}
bool G1CollectedHeap::humongous_region_is_always_live(uint index) {
HeapRegion* region = region_at(index);
assert(region->is_starts_humongous(), "Must start a humongous object");
return oop(region->bottom())->is_objArray() || !region->rem_set()->is_empty();
}
class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
private:
size_t _total_humongous;
@ -3447,14 +3425,59 @@ class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
DirtyCardQueue _dcq;
bool humongous_region_is_candidate(uint index) {
HeapRegion* region = G1CollectedHeap::heap()->region_at(index);
assert(region->is_starts_humongous(), "Must start a humongous object");
// We don't nominate objects with many remembered set entries, on
// the assumption that such objects are likely still live.
bool is_remset_small(HeapRegion* region) const {
HeapRegionRemSet* const rset = region->rem_set();
bool const allow_stale_refs = G1EagerReclaimHumongousObjectsWithStaleRefs;
return !oop(region->bottom())->is_objArray() &&
((allow_stale_refs && rset->occupancy_less_or_equal_than(G1RSetSparseRegionEntries)) ||
(!allow_stale_refs && rset->is_empty()));
return G1EagerReclaimHumongousObjectsWithStaleRefs
? rset->occupancy_less_or_equal_than(G1RSetSparseRegionEntries)
: rset->is_empty();
}
bool is_typeArray_region(HeapRegion* region) const {
return oop(region->bottom())->is_typeArray();
}
bool humongous_region_is_candidate(G1CollectedHeap* heap, HeapRegion* region) const {
assert(region->is_starts_humongous(), "Must start a humongous object");
// Candidate selection must satisfy the following constraints
// while concurrent marking is in progress:
//
// * In order to maintain SATB invariants, an object must not be
// reclaimed if it was allocated before the start of marking and
// has not had its references scanned. Such an object must have
// its references (including type metadata) scanned to ensure no
// live objects are missed by the marking process. Objects
// allocated after the start of concurrent marking don't need to
// be scanned.
//
// * An object must not be reclaimed if it is on the concurrent
// mark stack. Objects allocated after the start of concurrent
// marking are never pushed on the mark stack.
//
// Nominating only objects allocated after the start of concurrent
// marking is sufficient to meet both constraints. This may miss
// some objects that satisfy the constraints, but the marking data
// structures don't support efficiently performing the needed
// additional tests or scrubbing of the mark stack.
//
// However, we presently only nominate is_typeArray() objects.
// A humongous object containing references induces remembered
// set entries on other regions. In order to reclaim such an
// object, those remembered sets would need to be cleaned up.
//
// We also treat is_typeArray() objects specially, allowing them
// to be reclaimed even if allocated before the start of
// concurrent mark. For this we rely on mark stack insertion to
// exclude is_typeArray() objects, preventing reclaiming an object
// that is in the mark stack. We also rely on the metadata for
// such objects to be built-in and so ensured to be kept live.
// Frequent allocation and drop of large binary blobs is an
// important use case for eager reclaim, and this special handling
// may reduce needed headroom.
return is_typeArray_region(region) && is_remset_small(region);
}
public:
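A condensed, hypothetical restatement of the candidate test described in the comment above, with plain booleans and counts standing in for the real HeapRegion and remembered-set queries; the sparse threshold is illustrative, not G1RSetSparseRegionEntries' actual value.

#include <cstddef>
#include <cstdio>

// Remembered-set size test: with stale refs allowed, sparse occupancy is
// tolerated; otherwise the remembered set must be empty.
static bool is_remset_small(size_t occupied, bool allow_stale_refs, size_t sparse_threshold) {
  return allow_stale_refs ? (occupied <= sparse_threshold) : (occupied == 0);
}

// Only humongous typeArray objects with a small remembered set are nominated.
static bool is_reclaim_candidate(bool is_type_array, size_t rs_occupied,
                                 bool allow_stale_refs, size_t sparse_threshold) {
  return is_type_array && is_remset_small(rs_occupied, allow_stale_refs, sparse_threshold);
}

int main() {
  const size_t sparse = 4;
  std::printf("%d\n", is_reclaim_candidate(true,  2, true,  sparse)); // 1: typeArray, sparse remset
  std::printf("%d\n", is_reclaim_candidate(true,  2, false, sparse)); // 0: stale refs not allowed
  std::printf("%d\n", is_reclaim_candidate(false, 0, true,  sparse)); // 0: not a typeArray
  return 0;
}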
@ -3470,14 +3493,17 @@ class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
}
G1CollectedHeap* g1h = G1CollectedHeap::heap();
uint region_idx = r->hrm_index();
bool is_candidate = humongous_region_is_candidate(region_idx);
// Is_candidate already filters out humongous objects with large remembered sets.
// If we have a humongous object with a few remembered set entries, we simply flush these

// remembered set entries into the DCQS. That will result in automatic
// re-evaluation of their remembered set entries during the following evacuation
// phase.
bool is_candidate = humongous_region_is_candidate(g1h, r);
uint rindex = r->hrm_index();
g1h->set_humongous_reclaim_candidate(rindex, is_candidate);
if (is_candidate) {
_candidate_humongous++;
g1h->register_humongous_region_with_cset(rindex);
// Is_candidate already filters out humongous objects with large remembered sets.
// If we have a humongous object with a few remembered set entries, we simply flush these
// remembered set entries into the DCQS. That will result in automatic
// re-evaluation of their remembered set entries during the following evacuation
// phase.
if (!r->rem_set()->is_empty()) {
guarantee(r->rem_set()->occupancy_less_or_equal_than(G1RSetSparseRegionEntries),
"Found a not-small remembered set here. This is inconsistent with previous assumptions.");
@ -3499,8 +3525,6 @@ class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
r->rem_set()->clear_locked();
}
assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty.");
g1h->register_humongous_region_with_cset(region_idx);
_candidate_humongous++;
}
_total_humongous++;
@ -3520,6 +3544,7 @@ void G1CollectedHeap::register_humongous_regions_with_cset() {
}
double time = os::elapsed_counter();
// Collect reclaim candidate information and register candidates with cset.
RegisterHumongousWithInCSetFastTestClosure cl;
heap_region_iterate(&cl);
@ -3529,10 +3554,6 @@ void G1CollectedHeap::register_humongous_regions_with_cset() {
cl.candidate_humongous());
_has_humongous_reclaim_candidates = cl.candidate_humongous() > 0;
if (_has_humongous_reclaim_candidates || G1TraceEagerReclaimHumongousObjects) {
clear_humongous_is_live_table();
}
// Finally flush all remembered set entries to re-check into the global DCQS.
cl.flush_rem_set_entries();
}
@ -5994,11 +6015,11 @@ class G1FreeHumongousRegionClosure : public HeapRegionClosure {
// required because stale remembered sets might reference locations that
// are currently allocated into.
uint region_idx = r->hrm_index();
if (g1h->humongous_is_live(region_idx) ||
g1h->humongous_region_is_always_live(region_idx)) {
if (!g1h->is_humongous_reclaim_candidate(region_idx) ||
!r->rem_set()->is_empty()) {
if (G1TraceEagerReclaimHumongousObjects) {
gclog_or_tty->print_cr("Live humongous region %u size "SIZE_FORMAT" start "PTR_FORMAT" length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
gclog_or_tty->print_cr("Live humongous region %u size "SIZE_FORMAT" start "PTR_FORMAT" length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d reclaim candidate %d type array %d",
region_idx,
obj->size()*HeapWordSize,
r->bottom(),
@ -6006,20 +6027,21 @@ class G1FreeHumongousRegionClosure : public HeapRegionClosure {
r->rem_set()->occupied(),
r->rem_set()->strong_code_roots_list_length(),
next_bitmap->isMarked(r->bottom()),
g1h->humongous_is_live(region_idx),
obj->is_objArray()
g1h->is_humongous_reclaim_candidate(region_idx),
obj->is_typeArray()
);
}
return false;
}
guarantee(!obj->is_objArray(),
err_msg("Eagerly reclaiming object arrays is not supported, but the object "PTR_FORMAT" is.",
guarantee(obj->is_typeArray(),
err_msg("Only eagerly reclaiming type arrays is supported, but the object "
PTR_FORMAT " is not.",
r->bottom()));
if (G1TraceEagerReclaimHumongousObjects) {
gclog_or_tty->print_cr("Dead humongous region %u size "SIZE_FORMAT" start "PTR_FORMAT" length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
gclog_or_tty->print_cr("Dead humongous region %u size "SIZE_FORMAT" start "PTR_FORMAT" length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d reclaim candidate %d type array %d",
region_idx,
obj->size()*HeapWordSize,
r->bottom(),
@ -6027,8 +6049,8 @@ class G1FreeHumongousRegionClosure : public HeapRegionClosure {
r->rem_set()->occupied(),
r->rem_set()->strong_code_roots_list_length(),
next_bitmap->isMarked(r->bottom()),
g1h->humongous_is_live(region_idx),
obj->is_objArray()
g1h->is_humongous_reclaim_candidate(region_idx),
obj->is_typeArray()
);
}
// Need to clear mark bit of the humongous object if already set.
@ -6163,8 +6185,6 @@ void G1CollectedHeap::wait_while_free_regions_coming() {
}
void G1CollectedHeap::set_region_short_lived_locked(HeapRegion* hr) {
assert(heap_lock_held_for_gc(),
"the heap lock should already be held by or for this thread");
_young_list->push_region(hr);
}

View File

@ -40,9 +40,9 @@
#include "gc_implementation/g1/heapRegionSet.hpp"
#include "gc_implementation/shared/hSpaceCounters.hpp"
#include "gc_implementation/shared/parGCAllocBuffer.hpp"
#include "gc_interface/collectedHeap.hpp"
#include "memory/barrierSet.hpp"
#include "memory/memRegion.hpp"
#include "memory/sharedHeap.hpp"
#include "utilities/stack.hpp"
// A "G1CollectedHeap" is an implementation of a java heap for HotSpot.
@ -76,6 +76,7 @@ class G1OldTracer;
class EvacuationFailedInfo;
class nmethod;
class Ticks;
class FlexibleWorkGang;
typedef OverflowTaskQueue<StarTask, mtGC> RefToScanQueue;
typedef GenericTaskQueueSet<RefToScanQueue, mtGC> RefToScanQueueSet;
@ -177,7 +178,7 @@ class G1RegionMappingChangedListener : public G1MappingChangedListener {
virtual void on_commit(uint start_idx, size_t num_regions, bool zero_filled);
};
class G1CollectedHeap : public SharedHeap {
class G1CollectedHeap : public CollectedHeap {
friend class VM_CollectForMetadataAllocation;
friend class VM_G1CollectForAllocation;
friend class VM_G1CollectFull;
@ -204,6 +205,8 @@ private:
// The one and only G1CollectedHeap, so static functions can find it.
static G1CollectedHeap* _g1h;
FlexibleWorkGang* _workers;
static size_t _humongous_object_threshold_in_words;
// The secondary free list which contains regions that have been
@ -217,7 +220,6 @@ private:
// It keeps track of the humongous regions.
HeapRegionSet _humongous_set;
void clear_humongous_is_live_table();
void eagerly_reclaim_humongous_regions();
// The number of regions we could create by expansion.
@ -287,22 +289,26 @@ private:
// Helper for monitoring and management support.
G1MonitoringSupport* _g1mm;
// Records whether the region at the given index is kept live by roots or
// references from the young generation.
class HumongousIsLiveBiasedMappedArray : public G1BiasedMappedArray<bool> {
// Records whether the region at the given index is (still) a
// candidate for eager reclaim. Only valid for humongous start
// regions; other regions have unspecified values. Humongous start
// regions are initialized at start of collection pause, with
// candidates removed from the set as they are found reachable from
// roots or the young generation.
class HumongousReclaimCandidates : public G1BiasedMappedArray<bool> {
protected:
bool default_value() const { return false; }
public:
void clear() { G1BiasedMappedArray<bool>::clear(); }
void set_live(uint region) {
set_by_index(region, true);
void set_candidate(uint region, bool value) {
set_by_index(region, value);
}
bool is_live(uint region) {
bool is_candidate(uint region) {
return get_by_index(region);
}
};
HumongousIsLiveBiasedMappedArray _humongous_is_live;
HumongousReclaimCandidates _humongous_reclaim_candidates;
// Stores whether during humongous object registration we found candidate regions.
// If not, we can skip a few steps.
bool _has_humongous_reclaim_candidates;
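As a rough, hypothetical model of the _humongous_reclaim_candidates table declared above: a flat vector keyed by region index stands in for G1's biased array keyed by region address, but the contract is the same, default false, set for nominated humongous-start regions at pause start, and cleared as regions are found reachable.

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

class ReclaimCandidates {
  std::vector<bool> _flags;
public:
  explicit ReclaimCandidates(size_t num_regions) : _flags(num_regions, false) {}
  void set_candidate(size_t region, bool value) { _flags[region] = value; }
  bool is_candidate(size_t region) const        { return _flags[region]; }
  void clear() { std::fill(_flags.begin(), _flags.end(), false); }
};

int main() {
  ReclaimCandidates candidates(16);
  candidates.set_candidate(3, true);       // nominated at the start of the pause
  if (candidates.is_candidate(3)) {
    candidates.set_candidate(3, false);    // found reachable: no longer a candidate
  }
  std::printf("region 3 is candidate: %d\n", (int)candidates.is_candidate(3));
  return 0;
}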
@ -351,6 +357,12 @@ private:
// heap after a compaction.
void print_hrm_post_compaction();
// Create a memory mapper for auxiliary data structures of the given size and
// translation factor.
static G1RegionToSpaceMapper* create_aux_memory_mapper(const char* description,
size_t size,
size_t translation_factor);
double verify(bool guard, const char* msg);
void verify_before_gc();
void verify_after_gc();
@ -605,6 +617,7 @@ protected:
void enqueue_discovered_references(uint no_of_gc_workers);
public:
FlexibleWorkGang* workers() const { return _workers; }
G1Allocator* allocator() {
return _allocator;
@ -630,21 +643,18 @@ public:
inline AllocationContextStats& allocation_context_stats();
// Do anything common to GC's.
virtual void gc_prologue(bool full);
virtual void gc_epilogue(bool full);
void gc_prologue(bool full);
void gc_epilogue(bool full);
// Modify the reclaim candidate set and test for presence.
// These are only valid for starts_humongous regions.
inline void set_humongous_reclaim_candidate(uint region, bool value);
inline bool is_humongous_reclaim_candidate(uint region);
// Remove from the reclaim candidate set. Also remove from the
// collection set so that later encounters avoid the slow path.
inline void set_humongous_is_live(oop obj);
bool humongous_is_live(uint region) {
return _humongous_is_live.is_live(region);
}
// Returns whether the given region (which must be a humongous (start) region)
// is to be considered conservatively live regardless of any other conditions.
bool humongous_region_is_always_live(uint index);
// Returns whether the given region (which must be a humongous (start) region)
// is considered a candidate for eager reclamation.
bool humongous_region_is_candidate(uint index);
// Register the given region to be part of the collection set.
inline void register_humongous_region_with_cset(uint index);
// Register regions with humongous objects (actually on the start region) in
@ -1000,11 +1010,14 @@ public:
// Return the (conservative) maximum heap alignment for any G1 heap
static size_t conservative_max_heap_alignment();
// Does operations required after initialization has been done.
void post_initialize();
// Initialize weak reference processing.
virtual void ref_processing_init();
void ref_processing_init();
// Explicitly import set_par_threads into this scope
using SharedHeap::set_par_threads;
using CollectedHeap::set_par_threads;
// Set _n_par_threads according to a policy TBD.
void set_par_threads();
@ -1251,10 +1264,6 @@ public:
// Iteration functions.
// Iterate over all the ref-containing fields of all objects, calling
// "cl.do_oop" on each.
virtual void oop_iterate(ExtendedOopClosure* cl);
// Iterate over all objects, calling "cl.do_object" on each.
virtual void object_iterate(ObjectClosure* cl);
@ -1262,9 +1271,6 @@ public:
object_iterate(cl);
}
// Iterate over all spaces in use in the heap, in ascending address order.
virtual void space_iterate(SpaceClosure* cl);
// Iterate over heap regions, in address order, terminating the
// iteration early if the "doHeapRegion" method returns "true".
void heap_region_iterate(HeapRegionClosure* blk) const;
@ -1307,10 +1313,6 @@ public:
HeapRegion* next_compaction_region(const HeapRegion* from) const;
// A CollectedHeap will contain some number of spaces. This finds the
// space containing a given address, or else returns NULL.
virtual Space* space_containing(const void* addr) const;
// Returns the HeapRegion that contains addr. addr must not be NULL.
template <class T>
inline HeapRegion* heap_region_containing_raw(const T addr) const;
@ -1344,9 +1346,6 @@ public:
// the block is an object.
virtual bool block_is_obj(const HeapWord* addr) const;
// Does this heap support heap inspection? (+PrintClassHistogram)
virtual bool supports_heap_inspection() const { return true; }
// Section on thread-local allocation buffers (TLABs)
// See CollectedHeap for semantics.

View File

@ -352,20 +352,30 @@ inline bool G1CollectedHeap::is_obj_ill(const oop obj) const {
return is_obj_ill(obj, heap_region_containing(obj));
}
inline void G1CollectedHeap::set_humongous_reclaim_candidate(uint region, bool value) {
assert(_hrm.at(region)->is_starts_humongous(), "Must start a humongous object");
_humongous_reclaim_candidates.set_candidate(region, value);
}
inline bool G1CollectedHeap::is_humongous_reclaim_candidate(uint region) {
assert(_hrm.at(region)->is_starts_humongous(), "Must start a humongous object");
return _humongous_reclaim_candidates.is_candidate(region);
}
inline void G1CollectedHeap::set_humongous_is_live(oop obj) {
uint region = addr_to_region((HeapWord*)obj);
// We not only set the "live" flag in the humongous_is_live table, but also
// Clear the flag in the humongous_reclaim_candidates table. Also
// reset the entry in the _in_cset_fast_test table so that subsequent references
// to the same humongous object do not go into the slow path again.
// This is racy, as multiple threads may at the same time enter here, but this
// is benign.
// During collection we only ever set the "live" flag, and only ever clear the
// During collection we only ever clear the "candidate" flag, and only ever clear the
// entry in the in_cset_fast_test table.
// We only ever evaluate the contents of these tables (in the VM thread) after
// having synchronized the worker threads with the VM thread, or in the same
// thread (i.e. within the VM thread).
if (!_humongous_is_live.is_live(region)) {
_humongous_is_live.set_live(region);
if (is_humongous_reclaim_candidate(region)) {
set_humongous_reclaim_candidate(region, false);
_in_cset_fast_test.clear_humongous(region);
}
}

View File

@ -1460,7 +1460,7 @@ void G1CollectorPolicy::update_survivors_policy() {
_max_survivor_regions = (uint) ceil(max_survivor_regions_d);
_tenuring_threshold = _survivors_age_table.compute_tenuring_threshold(
HeapRegion::GrainWords * _max_survivor_regions);
HeapRegion::GrainWords * _max_survivor_regions, counters());
}
bool G1CollectorPolicy::force_initial_mark_if_outside_cycle(

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2014 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2015 Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -263,7 +263,6 @@ G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
_gc_par_phases[SystemDictionaryRoots] = new WorkerDataArray<double>(max_gc_threads, "SystemDictionary Roots (ms)", true, G1Log::LevelFinest, 3);
_gc_par_phases[CLDGRoots] = new WorkerDataArray<double>(max_gc_threads, "CLDG Roots (ms)", true, G1Log::LevelFinest, 3);
_gc_par_phases[JVMTIRoots] = new WorkerDataArray<double>(max_gc_threads, "JVMTI Roots (ms)", true, G1Log::LevelFinest, 3);
_gc_par_phases[CodeCacheRoots] = new WorkerDataArray<double>(max_gc_threads, "CodeCache Roots (ms)", true, G1Log::LevelFinest, 3);
_gc_par_phases[CMRefRoots] = new WorkerDataArray<double>(max_gc_threads, "CM RefProcessor Roots (ms)", true, G1Log::LevelFinest, 3);
_gc_par_phases[WaitForStrongCLD] = new WorkerDataArray<double>(max_gc_threads, "Wait For Strong CLD (ms)", true, G1Log::LevelFinest, 3);
_gc_par_phases[WeakCLDRoots] = new WorkerDataArray<double>(max_gc_threads, "Weak CLD Roots (ms)", true, G1Log::LevelFinest, 3);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2014 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2015 Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -51,7 +51,6 @@ class G1GCPhaseTimes : public CHeapObj<mtGC> {
SystemDictionaryRoots,
CLDGRoots,
JVMTIRoots,
CodeCacheRoots,
CMRefRoots,
WaitForStrongCLD,
WeakCLDRoots,

View File

@ -29,7 +29,7 @@
#include "gc_implementation/g1/g1CardCounts.hpp"
#include "memory/allocation.hpp"
#include "runtime/safepoint.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/thread.hpp"
#include "utilities/globalDefinitions.hpp"
class DirtyCardQueue;
@ -123,7 +123,7 @@ class G1HotCardCache: public CHeapObj<mtGC> {
// Resets the hot card cache and discards the entries.
void reset_hot_cache() {
assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread");
assert(Thread::current_noinline()->is_VM_thread(), "Current thread should be the VMthread");
if (default_use_cache()) {
reset_hot_cache_internal();
}


@ -61,9 +61,8 @@ void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp,
bool clear_all_softrefs) {
assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
SharedHeap* sh = SharedHeap::heap();
#ifdef ASSERT
if (sh->collector_policy()->should_clear_all_soft_refs()) {
if (G1CollectedHeap::heap()->collector_policy()->should_clear_all_soft_refs()) {
assert(clear_all_softrefs, "Policy should have been checked earlier");
}
#endif
@ -102,11 +101,6 @@ void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp,
BiasedLocking::restore_marks();
GenMarkSweep::deallocate_stacks();
// "free at last gc" is calculated from these.
// CHF: cheating for now!!!
// Universe::set_heap_capacity_at_last_gc(Universe::heap()->capacity());
// Universe::set_heap_used_at_last_gc(Universe::heap()->used());
CodeCache::gc_epilogue();
JvmtiExport::gc_epilogue();
@ -168,12 +162,12 @@ void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
Klass::clean_weak_klass_links(&GenMarkSweep::is_alive);
// Delete entries for dead interned string and clean up unreferenced symbols in symbol table.
G1CollectedHeap::heap()->unlink_string_and_symbol_table(&GenMarkSweep::is_alive);
g1h->unlink_string_and_symbol_table(&GenMarkSweep::is_alive);
if (VerifyDuringGC) {
HandleMark hm; // handle scope
COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact);
Universe::heap()->prepare_for_verify();
g1h->prepare_for_verify();
// Note: we can verify only the heap here. When an object is
// marked, the previous value of the mark word (including
// identity hash values, ages, etc) is preserved, and the mark
@ -187,7 +181,7 @@ void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
if (!VerifySilently) {
gclog_or_tty->print(" VerifyDuringGC:(full)[Verifying ");
}
Universe::heap()->verify(VerifySilently, VerifyOption_G1UseMarkWord);
g1h->verify(VerifySilently, VerifyOption_G1UseMarkWord);
if (!VerifySilently) {
gclog_or_tty->print_cr("]");
}


@ -25,7 +25,7 @@
#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1MARKSWEEP_HPP
#define SHARE_VM_GC_IMPLEMENTATION_G1_G1MARKSWEEP_HPP
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#include "memory/genMarkSweep.hpp"
#include "memory/generation.hpp"


@ -23,9 +23,12 @@
*/
#include "precompiled.hpp"
#include "gc_implementation/g1/g1_specialized_oop_closures.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1ParScanThreadState.hpp"
#include "memory/iterator.inline.hpp"
#include "utilities/stack.inline.hpp"
G1ParCopyHelper::G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
G1ParClosureSuper(g1, par_scan_state), _scanned_klass(NULL),
@ -50,3 +53,6 @@ void G1ParClosureSuper::set_par_scan_thread_state(G1ParScanThreadState* par_scan
assert(_worker_id < MAX2((uint)ParallelGCThreads, 1u),
err_msg("The given worker id %u must be less than the number of threads %u", _worker_id, MAX2((uint)ParallelGCThreads, 1u)));
}
// Generate G1 specialized oop_oop_iterate functions.
SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_G1(ALL_KLASS_OOP_OOP_ITERATE_DEFN)


@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -172,7 +172,7 @@ inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
oopDesc* o = obj;
#endif // CHECK_UNHANDLED_OOPS
assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
assert(_g1->is_in_reserved(obj), "must be in heap");
#endif // ASSERT
assert(_from != NULL, "from region must be non-NULL");


@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -44,37 +44,45 @@
#endif
#include "utilities/bitMap.inline.hpp"
G1PageBasedVirtualSpace::G1PageBasedVirtualSpace() : _low_boundary(NULL),
_high_boundary(NULL), _committed(), _page_size(0), _special(false),
G1PageBasedVirtualSpace::G1PageBasedVirtualSpace(ReservedSpace rs, size_t used_size, size_t page_size) :
_low_boundary(NULL), _high_boundary(NULL), _committed(), _page_size(0), _special(false),
_dirty(), _executable(false) {
initialize_with_page_size(rs, used_size, page_size);
}
bool G1PageBasedVirtualSpace::initialize_with_granularity(ReservedSpace rs, size_t page_size) {
if (!rs.is_reserved()) {
return false; // Allocation failed.
}
assert(_low_boundary == NULL, "VirtualSpace already initialized");
assert(page_size > 0, "Granularity must be non-zero.");
void G1PageBasedVirtualSpace::initialize_with_page_size(ReservedSpace rs, size_t used_size, size_t page_size) {
guarantee(rs.is_reserved(), "Given reserved space must have been reserved already.");
vmassert(_low_boundary == NULL, "VirtualSpace already initialized");
vmassert(page_size > 0, "Page size must be non-zero.");
guarantee(is_ptr_aligned(rs.base(), page_size),
err_msg("Reserved space base " PTR_FORMAT " is not aligned to requested page size " SIZE_FORMAT, p2i(rs.base()), page_size));
guarantee(is_size_aligned(used_size, os::vm_page_size()),
err_msg("Given used reserved space size needs to be OS page size aligned (%d bytes) but is " SIZE_FORMAT, os::vm_page_size(), used_size));
guarantee(used_size <= rs.size(),
err_msg("Used size of reserved space " SIZE_FORMAT " bytes is smaller than reservation at " SIZE_FORMAT " bytes", used_size, rs.size()));
guarantee(is_size_aligned(rs.size(), page_size),
err_msg("Expected that the virtual space is size aligned, but " SIZE_FORMAT " is not aligned to page size " SIZE_FORMAT, rs.size(), page_size));
_low_boundary = rs.base();
_high_boundary = _low_boundary + rs.size();
_high_boundary = _low_boundary + used_size;
_special = rs.special();
_executable = rs.executable();
_page_size = page_size;
assert(_committed.size() == 0, "virtual space initialized more than once");
uintx size_in_bits = rs.size() / page_size;
_committed.resize(size_in_bits, /* in_resource_area */ false);
vmassert(_committed.size() == 0, "virtual space initialized more than once");
BitMap::idx_t size_in_pages = rs.size() / page_size;
_committed.resize(size_in_pages, /* in_resource_area */ false);
if (_special) {
_dirty.resize(size_in_bits, /* in_resource_area */ false);
_dirty.resize(size_in_pages, /* in_resource_area */ false);
}
return true;
_tail_size = used_size % _page_size;
}
G1PageBasedVirtualSpace::~G1PageBasedVirtualSpace() {
release();
}
@ -87,12 +95,18 @@ void G1PageBasedVirtualSpace::release() {
_special = false;
_executable = false;
_page_size = 0;
_tail_size = 0;
_committed.resize(0, false);
_dirty.resize(0, false);
}
size_t G1PageBasedVirtualSpace::committed_size() const {
return _committed.count_one_bits() * _page_size;
size_t result = _committed.count_one_bits() * _page_size;
// The last page might only be partially covered by this space.
if (is_last_page_partial() && _committed.at(_committed.size() - 1)) {
result -= _page_size - _tail_size;
}
return result;
}
size_t G1PageBasedVirtualSpace::reserved_size() const {
@ -103,65 +117,134 @@ size_t G1PageBasedVirtualSpace::uncommitted_size() const {
return reserved_size() - committed_size();
}
uintptr_t G1PageBasedVirtualSpace::addr_to_page_index(char* addr) const {
size_t G1PageBasedVirtualSpace::addr_to_page_index(char* addr) const {
return (addr - _low_boundary) / _page_size;
}
bool G1PageBasedVirtualSpace::is_area_committed(uintptr_t start, size_t size_in_pages) const {
uintptr_t end = start + size_in_pages;
return _committed.get_next_zero_offset(start, end) >= end;
bool G1PageBasedVirtualSpace::is_area_committed(size_t start_page, size_t size_in_pages) const {
size_t end_page = start_page + size_in_pages;
return _committed.get_next_zero_offset(start_page, end_page) >= end_page;
}
bool G1PageBasedVirtualSpace::is_area_uncommitted(uintptr_t start, size_t size_in_pages) const {
uintptr_t end = start + size_in_pages;
return _committed.get_next_one_offset(start, end) >= end;
bool G1PageBasedVirtualSpace::is_area_uncommitted(size_t start_page, size_t size_in_pages) const {
size_t end_page = start_page + size_in_pages;
return _committed.get_next_one_offset(start_page, end_page) >= end_page;
}
char* G1PageBasedVirtualSpace::page_start(uintptr_t index) {
char* G1PageBasedVirtualSpace::page_start(size_t index) const {
return _low_boundary + index * _page_size;
}
size_t G1PageBasedVirtualSpace::byte_size_for_pages(size_t num) {
return num * _page_size;
bool G1PageBasedVirtualSpace::is_after_last_page(size_t index) const {
guarantee(index <= _committed.size(),
err_msg("Given boundary page " SIZE_FORMAT " is beyond managed page count " SIZE_FORMAT, index, _committed.size()));
return index == _committed.size();
}
bool G1PageBasedVirtualSpace::commit(uintptr_t start, size_t size_in_pages) {
void G1PageBasedVirtualSpace::commit_preferred_pages(size_t start, size_t num_pages) {
vmassert(num_pages > 0, "No full pages to commit");
vmassert(start + num_pages <= _committed.size(),
err_msg("Tried to commit area from page " SIZE_FORMAT " to page " SIZE_FORMAT " "
"that is outside of managed space of " SIZE_FORMAT " pages",
start, start + num_pages, _committed.size()));
char* start_addr = page_start(start);
size_t size = num_pages * _page_size;
os::commit_memory_or_exit(start_addr, size, _page_size, _executable,
err_msg("Failed to commit area from " PTR_FORMAT " to " PTR_FORMAT " of length " SIZE_FORMAT ".",
p2i(start_addr), p2i(start_addr + size), size));
}
void G1PageBasedVirtualSpace::commit_tail() {
vmassert(_tail_size > 0, "The size of the tail area must be > 0 when reaching here");
char* const aligned_end_address = (char*)align_ptr_down(_high_boundary, _page_size);
os::commit_memory_or_exit(aligned_end_address, _tail_size, os::vm_page_size(), _executable,
err_msg("Failed to commit tail area from " PTR_FORMAT " to " PTR_FORMAT " of length " SIZE_FORMAT ".",
p2i(aligned_end_address), p2i(_high_boundary), _tail_size));
}
void G1PageBasedVirtualSpace::commit_internal(size_t start_page, size_t end_page) {
guarantee(start_page < end_page,
err_msg("Given start page " SIZE_FORMAT " is larger or equal to end page " SIZE_FORMAT, start_page, end_page));
guarantee(end_page <= _committed.size(),
err_msg("Given end page " SIZE_FORMAT " is beyond end of managed page amount of " SIZE_FORMAT, end_page, _committed.size()));
size_t pages = end_page - start_page;
bool need_to_commit_tail = is_after_last_page(end_page) && is_last_page_partial();
// If we have to commit some (partial) tail area, decrease the amount of pages to avoid
// committing that in the full-page commit code.
if (need_to_commit_tail) {
pages--;
}
if (pages > 0) {
commit_preferred_pages(start_page, pages);
}
if (need_to_commit_tail) {
commit_tail();
}
}
char* G1PageBasedVirtualSpace::bounded_end_addr(size_t end_page) const {
return MIN2(_high_boundary, page_start(end_page));
}
void G1PageBasedVirtualSpace::pretouch_internal(size_t start_page, size_t end_page) {
guarantee(start_page < end_page,
err_msg("Given start page " SIZE_FORMAT " is larger or equal to end page " SIZE_FORMAT, start_page, end_page));
os::pretouch_memory(page_start(start_page), bounded_end_addr(end_page));
}
bool G1PageBasedVirtualSpace::commit(size_t start_page, size_t size_in_pages) {
// We need to make sure to commit all pages covered by the given area.
guarantee(is_area_uncommitted(start, size_in_pages), "Specified area is not uncommitted");
guarantee(is_area_uncommitted(start_page, size_in_pages), "Specified area is not uncommitted");
bool zero_filled = true;
uintptr_t end = start + size_in_pages;
size_t end_page = start_page + size_in_pages;
if (_special) {
// Check for dirty pages and update zero_filled if any found.
if (_dirty.get_next_one_offset(start,end) < end) {
if (_dirty.get_next_one_offset(start_page, end_page) < end_page) {
zero_filled = false;
_dirty.clear_range(start, end);
_dirty.clear_range(start_page, end_page);
}
} else {
os::commit_memory_or_exit(page_start(start), byte_size_for_pages(size_in_pages), _executable,
err_msg("Failed to commit pages from "SIZE_FORMAT" of length "SIZE_FORMAT, start, size_in_pages));
commit_internal(start_page, end_page);
}
_committed.set_range(start, end);
_committed.set_range(start_page, end_page);
if (AlwaysPreTouch) {
os::pretouch_memory(page_start(start), page_start(end));
pretouch_internal(start_page, end_page);
}
return zero_filled;
}
void G1PageBasedVirtualSpace::uncommit(uintptr_t start, size_t size_in_pages) {
guarantee(is_area_committed(start, size_in_pages), "checking");
void G1PageBasedVirtualSpace::uncommit_internal(size_t start_page, size_t end_page) {
guarantee(start_page < end_page,
err_msg("Given start page " SIZE_FORMAT " is larger or equal to end page " SIZE_FORMAT, start_page, end_page));
char* start_addr = page_start(start_page);
os::uncommit_memory(start_addr, pointer_delta(bounded_end_addr(end_page), start_addr, sizeof(char)));
}
void G1PageBasedVirtualSpace::uncommit(size_t start_page, size_t size_in_pages) {
guarantee(is_area_committed(start_page, size_in_pages), "checking");
size_t end_page = start_page + size_in_pages;
if (_special) {
// Mark that memory is dirty. If committed again the memory might
// need to be cleared explicitly.
_dirty.set_range(start, start + size_in_pages);
_dirty.set_range(start_page, end_page);
} else {
os::uncommit_memory(page_start(start), byte_size_for_pages(size_in_pages));
uncommit_internal(start_page, end_page);
}
_committed.clear_range(start, start + size_in_pages);
_committed.clear_range(start_page, end_page);
}
bool G1PageBasedVirtualSpace::contains(const void* p) const {
@ -175,7 +258,8 @@ void G1PageBasedVirtualSpace::print_on(outputStream* out) {
out->cr();
out->print_cr(" - committed: " SIZE_FORMAT, committed_size());
out->print_cr(" - reserved: " SIZE_FORMAT, reserved_size());
out->print_cr(" - [low_b, high_b]: [" INTPTR_FORMAT ", " INTPTR_FORMAT "]", p2i(_low_boundary), p2i(_high_boundary));
out->print_cr(" - preferred page size: " SIZE_FORMAT, _page_size);
out->print_cr(" - [low_b, high_b]: [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(_low_boundary), p2i(_high_boundary));
}
void G1PageBasedVirtualSpace::print() {
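The commit path in this file splits a request into whole preferred-size pages plus, when the request reaches the end of a space whose high boundary is not page aligned, a small-page tail. A standalone sketch of that split (illustrative struct and names, not HotSpot API):

#include <cstddef>

struct CommitPlan {
  size_t full_pages;   // pages to commit with the preferred page size
  size_t tail_bytes;   // bytes at the end committed with small OS pages
};

CommitPlan plan_commit(size_t start_page, size_t end_page,
                       size_t num_pages_total, size_t tail_size) {
  CommitPlan plan;
  plan.full_pages = end_page - start_page;
  plan.tail_bytes = 0;
  // The tail only matters if the request reaches past the last full page.
  bool covers_tail = (end_page == num_pages_total) && (tail_size > 0);
  if (covers_tail) {
    plan.full_pages -= 1;   // the last "page" is only partially backed
    plan.tail_bytes = tail_size;
  }
  return plan;
}

For example, a 9 MB space backed by a 10 MB reservation and 2 MB preferred pages is tracked as 5 pages with a 1 MB tail; committing pages [0, 5) commits four full 2 MB pages and then the 1 MB tail with small OS pages.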


@ -34,6 +34,12 @@
// granularity.
// (De-)Allocation requests are always OS page aligned by passing a page index
// and multiples of pages.
// For systems that only commit memory in a given size (always greater than
// page size), the base address is required to be aligned to that page size.
// The actual size requested need not be aligned to that page size, but the size
// of the reservation passed may be rounded up to this page size. Any fragment
// (less than the page size) of the actual size at the tail of the request will
// be committed using OS small pages.
// The implementation gives an error when trying to commit or uncommit pages that
// have already been committed or uncommitted.
class G1PageBasedVirtualSpace VALUE_OBJ_CLASS_SPEC {
@ -43,7 +49,11 @@ class G1PageBasedVirtualSpace VALUE_OBJ_CLASS_SPEC {
char* _low_boundary;
char* _high_boundary;
// The commit/uncommit granularity in bytes.
// The size of the tail in bytes of the handled space that needs to be committed
// using small pages.
size_t _tail_size;
// The preferred page size used for commit/uncommit in bytes.
size_t _page_size;
// Bitmap used for verification of commit/uncommit operations.
@ -62,30 +72,55 @@ class G1PageBasedVirtualSpace VALUE_OBJ_CLASS_SPEC {
// Indicates whether the committed space should be executable.
bool _executable;
// Helper function for committing memory. Commit the given memory range by using
// _page_size pages as much as possible and the remainder with small sized pages.
void commit_internal(size_t start_page, size_t end_page);
// Commit num_pages pages of _page_size size starting from start. All argument
// checking has been performed.
void commit_preferred_pages(size_t start_page, size_t end_page);
// Commit space at the high end of the space that needs to be committed with small
// sized pages.
void commit_tail();
// Uncommit the given memory range.
void uncommit_internal(size_t start_page, size_t end_page);
// Pretouch the given memory range.
void pretouch_internal(size_t start_page, size_t end_page);
// Returns the index of the page which contains the given address.
uintptr_t addr_to_page_index(char* addr) const;
// Returns the address of the given page index.
char* page_start(uintptr_t index);
// Returns the byte size of the given number of pages.
size_t byte_size_for_pages(size_t num);
char* page_start(size_t index) const;
// Is the given page index the last page?
bool is_last_page(size_t index) const { return index == (_committed.size() - 1); }
// Is the given page index the first after last page?
bool is_after_last_page(size_t index) const;
// Is the last page only partially covered by this space?
bool is_last_page_partial() const { return !is_ptr_aligned(_high_boundary, _page_size); }
// Returns the end address of the given page bounded by the reserved space.
char* bounded_end_addr(size_t end_page) const;
// Returns true if the entire area is backed by committed memory.
bool is_area_committed(uintptr_t start, size_t size_in_pages) const;
bool is_area_committed(size_t start_page, size_t size_in_pages) const;
// Returns true if the entire area is not backed by committed memory.
bool is_area_uncommitted(uintptr_t start, size_t size_in_pages) const;
bool is_area_uncommitted(size_t start_page, size_t size_in_pages) const;
void initialize_with_page_size(ReservedSpace rs, size_t used_size, size_t page_size);
public:
// Commit the given area of pages starting at start being size_in_pages large.
// Returns true if the given area is zero filled upon completion.
bool commit(uintptr_t start, size_t size_in_pages);
bool commit(size_t start_page, size_t size_in_pages);
// Uncommit the given area of pages starting at start being size_in_pages large.
void uncommit(uintptr_t start, size_t size_in_pages);
void uncommit(size_t start_page, size_t size_in_pages);
// Initialization
G1PageBasedVirtualSpace();
bool initialize_with_granularity(ReservedSpace rs, size_t page_size);
// Initialize the given reserved space with the given base address and the size
// actually used.
// Prefer to commit in page_size chunks.
G1PageBasedVirtualSpace(ReservedSpace rs, size_t used_size, size_t page_size);
// Destruction
~G1PageBasedVirtualSpace();
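To make the bookkeeping declared above concrete, here is a small worked example (assumed sizes, plain C++, not HotSpot code) of the page-index and tail arithmetic:

#include <cassert>
#include <cstddef>

int main() {
  const size_t page_size = 2 * 1024 * 1024;       // preferred (large) page size
  const size_t used_size = 9 * 1024 * 1024;       // space actually used
  const size_t reserved  = 10 * 1024 * 1024;      // reservation rounded up to page_size

  const size_t num_pages = reserved / page_size;  // pages tracked in the commit bitmap
  const size_t tail_size = used_size % page_size; // bytes committed with OS small pages
  assert(num_pages == 5);
  assert(tail_size == 1 * 1024 * 1024);

  // An address `offset` bytes into the space lands in page offset / page_size.
  const size_t offset = 5 * 1024 * 1024;
  assert(offset / page_size == 2);

  // The high boundary (low boundary + used_size) is not page aligned, so the
  // last page is only partially covered and committed_size() subtracts the
  // uncovered part.
  const bool last_page_partial = (used_size % page_size) != 0;
  assert(last_page_partial);
  return 0;
}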


@ -26,8 +26,10 @@
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1ParScanThreadState.inline.hpp"
#include "gc_implementation/g1/g1StringDedup.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/prefetch.inline.hpp"
#include "utilities/stack.inline.hpp"
G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp)
: _g1h(g1h),


@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -63,7 +63,7 @@ inline void G1ParScanThreadState::do_oop_partial_array(oop* p) {
assert(has_partial_array_mask(p), "invariant");
oop from_obj = clear_partial_array_mask(p);
assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
assert(_g1h->is_in_reserved(from_obj), "must be in heap.");
assert(from_obj->is_objArray(), "must be obj array");
objArrayOop from_obj_array = objArrayOop(from_obj);
// The from-space object contains the real length.
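clear_partial_array_mask above recovers a plain object pointer from a task that was tagged as a partial-array chunk. A minimal sketch of that pointer-tagging idea (the real HotSpot encoding differs in detail; names here are illustrative):

#include <cassert>
#include <cstdint>

const uintptr_t partial_array_mask = 0x1;   // low bit is free in an aligned pointer

void* set_partial_array_mask(void* obj) {
  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(obj) | partial_array_mask);
}

bool has_partial_array_mask(void* task) {
  return (reinterpret_cast<uintptr_t>(task) & partial_array_mask) != 0;
}

void* clear_partial_array_mask(void* task) {
  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(task) & ~partial_array_mask);
}

int main() {
  alignas(8) static int dummy;              // stands in for an object array
  void* tagged = set_partial_array_mask(&dummy);
  assert(has_partial_array_mask(tagged));
  assert(clear_partial_array_mask(tagged) == &dummy);
  return 0;
}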


@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -31,17 +31,16 @@
#include "utilities/bitMap.inline.hpp"
G1RegionToSpaceMapper::G1RegionToSpaceMapper(ReservedSpace rs,
size_t commit_granularity,
size_t used_size,
size_t page_size,
size_t region_granularity,
MemoryType type) :
_storage(),
_commit_granularity(commit_granularity),
_storage(rs, used_size, page_size),
_region_granularity(region_granularity),
_listener(NULL),
_commit_map() {
guarantee(is_power_of_2(commit_granularity), "must be");
guarantee(is_power_of_2(page_size), "must be");
guarantee(is_power_of_2(region_granularity), "must be");
_storage.initialize_with_granularity(rs, commit_granularity);
MemTracker::record_virtual_memory_type((address)rs.base(), type);
}
@ -55,25 +54,26 @@ class G1RegionsLargerThanCommitSizeMapper : public G1RegionToSpaceMapper {
public:
G1RegionsLargerThanCommitSizeMapper(ReservedSpace rs,
size_t os_commit_granularity,
size_t actual_size,
size_t page_size,
size_t alloc_granularity,
size_t commit_factor,
MemoryType type) :
G1RegionToSpaceMapper(rs, os_commit_granularity, alloc_granularity, type),
_pages_per_region(alloc_granularity / (os_commit_granularity * commit_factor)) {
G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, type),
_pages_per_region(alloc_granularity / (page_size * commit_factor)) {
guarantee(alloc_granularity >= os_commit_granularity, "allocation granularity smaller than commit granularity");
guarantee(alloc_granularity >= page_size, "allocation granularity smaller than commit granularity");
_commit_map.resize(rs.size() * commit_factor / alloc_granularity, /* in_resource_area */ false);
}
virtual void commit_regions(uintptr_t start_idx, size_t num_regions) {
bool zero_filled = _storage.commit(start_idx * _pages_per_region, num_regions * _pages_per_region);
virtual void commit_regions(uint start_idx, size_t num_regions) {
bool zero_filled = _storage.commit((size_t)start_idx * _pages_per_region, num_regions * _pages_per_region);
_commit_map.set_range(start_idx, start_idx + num_regions);
fire_on_commit(start_idx, num_regions, zero_filled);
}
virtual void uncommit_regions(uintptr_t start_idx, size_t num_regions) {
_storage.uncommit(start_idx * _pages_per_region, num_regions * _pages_per_region);
virtual void uncommit_regions(uint start_idx, size_t num_regions) {
_storage.uncommit((size_t)start_idx * _pages_per_region, num_regions * _pages_per_region);
_commit_map.clear_range(start_idx, start_idx + num_regions);
}
};
@ -98,22 +98,23 @@ class G1RegionsSmallerThanCommitSizeMapper : public G1RegionToSpaceMapper {
public:
G1RegionsSmallerThanCommitSizeMapper(ReservedSpace rs,
size_t os_commit_granularity,
size_t actual_size,
size_t page_size,
size_t alloc_granularity,
size_t commit_factor,
MemoryType type) :
G1RegionToSpaceMapper(rs, os_commit_granularity, alloc_granularity, type),
_regions_per_page((os_commit_granularity * commit_factor) / alloc_granularity), _refcounts() {
G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, type),
_regions_per_page((page_size * commit_factor) / alloc_granularity), _refcounts() {
guarantee((os_commit_granularity * commit_factor) >= alloc_granularity, "allocation granularity smaller than commit granularity");
_refcounts.initialize((HeapWord*)rs.base(), (HeapWord*)(rs.base() + rs.size()), os_commit_granularity);
guarantee((page_size * commit_factor) >= alloc_granularity, "allocation granularity smaller than commit granularity");
_refcounts.initialize((HeapWord*)rs.base(), (HeapWord*)(rs.base() + align_size_up(rs.size(), page_size)), page_size);
_commit_map.resize(rs.size() * commit_factor / alloc_granularity, /* in_resource_area */ false);
}
virtual void commit_regions(uintptr_t start_idx, size_t num_regions) {
for (uintptr_t i = start_idx; i < start_idx + num_regions; i++) {
assert(!_commit_map.at(i), err_msg("Trying to commit storage at region "INTPTR_FORMAT" that is already committed", i));
uintptr_t idx = region_idx_to_page_idx(i);
virtual void commit_regions(uint start_idx, size_t num_regions) {
for (uint i = start_idx; i < start_idx + num_regions; i++) {
assert(!_commit_map.at(i), err_msg("Trying to commit storage at region %u that is already committed", i));
size_t idx = region_idx_to_page_idx(i);
uint old_refcount = _refcounts.get_by_index(idx);
bool zero_filled = false;
if (old_refcount == 0) {
@ -125,10 +126,10 @@ class G1RegionsSmallerThanCommitSizeMapper : public G1RegionToSpaceMapper {
}
}
virtual void uncommit_regions(uintptr_t start_idx, size_t num_regions) {
for (uintptr_t i = start_idx; i < start_idx + num_regions; i++) {
assert(_commit_map.at(i), err_msg("Trying to uncommit storage at region "INTPTR_FORMAT" that is not committed", i));
uintptr_t idx = region_idx_to_page_idx(i);
virtual void uncommit_regions(uint start_idx, size_t num_regions) {
for (uint i = start_idx; i < start_idx + num_regions; i++) {
assert(_commit_map.at(i), err_msg("Trying to uncommit storage at region %u that is not committed", i));
size_t idx = region_idx_to_page_idx(i);
uint old_refcount = _refcounts.get_by_index(idx);
assert(old_refcount > 0, "must be");
if (old_refcount == 1) {
@ -147,14 +148,15 @@ void G1RegionToSpaceMapper::fire_on_commit(uint start_idx, size_t num_regions, b
}
G1RegionToSpaceMapper* G1RegionToSpaceMapper::create_mapper(ReservedSpace rs,
size_t os_commit_granularity,
size_t actual_size,
size_t page_size,
size_t region_granularity,
size_t commit_factor,
MemoryType type) {
if (region_granularity >= (os_commit_granularity * commit_factor)) {
return new G1RegionsLargerThanCommitSizeMapper(rs, os_commit_granularity, region_granularity, commit_factor, type);
if (region_granularity >= (page_size * commit_factor)) {
return new G1RegionsLargerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, type);
} else {
return new G1RegionsSmallerThanCommitSizeMapper(rs, os_commit_granularity, region_granularity, commit_factor, type);
return new G1RegionsSmallerThanCommitSizeMapper(rs, actual_size, page_size, region_granularity, commit_factor, type);
}
}
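The factory above picks a mapper based on whether one region needs at least a whole commit page, or whether several regions share a page and commits must therefore be reference counted. A standalone sketch of that sizing decision (example values, not HotSpot API; `commit_factor` here means heap bytes covered by one byte of the mapped data structure):

#include <cstddef>
#include <cstdio>

int main() {
  const size_t page_size          = 2 * 1024 * 1024;  // commit granularity
  const size_t region_granularity = 1 * 1024 * 1024;  // heap bytes per region
  const size_t commit_factor      = 1;                // heap bytes per byte of backing data

  if (region_granularity >= page_size * commit_factor) {
    size_t pages_per_region = region_granularity / (page_size * commit_factor);
    std::printf("region spans whole pages: commit %zu page(s) per region\n", pages_per_region);
  } else {
    size_t regions_per_page = (page_size * commit_factor) / region_granularity;
    std::printf("%zu regions share one page: commits are refcounted\n", regions_per_page);
  }
  return 0;
}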


@ -46,12 +46,12 @@ class G1RegionToSpaceMapper : public CHeapObj<mtGC> {
protected:
// Backing storage.
G1PageBasedVirtualSpace _storage;
size_t _commit_granularity;
size_t _region_granularity;
// Mapping management
BitMap _commit_map;
G1RegionToSpaceMapper(ReservedSpace rs, size_t commit_granularity, size_t region_granularity, MemoryType type);
G1RegionToSpaceMapper(ReservedSpace rs, size_t used_size, size_t page_size, size_t region_granularity, MemoryType type);
void fire_on_commit(uint start_idx, size_t num_regions, bool zero_filled);
public:
@ -70,16 +70,20 @@ class G1RegionToSpaceMapper : public CHeapObj<mtGC> {
return _commit_map.at(idx);
}
virtual void commit_regions(uintptr_t start_idx, size_t num_regions = 1) = 0;
virtual void uncommit_regions(uintptr_t start_idx, size_t num_regions = 1) = 0;
virtual void commit_regions(uint start_idx, size_t num_regions = 1) = 0;
virtual void uncommit_regions(uint start_idx, size_t num_regions = 1) = 0;
// Creates an appropriate G1RegionToSpaceMapper for the given parameters.
// The actual space to be used within the given reservation is given by actual_size.
// This is because some OSes need to round up the reservation size to guarantee
// alignment of page_size.
// The byte_translation_factor defines how many bytes in a region correspond to
// a single byte in the data structure this mapper is for.
// Eg. in the card table, this value corresponds to the size a single card
// table entry corresponds to.
// table entry corresponds to in the heap.
static G1RegionToSpaceMapper* create_mapper(ReservedSpace rs,
size_t os_commit_granularity,
size_t actual_size,
size_t page_size,
size_t region_granularity,
size_t byte_translation_factor,
MemoryType type);
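As a worked example of the byte_translation_factor described above (assumed sizes, not taken from the source): for a card-table-like structure where one byte covers 512 heap bytes, a 1 MB heap region needs only 2 KB of backing storage, so one 2 MB commit page backs 1024 regions and the refcounting mapper is the natural choice.

#include <cassert>
#include <cstddef>

int main() {
  const size_t region_granularity = 1 * 1024 * 1024; // heap bytes per region
  const size_t page_size          = 2 * 1024 * 1024; // commit page size
  const size_t translation_factor = 512;             // heap bytes per card byte

  // Backing storage needed per region and regions covered by one commit page.
  const size_t bytes_per_region = region_granularity / translation_factor;
  const size_t regions_per_page = page_size / bytes_per_region;
  assert(bytes_per_region == 2 * 1024);
  assert(regions_per_page == 1024);

  // region_granularity < page_size * translation_factor, so the refcounting
  // (smaller-than-commit-size) mapper would be selected here.
  assert(region_granularity < page_size * translation_factor);
  return 0;
}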


@ -38,6 +38,7 @@
#include "oops/oop.inline.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/intHisto.hpp"
#include "utilities/stack.inline.hpp"
#define CARD_REPEAT_HISTO 0


@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -57,7 +57,7 @@ inline void G1RemSet::par_write_ref(HeapRegion* from, T* p, uint tid) {
oopDesc* o = obj;
#endif // CHECK_UNHANDLED_OOPS
assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
assert(_g1->is_in_reserved(obj), "must be in heap");
#endif // ASSERT
assert(from == NULL || from->is_in_reserved(p), "p is not in from");


@ -116,7 +116,7 @@ void G1RootProcessor::wait_until_all_strong_classes_discovered() {
G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h) :
_g1h(g1h),
_process_strong_tasks(new SubTasksDone(G1RP_PS_NumElements)),
_srs(g1h),
_srs(),
_lock(Mutex::leaf, "G1 Root Scanning barrier lock", false, Monitor::_safepoint_check_never),
_n_workers_discovered_strong_classes(0) {}
@ -253,7 +253,8 @@ void G1RootProcessor::process_java_roots(OopClosure* strong_roots,
{
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ThreadRoots, worker_i);
Threads::possibly_parallel_oops_do(strong_roots, thread_stack_clds, strong_code);
bool is_par = _g1h->n_par_threads() > 0;
Threads::possibly_parallel_oops_do(is_par, strong_roots, thread_stack_clds, strong_code);
}
}
@ -323,10 +324,6 @@ void G1RootProcessor::process_vm_roots(OopClosure* strong_roots,
void G1RootProcessor::scan_remembered_sets(G1ParPushHeapRSClosure* scan_rs,
OopClosure* scan_non_heap_weak_roots,
uint worker_i) {
G1GCPhaseTimes* phase_times = _g1h->g1_policy()->phase_times();
G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::CodeCacheRoots, worker_i);
// Now scan the complement of the collection set.
G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots);
_g1h->g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);


@ -26,7 +26,7 @@
#define SHARE_VM_GC_IMPLEMENTATION_G1_ROOTPROCESSOR_HPP
#include "memory/allocation.hpp"
#include "memory/sharedHeap.hpp"
#include "memory/strongRootsScope.hpp"
#include "runtime/mutex.hpp"
class CLDClosure;
@ -46,7 +46,7 @@ class SubTasksDone;
class G1RootProcessor : public StackObj {
G1CollectedHeap* _g1h;
SubTasksDone* _process_strong_tasks;
SharedHeap::StrongRootsScope _srs;
StrongRootsScope _srs;
// Used to implement the Thread work barrier.
Monitor _lock;

Some files were not shown because too many files have changed in this diff.