Tobias Hartmann 2015-09-14 07:03:04 +00:00
commit 662e39edbb
563 changed files with 15498 additions and 10966 deletions


@ -322,3 +322,4 @@ eeea9adfd1e3d075ef82148c00a4847a1aab4d26 jdk9-b76
c25e882cee9622ec75c4e9d60633539a2f0a8809 jdk9-b77
c8753d0be1778944dc512ec86a459941ea1ad2c3 jdk9-b78
3966bd3b8167419aa05c6718a4af1cf54b1e3c58 jdk9-b79
3c9f5bd909ae7187f24622ee4b69f8a5756a9271 jdk9-b80


@ -322,3 +322,4 @@ d82072b699b880a1f647a5e2d7c0f86cec958941 jdk9-b76
7972dc8f2a47f0c4cd8f02fa5662af41f028aa14 jdk9-b77
8c40d4143ee13bdf8170c68cc384c36ab1e9fadb jdk9-b78
ba08a9f79b9849716bae1f39f71333d47f604012 jdk9-b79
f7c5ae2933c0b8510a420d1713a955e4ffc7ad0b jdk9-b80


@ -41,5 +41,19 @@ RCDIR=`mktemp -dt jdk-build-logger.tmp.XXXXXX` || exit $?
trap "rm -rf \"$RCDIR\"" EXIT
LOGFILE=$1
shift
# We need to handle command lines like "VAR1=val1 /usr/bin/cmd VAR2=val2".
# Do this by shifting away prepended variable assignments, and export them
# instead.
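# Example (hypothetical invocation): "logger.sh build.log FOO=bar make images"
# exports FOO=bar and then runs "make images" with its output logged.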
is_prefix=true
for opt; do
if [[ "$is_prefix" = true && "$opt" =~ ^.*=.*$ ]]; then
export $opt
shift
else
is_prefix=false
fi
done
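# The pipeline below tees both streams to the log while keeping them separate
# on the console: fd 3 saves the subshell's stdout, stderr goes through the
# inner "tee -a" and back to stderr, stdout through the outer "tee -a", and
# the command's real exit status is stashed in "$RCDIR/rc" so it survives
# both pipelines.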
(exec 3>&1 ; ("$@" 2>&1 1>&3; echo $? > "$RCDIR/rc") | tee -a $LOGFILE 1>&2 ; exec 3>&-) | tee -a $LOGFILE
exit `cat "$RCDIR/rc"`


@ -322,3 +322,4 @@ d8126bc88fa5cd1ae4e44d86a4b1280ca1c9e2aa jdk9-b76
8bb2441c0fec8b28f7bf11a0ca3ec1642e7ef457 jdk9-b77
182bb7accc5253bcfefd8edc1d4997ec8f9f8694 jdk9-b78
4ab250b8fac66ef8cd15ee78c40f0c651c96e16a jdk9-b79
821a0373ef2d1642a9824facb938b901ad010413 jdk9-b80


@ -482,3 +482,4 @@ fff6b54e9770ac4c12c2fb4cab5aa7672affa4bd jdk9-b74
e66c3813789debfc06f206afde1bf7a84cb08451 jdk9-b77
20dc06b04fe5ec373879414d60ef82ac70faef98 jdk9-b78
e9e63d93bbfe2c6c23447e2c1f5cc71c98671cba jdk9-b79
8e8377739c06b99b9011c003c77e0bef84c91e09 jdk9-b80


@ -633,9 +633,9 @@ create_jdk: copy_jdk update_jdk
update_jdk: export_product_jdk export_fastdebug_jdk test_jdk
copy_jdk: $(JDK_IMAGE_DIR)/jre/lib/rt.jar
copy_jdk: $(JDK_IMAGE_DIR)/bin/java
$(JDK_IMAGE_DIR)/jre/lib/rt.jar:
$(JDK_IMAGE_DIR)/bin/java:
$(RM) -r $(JDK_IMAGE_DIR)
$(MKDIR) -p $(JDK_IMAGE_DIR)
($(CD) $(JDK_IMPORT_PATH) && \


@ -141,18 +141,6 @@ SUNWprivate_1.1 {
JVM_Halt;
JVM_HoldsLock;
JVM_IHashCode;
JVM_ImageAttributeOffsets;
JVM_ImageAttributeOffsetsLength;
JVM_ImageClose;
JVM_ImageFindAttributes;
JVM_ImageGetAttributes;
JVM_ImageGetAttributesCount;
JVM_ImageGetDataAddress;
JVM_ImageGetIndexAddress;
JVM_ImageGetStringBytes;
JVM_ImageOpen;
JVM_ImageRead;
JVM_ImageReadCompressed;
JVM_InitAgentProperties;
JVM_InitProperties;
JVM_InternString;


@ -139,18 +139,6 @@ SUNWprivate_1.1 {
JVM_Halt;
JVM_HoldsLock;
JVM_IHashCode;
JVM_ImageAttributeOffsets;
JVM_ImageAttributeOffsetsLength;
JVM_ImageClose;
JVM_ImageFindAttributes;
JVM_ImageGetAttributes;
JVM_ImageGetAttributesCount;
JVM_ImageGetDataAddress;
JVM_ImageGetIndexAddress;
JVM_ImageGetStringBytes;
JVM_ImageOpen;
JVM_ImageRead;
JVM_ImageReadCompressed;
JVM_InitAgentProperties;
JVM_InitProperties;
JVM_InternString;


@ -139,18 +139,6 @@
_JVM_Halt
_JVM_HoldsLock
_JVM_IHashCode
_JVM_ImageAttributeOffsets
_JVM_ImageAttributeOffsetsLength
_JVM_ImageClose
_JVM_ImageFindAttributes
_JVM_ImageGetAttributes
_JVM_ImageGetAttributesCount
_JVM_ImageGetDataAddress
_JVM_ImageGetIndexAddress
_JVM_ImageGetStringBytes
_JVM_ImageOpen
_JVM_ImageRead
_JVM_ImageReadCompressed
_JVM_InitAgentProperties
_JVM_InitProperties
_JVM_InternString


@ -139,18 +139,6 @@
_JVM_Halt
_JVM_HoldsLock
_JVM_IHashCode
_JVM_ImageAttributeOffsets
_JVM_ImageAttributeOffsetsLength
_JVM_ImageClose
_JVM_ImageFindAttributes
_JVM_ImageGetAttributes
_JVM_ImageGetAttributesCount
_JVM_ImageGetDataAddress
_JVM_ImageGetIndexAddress
_JVM_ImageGetStringBytes
_JVM_ImageOpen
_JVM_ImageRead
_JVM_ImageReadCompressed
_JVM_InitAgentProperties
_JVM_InitProperties
_JVM_InternString


@ -141,18 +141,6 @@ SUNWprivate_1.1 {
JVM_Halt;
JVM_HoldsLock;
JVM_IHashCode;
JVM_ImageAttributeOffsets;
JVM_ImageAttributeOffsetsLength;
JVM_ImageClose;
JVM_ImageFindAttributes;
JVM_ImageGetAttributes;
JVM_ImageGetAttributesCount;
JVM_ImageGetDataAddress;
JVM_ImageGetIndexAddress;
JVM_ImageGetStringBytes;
JVM_ImageOpen;
JVM_ImageRead;
JVM_ImageReadCompressed;
JVM_InitAgentProperties;
JVM_InitProperties;
JVM_InternString;


@ -141,18 +141,6 @@ SUNWprivate_1.1 {
JVM_Halt;
JVM_HoldsLock;
JVM_IHashCode;
JVM_ImageAttributeOffsets;
JVM_ImageAttributeOffsetsLength;
JVM_ImageClose;
JVM_ImageFindAttributes;
JVM_ImageGetAttributes;
JVM_ImageGetAttributesCount;
JVM_ImageGetDataAddress;
JVM_ImageGetIndexAddress;
JVM_ImageGetStringBytes;
JVM_ImageOpen;
JVM_ImageRead;
JVM_ImageReadCompressed;
JVM_InitAgentProperties;
JVM_InitProperties;
JVM_InternString;


@ -131,7 +131,7 @@ endif
# By default, link the *.o into the library, not the executable.
LINK_INTO$(LINK_INTO) = LIBJVM
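# (A GNU make idiom: if LINK_INTO is unset, this assigns LINK_INTO itself;
# if the user already set it, the assignment lands on a differently named,
# unused variable and the user's value wins.)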
JDK_LIBDIR = $(JAVA_HOME)/jre/lib/$(LIBARCH)
JDK_LIBDIR = $(JAVA_HOME)/lib/$(LIBARCH)
#----------------------------------------------------------------------
# jvm_db & dtrace


@ -49,7 +49,7 @@ fi
# Just in case:
JAVA_HOME=`( cd $JAVA_HOME; pwd )`
if [ "${ALT_BOOTDIR-}" = "" -o ! -d "${ALT_BOOTDIR-}" -o ! -d ${ALT_BOOTDIR-}/jre/lib/ ]; then
if [ "${ALT_BOOTDIR-}" = "" -o ! -d "${ALT_BOOTDIR-}" -o ! -d ${ALT_BOOTDIR-}/lib/ ]; then
ALT_BOOTDIR=${JAVA_HOME}
fi


@ -127,7 +127,7 @@ fi
# o $JRE/lib/$ARCH
# followed by the user's previous effective LD_LIBRARY_PATH, if
# any.
JRE=$JDK/jre
JRE=$JDK
JAVA_HOME=$JDK
export JAVA_HOME


@ -141,18 +141,6 @@ SUNWprivate_1.1 {
JVM_Halt;
JVM_HoldsLock;
JVM_IHashCode;
JVM_ImageAttributeOffsets;
JVM_ImageAttributeOffsetsLength;
JVM_ImageClose;
JVM_ImageFindAttributes;
JVM_ImageGetAttributes;
JVM_ImageGetAttributesCount;
JVM_ImageGetDataAddress;
JVM_ImageGetIndexAddress;
JVM_ImageGetStringBytes;
JVM_ImageOpen;
JVM_ImageRead;
JVM_ImageReadCompressed;
JVM_InitAgentProperties;
JVM_InitProperties;
JVM_InternString;


@ -141,18 +141,6 @@ SUNWprivate_1.1 {
JVM_Halt;
JVM_HoldsLock;
JVM_IHashCode;
JVM_ImageAttributeOffsets;
JVM_ImageAttributeOffsetsLength;
JVM_ImageClose;
JVM_ImageFindAttributes;
JVM_ImageGetAttributes;
JVM_ImageGetAttributesCount;
JVM_ImageGetDataAddress;
JVM_ImageGetIndexAddress;
JVM_ImageGetStringBytes;
JVM_ImageOpen;
JVM_ImageRead;
JVM_ImageReadCompressed;
JVM_InitAgentProperties;
JVM_InitProperties;
JVM_InternString;


@ -76,6 +76,11 @@ endif
ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 509), 1)
CFLAGS_WARN = +w -errwarn
endif
# When using compiler version 5.13 (Solaris Studio 12.4), calls to explicitly
# instantiated template functions trigger this warning when +w is active.
ifeq ($(shell expr $(COMPILER_REV_NUMERIC) \>= 513), 1)
CFLAGS_WARN += -erroff=notemsource
endif
CFLAGS += $(CFLAGS_WARN)
ifeq ("${Platform_compiler}", "sparcWorks")


@ -270,6 +270,7 @@ flags.make: $(BUILDTREE_MAKE) ../shared_dirs.lst
echo "CP ?= cp"; \
echo "MV ?= mv"; \
echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(VARIANT).make"; \
echo "include \$$(GAMMADIR)/make/excludeSrc.make"; \
echo "include \$$(GAMMADIR)/make/$(OS_FAMILY)/makefiles/$(COMPILER).make"; \
) > $@


@ -141,18 +141,6 @@ SUNWprivate_1.1 {
JVM_Halt;
JVM_HoldsLock;
JVM_IHashCode;
JVM_ImageAttributeOffsets;
JVM_ImageAttributeOffsetsLength;
JVM_ImageClose;
JVM_ImageFindAttributes;
JVM_ImageGetAttributes;
JVM_ImageGetAttributesCount;
JVM_ImageGetDataAddress;
JVM_ImageGetIndexAddress;
JVM_ImageGetStringBytes;
JVM_ImageOpen;
JVM_ImageRead;
JVM_ImageReadCompressed;
JVM_InitAgentProperties;
JVM_InitProperties;
JVM_InternString;


@ -197,7 +197,7 @@ Src_Dirs/COMPILER1 := $(CORE_PATHS) $(COMPILER1_PATHS)
Src_Dirs/COMPILER2 := $(CORE_PATHS) $(COMPILER2_PATHS)
Src_Dirs/TIERED := $(CORE_PATHS) $(COMPILER1_PATHS) $(COMPILER2_PATHS)
Src_Dirs/ZERO := $(CORE_PATHS)
Src_Dirs/SHARK := $(CORE_PATHS)
Src_Dirs/SHARK := $(CORE_PATHS) $(SHARK_PATHS)
Src_Dirs := $(Src_Dirs/$(TYPE))
COMPILER2_SPECIFIC_FILES := opto libadt bcEscapeAnalyzer.cpp c2_\* runtime_\*
@ -206,7 +206,7 @@ SHARK_SPECIFIC_FILES := shark
ZERO_SPECIFIC_FILES := zero
# Always exclude these.
Src_Files_EXCLUDE := dtrace jsig.c jvmtiEnvRecommended.cpp jvmtiEnvStub.cpp
Src_Files_EXCLUDE += dtrace jsig.c jvmtiEnvRecommended.cpp jvmtiEnvStub.cpp
# Exclude per type.
Src_Files_EXCLUDE/CORE := $(COMPILER1_SPECIFIC_FILES) $(COMPILER2_SPECIFIC_FILES) $(ZERO_SPECIFIC_FILES) $(SHARK_SPECIFIC_FILES) ciTypeFlow.cpp


@ -3803,82 +3803,38 @@ encode %{
enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
MacroAssembler _masm(&cbuf);
Register old_reg = as_Register($oldval$$reg);
Register new_reg = as_Register($newval$$reg);
Register base = as_Register($mem$$base);
Register addr_reg;
int index = $mem$$index;
int scale = $mem$$scale;
int disp = $mem$$disp;
if (index == -1) {
if (disp != 0) {
__ lea(rscratch2, Address(base, disp));
addr_reg = rscratch2;
} else {
// TODO
// should we ever get anything other than this case?
addr_reg = base;
}
} else {
Register index_reg = as_Register(index);
if (disp == 0) {
__ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
addr_reg = rscratch2;
} else {
__ lea(rscratch2, Address(base, disp));
__ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
addr_reg = rscratch2;
}
}
Label retry_load, done;
__ bind(retry_load);
__ ldxr(rscratch1, addr_reg);
__ cmp(rscratch1, old_reg);
__ br(Assembler::NE, done);
__ stlxr(rscratch1, new_reg, addr_reg);
__ cbnzw(rscratch1, retry_load);
__ bind(done);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
&Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
%}
enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
MacroAssembler _masm(&cbuf);
Register old_reg = as_Register($oldval$$reg);
Register new_reg = as_Register($newval$$reg);
Register base = as_Register($mem$$base);
Register addr_reg;
int index = $mem$$index;
int scale = $mem$$scale;
int disp = $mem$$disp;
if (index == -1) {
if (disp != 0) {
__ lea(rscratch2, Address(base, disp));
addr_reg = rscratch2;
} else {
// TODO
// should we ever get anything other than this case?
addr_reg = base;
}
} else {
Register index_reg = as_Register(index);
if (disp == 0) {
__ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
addr_reg = rscratch2;
} else {
__ lea(rscratch2, Address(base, disp));
__ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
addr_reg = rscratch2;
}
}
Label retry_load, done;
__ bind(retry_load);
__ ldxrw(rscratch1, addr_reg);
__ cmpw(rscratch1, old_reg);
__ br(Assembler::NE, done);
__ stlxrw(rscratch1, new_reg, addr_reg);
__ cbnzw(rscratch1, retry_load);
__ bind(done);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
&Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
%}
// The only difference between aarch64_enc_cmpxchg and
// aarch64_enc_cmpxchg_acq is that we use load-acquire in the
// CompareAndSwap sequence to serve as a barrier on acquiring a
// lock.
enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
&Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
%}
enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
MacroAssembler _masm(&cbuf);
guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
__ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
&Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
%}
// auxiliary used for CompareAndSwapX to set result register
enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
MacroAssembler _masm(&cbuf);
@ -4373,12 +4329,12 @@ encode %{
return;
}
if (UseBiasedLocking) {
__ biased_locking_enter(disp_hdr, oop, box, tmp, true, cont);
if (UseBiasedLocking && !UseOptoBiasInlining) {
__ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
}
// Handle existing monitor
if (EmitSync & 0x02) {
if ((EmitSync & 0x02) == 0) {
// we can use AArch64's bit test and branch here, but
// markOopDesc does not define a bit index, just the bit value,
// so assert in case the bit position changes
@ -4398,13 +4354,10 @@ encode %{
// Compare object markOop with mark and if equal exchange scratch1
// with object markOop.
// Note that this is simply a CAS: it does not generate any
// barriers. These are separately generated by
// membar_acquire_lock().
{
Label retry_load;
__ bind(retry_load);
__ ldxr(tmp, oop);
__ ldaxr(tmp, oop);
__ cmp(tmp, disp_hdr);
__ br(Assembler::NE, cas_failed);
// use stlxr to ensure update is immediately visible
@ -4454,7 +4407,7 @@ encode %{
{
Label retry_load, fail;
__ bind(retry_load);
__ ldxr(rscratch1, tmp);
__ ldaxr(rscratch1, tmp);
__ cmp(disp_hdr, rscratch1);
__ br(Assembler::NE, fail);
// use stlxr to ensure update is immediately visible
@ -4518,7 +4471,7 @@ encode %{
return;
}
if (UseBiasedLocking) {
if (UseBiasedLocking && !UseOptoBiasInlining) {
__ biased_locking_exit(oop, tmp, cont);
}
@ -8017,10 +7970,10 @@ instruct membar_acquire_lock() %{
match(MemBarAcquireLock);
ins_cost(VOLATILE_REF_COST);
format %{ "membar_acquire_lock" %}
format %{ "membar_acquire_lock (elided)" %}
ins_encode %{
__ membar(Assembler::LoadLoad|Assembler::LoadStore);
__ block_comment("membar_acquire_lock (elided)");
%}
ins_pipe(pipe_serial);
@ -8080,10 +8033,10 @@ instruct membar_release_lock() %{
match(MemBarReleaseLock);
ins_cost(VOLATILE_REF_COST);
format %{ "membar_release_lock" %}
format %{ "membar_release_lock (elided)" %}
ins_encode %{
__ membar(Assembler::LoadStore|Assembler::StoreStore);
__ block_comment("membar_release_lock (elided)");
%}
ins_pipe(pipe_serial);
@ -8369,7 +8322,11 @@ instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFla
ins_pipe(pipe_serial);
%}
// this has to be implemented as a CAS
// storeLConditional is used by PhaseMacroExpand::expand_lock_node
// when attempting to rebias a lock towards the current thread. We
// must use the acquire form of cmpxchg in order to guarantee acquire
// semantics in this case.
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
match(Set cr (StoreLConditional mem (Binary oldval newval)));
@ -8381,12 +8338,14 @@ instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFl
"cmpw rscratch1, zr\t# EQ on successful write"
%}
ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval));
ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));
ins_pipe(pipe_slow);
%}
// this has to be implemented as a CAS
// storeIConditional also has acquire semantics, for no better reason
// than matching storeLConditional. At the time of writing this
// comment, storeIConditional was not used anywhere by AArch64.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
match(Set cr (StoreIConditional mem (Binary oldval newval)));
@ -8398,7 +8357,7 @@ instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFl
"cmpw rscratch1, zr\t# EQ on successful write"
%}
ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval));
ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));
ins_pipe(pipe_slow);
%}


@ -1210,7 +1210,7 @@ public:
INSN(ldrs, 0b00, 1);
INSN(ldrd, 0b01, 1);
INSN(ldrq, 0x10, 1);
INSN(ldrq, 0b10, 1);
#undef INSN
@ -2285,13 +2285,13 @@ public:
#undef INSN
// Table vector lookup
#define INSN(NAME, op) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) { \
starti; \
assert(T == T8B || T == T16B, "invalid arrangement"); \
assert(0 < registers && registers <= 4, "invalid number of registers"); \
f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15); \
f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0); \
#define INSN(NAME, op) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) { \
starti; \
assert(T == T8B || T == T16B, "invalid arrangement"); \
assert(0 < registers && registers <= 4, "invalid number of registers"); \
f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15); \
f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0); \
}
INSN(tbl, 0);
@ -2299,6 +2299,7 @@ public:
#undef INSN
// AdvSIMD two-reg misc
#define INSN(NAME, U, opcode) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \
starti; \
@ -2316,10 +2317,19 @@ public:
#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H)
INSN(rev32, 1, 0b00000);
private:
INSN(_rbit, 1, 0b00101);
public:
#undef ASSERTION
#define ASSERTION (T == T8B || T == T16B)
INSN(rev16, 0, 0b00001);
// RBIT only allows T8B and T16B but encodes them oddly. Argh...
void rbit(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
assert((ASSERTION), MSG);
_rbit(Vd, SIMD_Arrangement(T & 1 | 0b010), Vn);
}
#undef ASSERTION
#undef MSG


@ -3043,7 +3043,9 @@ void MacroAssembler::store_check(Register obj) {
// register obj is destroyed afterwards.
BarrierSet* bs = Universe::heap()->barrier_set();
assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
assert(bs->kind() == BarrierSet::CardTableForRS ||
bs->kind() == BarrierSet::CardTableExtension,
"Wrong barrier set kind");
CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");


@ -917,6 +917,8 @@ public:
void cmpptr(Register src1, Address src2);
// Various forms of CAS
void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
Label &suceed, Label *fail);
@ -938,6 +940,23 @@ public:
str(rscratch2, adr);
}
// A generic CAS; success or failure is in the EQ flag.
template <typename T1, typename T2>
void cmpxchg(Register addr, Register expected, Register new_val,
T1 load_insn,
void (MacroAssembler::*cmp_insn)(Register, Register),
T2 store_insn,
Register tmp = rscratch1) {
Label retry_load, done;
bind(retry_load);
(this->*load_insn)(tmp, addr);
(this->*cmp_insn)(tmp, expected);
br(Assembler::NE, done);
(this->*store_insn)(tmp, new_val, addr);
cbnzw(tmp, retry_load);
bind(done);
}
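// Example use (cf. the aarch64.ad encodings above): a plain CAS passes ldxr,
// while the acquiring variant passes ldaxr:
//   cmpxchg(addr, expected, new_val,
//           &Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);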
// Calls
address trampoline_call(Address entry, CodeBuffer *cbuf = NULL);


@ -691,7 +691,7 @@ class StubGenerator: public StubCodeGenerator {
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
__ pop(RegSet::range(r0, r29), sp); // integer registers except lr & sp
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
@ -731,7 +731,7 @@ class StubGenerator: public StubCodeGenerator {
__ pop(RegSet::range(r0, r29), sp); // integer registers except lr & sp
}
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
{
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
@ -2364,7 +2364,7 @@ class StubGenerator: public StubCodeGenerator {
* c_rarg3 - int* table
*
* Output:
* rax - int crc result
* r0 - int crc result
*/
address generate_updateBytesCRC32C() {
assert(UseCRC32CIntrinsics, "what are we doing here?");
@ -2435,6 +2435,69 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) {
// Karatsuba multiplication performs a 128*128 -> 256-bit
// multiplication in three 128-bit multiplications and a few
// additions.
//
// (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1)
// (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0
//
// Inputs:
//
// A0 in a.d[0] (subkey)
// A1 in a.d[1]
// (A1+A0) in a1_xor_a0.d[0]
//
// B0 in b.d[0] (state)
// B1 in b.d[1]
__ ext(tmp1, __ T16B, b, b, 0x08);
__ pmull2(result_hi, __ T1Q, b, a, __ T2D); // A1*B1
__ eor(tmp1, __ T16B, tmp1, b); // (B1+B0)
__ pmull(result_lo, __ T1Q, b, a, __ T1D); // A0*B0
__ pmull(tmp2, __ T1Q, tmp1, a1_xor_a0, __ T1D); // (A1+A0)(B1+B0)
__ ext(tmp4, __ T16B, result_lo, result_hi, 0x08);
__ eor(tmp3, __ T16B, result_hi, result_lo); // A1*B1+A0*B0
__ eor(tmp2, __ T16B, tmp2, tmp4);
__ eor(tmp2, __ T16B, tmp2, tmp3);
// Register pair <result_hi:result_lo> holds the result of carry-less multiplication
__ ins(result_hi, __ D, tmp2, 0, 1);
__ ins(result_lo, __ D, tmp2, 1, 0);
}
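The same three-multiplication identity, sketched in plain C++ with ordinary
integer arithmetic (a hypothetical illustration, not part of the patch; in
the stub's carry-less GF(2^128) setting every addition and subtraction below
becomes an XOR):

#include <cstdint>
#include <cassert>

// Karatsuba: multiply two 2-digit numbers (base 2^16) with 3 multiplications.
uint64_t karatsuba2x2(uint32_t a, uint32_t b) {
  uint64_t a1 = a >> 16, a0 = a & 0xffff;
  uint64_t b1 = b >> 16, b0 = b & 0xffff;
  uint64_t c = a1 * b1;               // high product   (C1:C0)
  uint64_t d = a0 * b0;               // low product    (D1:D0)
  uint64_t e = (a1 + a0) * (b1 + b0); // cross terms: e - c - d == a1*b0 + a0*b1
  return (c << 32) + ((e - c - d) << 16) + d;
}

int main() {
  assert(karatsuba2x2(0x12345678u, 0x9abcdef0u) ==
         uint64_t(0x12345678u) * 0x9abcdef0u);
  return 0;
}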
void ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi,
FloatRegister p, FloatRegister z, FloatRegister t1) {
const FloatRegister t0 = result;
// The GCM field polynomial f is z^128 + p(z), where p =
// z^7+z^2+z+1.
//
// z^128 === -p(z) (mod (z^128 + p(z)))
//
// so, given that the product we're reducing is
// a == lo + hi * z^128
// substituting,
// === lo - hi * p(z) (mod (z^128 + p(z)))
//
// we reduce by multiplying hi by p(z) and subtracting the result
// from (i.e. XORing it with) lo. Because p has no nonzero high
// bits we can do this with two 64-bit multiplications, lo*p and
// hi*p.
__ pmull2(t0, __ T1Q, hi, p, __ T2D);
__ ext(t1, __ T16B, t0, z, 8);
__ eor(hi, __ T16B, hi, t1);
__ ext(t1, __ T16B, z, t0, 8);
__ eor(lo, __ T16B, lo, t1);
__ pmull(t0, __ T1Q, hi, p, __ T1D);
__ eor(result, __ T16B, lo, t0);
}
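Restating the reduction step as one congruence over GF(2), where -p(z) = p(z)
and subtraction is XOR:

  $\mathrm{lo}(z) + \mathrm{hi}(z)\,z^{128} \equiv \mathrm{lo}(z) \oplus \mathrm{hi}(z)\,p(z) \pmod{z^{128} + p(z)}$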
/**
* Arguments:
*
@ -2448,10 +2511,27 @@ class StubGenerator: public StubCodeGenerator {
* Updated state at c_rarg0
*/
address generate_ghash_processBlocks() {
__ align(CodeEntryAlignment);
Label L_ghash_loop, L_exit;
// Bafflingly, GCM uses little-endian for the byte order, but
// big-endian for the bit order. For example, the polynomial 1 is
// represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
//
// So, we must either reverse the bytes in each word and do
// everything big-endian or reverse the bits in each byte and do
// it little-endian. On AArch64 it's more idiomatic to reverse
// the bits in each byte (we have an instruction, RBIT, to do
// that) and keep the data in little-endian bit order throughout the
// calculation, bit-reversing the inputs and outputs.
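// (After RBIT, the byte 0x80 becomes 0x01, so the "polynomial 1" example
// above reads back as the ordinary little-endian integer 1.)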
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
__ align(wordSize * 2);
address p = __ pc();
__ emit_int64(0x87); // The low-order bits of the field
// polynomial (i.e. p = z^7+z^2+z+1)
// repeated in the low and high parts of a
// 128-bit vector
__ emit_int64(0x87);
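// (0x87 == 0b10000111: bits 7, 2, 1 and 0 set, i.e. z^7 + z^2 + z + 1.)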
__ align(CodeEntryAlignment);
address start = __ pc();
Register state = c_rarg0;
@ -2462,104 +2542,43 @@ class StubGenerator: public StubCodeGenerator {
FloatRegister vzr = v30;
__ eor(vzr, __ T16B, vzr, vzr); // zero register
__ mov(v26, __ T16B, 1);
__ mov(v27, __ T16B, 63);
__ mov(v28, __ T16B, 62);
__ mov(v29, __ T16B, 57);
__ ldrq(v0, Address(state));
__ ldrq(v1, Address(subkeyH));
__ ldrq(v6, Address(state));
__ ldrq(v16, Address(subkeyH));
__ rev64(v0, __ T16B, v0); // Bit-reverse words in state and subkeyH
__ rbit(v0, __ T16B, v0);
__ rev64(v1, __ T16B, v1);
__ rbit(v1, __ T16B, v1);
__ ext(v0, __ T16B, v6, v6, 0x08);
__ ext(v1, __ T16B, v16, v16, 0x08);
__ eor(v16, __ T16B, v16, v1);
__ ldrq(v26, p);
__ bind(L_ghash_loop);
__ ext(v16, __ T16B, v1, v1, 0x08); // long-swap subkeyH into v1
__ eor(v16, __ T16B, v16, v1); // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
__ ldrq(v2, Address(__ post(data, 0x10)));
__ rev64(v2, __ T16B, v2); // swap data
{
Label L_ghash_loop;
__ bind(L_ghash_loop);
__ ext(v6, __ T16B, v0, v0, 0x08);
__ eor(v6, __ T16B, v6, v2);
__ ext(v2, __ T16B, v6, v6, 0x08);
__ ldrq(v2, Address(__ post(data, 0x10))); // Load the data, bit
// reversing each byte
__ rbit(v2, __ T16B, v2);
__ eor(v2, __ T16B, v0, v2); // bit-swapped data ^ bit-swapped state
__ pmull2(v7, __ T1Q, v2, v1, __ T2D); // A1*B1
__ eor(v6, __ T16B, v6, v2);
__ pmull(v5, __ T1Q, v2, v1, __ T1D); // A0*B0
__ pmull(v20, __ T1Q, v6, v16, __ T1D); // (A1 + A0)(B1 + B0)
// Multiply state in v2 by subkey in v1
ghash_multiply(/*result_lo*/v5, /*result_hi*/v7,
/*a*/v1, /*b*/v2, /*a1_xor_a0*/v16,
/*temps*/v6, v20, v18, v21);
// Reduce v7:v5 by the field polynomial
ghash_reduce(v0, v5, v7, v26, vzr, v20);
__ ext(v21, __ T16B, v5, v7, 0x08);
__ eor(v18, __ T16B, v7, v5); // A1*B1 xor A0*B0
__ eor(v20, __ T16B, v20, v21);
__ eor(v20, __ T16B, v20, v18);
__ sub(blocks, blocks, 1);
__ cbnz(blocks, L_ghash_loop);
}
// Registers pair <v7:v5> holds the result of carry-less multiplication
__ ins(v7, __ D, v20, 0, 1);
__ ins(v5, __ D, v20, 1, 0);
// The bit-reversed result is at this point in v0
__ rev64(v1, __ T16B, v0);
__ rbit(v1, __ T16B, v1);
// Result of the multiplication is shifted by one bit position
// [X3:X2:X1:X0] = [X3:X2:X1:X0] << 1
__ ushr(v18, __ T2D, v5, -63 & 63);
__ ins(v25, __ D, v18, 1, 0);
__ ins(v25, __ D, vzr, 0, 0);
__ ushl(v5, __ T2D, v5, v26);
__ orr(v5, __ T16B, v5, v25);
__ ushr(v19, __ T2D, v7, -63 & 63);
__ ins(v19, __ D, v19, 1, 0);
__ ins(v19, __ D, v18, 0, 1);
__ ushl(v7, __ T2D, v7, v26);
__ orr(v6, __ T16B, v7, v19);
__ ins(v24, __ D, v5, 0, 1);
// A = X0 << 63
__ ushl(v21, __ T2D, v5, v27);
// A = X0 << 62
__ ushl(v22, __ T2D, v5, v28);
// A = X0 << 57
__ ushl(v23, __ T2D, v5, v29);
// D = X1^A^B^C
__ eor(v21, __ T16B, v21, v22);
__ eor(v21, __ T16B, v21, v23);
__ eor(v21, __ T16B, v21, v24);
__ ins(v5, __ D, v21, 1, 0);
// [E1:E0] = [D:X0] >> 1
__ ushr(v20, __ T2D, v5, -1 & 63);
__ ushl(v18, __ T2D, v5, v27);
__ ext(v25, __ T16B, v18, vzr, 0x08);
__ orr(v19, __ T16B, v20, v25);
__ eor(v7, __ T16B, v5, v19);
// [F1:F0] = [D:X0] >> 2
__ ushr(v20, __ T2D, v5, -2 & 63);
__ ushl(v18, __ T2D, v5, v28);
__ ins(v25, __ D, v18, 0, 1);
__ orr(v19, __ T16B, v20, v25);
__ eor(v7, __ T16B, v7, v19);
// [G1:G0] = [D:X0] >> 7
__ ushr(v20, __ T2D, v5, -7 & 63);
__ ushl(v18, __ T2D, v5, v29);
__ ins(v25, __ D, v18, 0, 1);
__ orr(v19, __ T16B, v20, v25);
// [H1:H0] = [D^E1^F1^G1:X0^E0^F0^G0]
__ eor(v7, __ T16B, v7, v19);
// Result = [H1:H0]^[X3:X2]
__ eor(v0, __ T16B, v7, v6);
__ subs(blocks, blocks, 1);
__ cbnz(blocks, L_ghash_loop);
__ ext(v1, __ T16B, v0, v0, 0x08);
__ st1(v1, __ T16B, state);
__ ret(lr);


@ -186,7 +186,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm,
}
break;
#endif // INCLUDE_ALL_GCS
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
{
if (val == noreg) {


@ -177,6 +177,12 @@ void VM_Version::get_processor_features() {
if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) {
warning("UseCRC32 specified, but not supported on this CPU");
}
if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
}
if (auxv & HWCAP_AES) {
UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
UseAESIntrinsics =


@ -2614,7 +2614,7 @@ void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register t
void MacroAssembler::card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp) {
CardTableModRefBS* bs =
barrier_set_cast<CardTableModRefBS>(Universe::heap()->barrier_set());
assert(bs->kind() == BarrierSet::CardTableModRef ||
assert(bs->kind() == BarrierSet::CardTableForRS ||
bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
#ifdef ASSERT
cmpdi(CCR0, Rnew_val, 0);


@ -656,7 +656,7 @@ class StubGenerator: public StubCodeGenerator {
__ bind(filtered);
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
@ -697,7 +697,7 @@ class StubGenerator: public StubCodeGenerator {
}
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
{
Label Lskip_loop, Lstore_loop;


@ -105,7 +105,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm,
}
break;
#endif // INCLUDE_ALL_GCS
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
{
Label Lnull, Ldone;


@ -200,6 +200,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
}
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
UseMultiplyToLenIntrinsic = true;
}


@ -3958,7 +3958,7 @@ void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_v
if (new_val == G0) return;
CardTableModRefBS* bs =
barrier_set_cast<CardTableModRefBS>(Universe::heap()->barrier_set());
assert(bs->kind() == BarrierSet::CardTableModRef ||
assert(bs->kind() == BarrierSet::CardTableForRS ||
bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
card_table_write(bs->byte_map_base, tmp, store_addr);
}


@ -0,0 +1,159 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "gc/shared/memset_with_concurrent_readers.hpp"
#include "runtime/prefetch.inline.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
// An implementation of memset, for use when there may be concurrent
// readers of the region being stored into.
//
// We can't use the standard library memset if it is implemented using
// block initializing stores. Doing so can result in concurrent readers
// seeing spurious zeros.
//
// We can't use the obvious C/C++ for-loop, because the compiler may
// recognize the idiomatic loop and optimize it into a call to the
// standard library memset; we've seen exactly this happen with, for
// example, Solaris Studio 12.3. Hence the use of inline assembly
// code, hiding loops from the compiler's optimizer.
//
// We don't attempt to use the standard library memset when it is safe
// to do so. We could conservatively do so by detecting the presence
// of block initializing stores (VM_Version::has_blk_init()), but the
// implementation provided here should be sufficient.
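//
// For reference, the "obvious" loop warned about above, which a compiler
// may pattern-match into a library memset call (hypothetical, shown only
// to illustrate what this file avoids):
//
//   void naive_memset(void* to, int value, size_t size) {
//     char* p = static_cast<char*>(to);
//     for (size_t i = 0; i < size; ++i) p[i] = static_cast<char>(value);
//   }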
inline void fill_subword(void* start, void* end, int value) {
STATIC_ASSERT(BytesPerWord == 8);
assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition");
// Dispatch on (end - start).
void* pc;
__asm__ volatile(
// offset := (7 - (end - start)) + 3
// 3 instructions from rdpc to DISPATCH
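// Worked example (illustrative, not in the original): for end - start == 3,
// offset = -3*4 + 40 = 28 bytes past the rd below, i.e. the "stb [end-3]"
// store, so exactly the last three byte stores execute.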
" sub %[offset], %[end], %[offset]\n\t" // offset := start - end
" sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
" add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
" rd %pc, %[pc]\n\t" // dispatch on scaled offset
" jmpl %[pc]+%[offset], %g0\n\t"
" nop\n\t"
// DISPATCH: no direct reference, but without it the store block may be elided.
"1:\n\t"
" stb %[value], [%[end]-7]\n\t" // end[-7] = value
" stb %[value], [%[end]-6]\n\t"
" stb %[value], [%[end]-5]\n\t"
" stb %[value], [%[end]-4]\n\t"
" stb %[value], [%[end]-3]\n\t"
" stb %[value], [%[end]-2]\n\t"
" stb %[value], [%[end]-1]\n\t" // end[-1] = value
: [pc] "=&r" (pc),       // temp (earlyclobber output)
  [offset] "+&r" (start) // read-write: clobbered by the offset computation
: [end] "r" (end),
  [value] "r" (value)
: "memory");
}
void memset_with_concurrent_readers(void* to, int value, size_t size) {
Prefetch::write(to, 0);
void* end = static_cast<char*>(to) + size;
if (size >= BytesPerWord) {
// Fill any partial word prefix.
uintx* aligned_to = static_cast<uintx*>(align_ptr_up(to, BytesPerWord));
fill_subword(to, aligned_to, value);
// Compute fill word.
STATIC_ASSERT(BitsPerByte == 8);
STATIC_ASSERT(BitsPerWord == 64);
uintx xvalue = value & 0xff;
xvalue |= (xvalue << 8);
xvalue |= (xvalue << 16);
xvalue |= (xvalue << 32);
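// e.g. value == 0x2a replicates to xvalue == 0x2a2a2a2a2a2a2a2a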
uintx* aligned_end = static_cast<uintx*>(align_ptr_down(end, BytesPerWord));
assert(aligned_to <= aligned_end, "invariant");
// for ( ; aligned_to < aligned_end; ++aligned_to) {
// *aligned_to = xvalue;
// }
uintptr_t temp;
__asm__ volatile(
// Unroll loop x8.
" sub %[aend], %[ato], %[temp]\n\t"
" cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words
" ba %xcc, 2f\n\t" // goto TEST always
" sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
// LOOP:
"1:\n\t" // unrolled x8 store loop top
" cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to
" stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
" stx %[xvalue], [%[ato]-56]\n\t"
" stx %[xvalue], [%[ato]-48]\n\t"
" stx %[xvalue], [%[ato]-40]\n\t"
" stx %[xvalue], [%[ato]-32]\n\t"
" stx %[xvalue], [%[ato]-24]\n\t"
" stx %[xvalue], [%[ato]-16]\n\t"
" stx %[xvalue], [%[ato]-8]\n\t"
// TEST:
"2:\n\t"
" bgu,a %xcc, 1b\n\t" // goto LOOP if more than 7 words remaining
" add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration
// Fill remaining < 8 full words.
// Dispatch on (aligned_end - aligned_to).
// offset := (7 - (aligned_end - aligned_to)) + 3
// 3 instructions from rdpc to DISPATCH
" sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
" srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4
" add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size
" rd %pc, %[temp]\n\t" // dispatch on scaled offset
" jmpl %[temp]+%[ato], %g0\n\t"
" nop\n\t"
// DISPATCH: no direct reference, but without it the store block may be elided.
"3:\n\t"
" stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
" stx %[xvalue], [%[aend]-48]\n\t"
" stx %[xvalue], [%[aend]-40]\n\t"
" stx %[xvalue], [%[aend]-32]\n\t"
" stx %[xvalue], [%[aend]-24]\n\t"
" stx %[xvalue], [%[aend]-16]\n\t"
" stx %[xvalue], [%[aend]-8]\n\t" // aligned_end[-1] = xvalue
: [temp] "=&r" (temp),       // temp (earlyclobber output)
  [ato] "+&r" (aligned_to)   // read-write: clobbered by the loop and dispatch
: [aend] "r" (aligned_end),
  [xvalue] "r" (xvalue)
: "cc", "memory");
to = aligned_end; // setup for suffix
}
// Fill any partial word suffix. Also the prefix if size < BytesPerWord.
fill_subword(to, end, value);
}
#endif // INCLUDE_ALL_GCS


@ -981,7 +981,7 @@ class StubGenerator: public StubCodeGenerator {
__ restore();
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
@ -1014,7 +1014,7 @@ class StubGenerator: public StubCodeGenerator {
__ restore();
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
{
CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
@ -5110,6 +5110,188 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
#define ADLER32_NUM_TEMPS 16
/**
* Arguments:
*
* Inputs:
* O0 - int adler
* O1 - byte* buff
* O2 - int len
*
* Output:
* O0 - int adler result
*/
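// Adler-32 recurrence, for reference: for each byte b, s1 = (s1 + b) mod 65521
// and s2 = (s2 + s1) mod 65521; the result is (s2 << 16) | s1, which is what
// the code below computes (0xFFF1 == 65521).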
address generate_updateBytesAdler32() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
address start = __ pc();
Label L_cleanup_loop, L_cleanup_loop_check;
Label L_main_loop_check, L_main_loop, L_inner_loop, L_inner_loop_check;
Label L_nmax_check_done;
// Aliases
Register s1 = O0;
Register s2 = O3;
Register buff = O1;
Register len = O2;
Register temp[ADLER32_NUM_TEMPS] = {L0, L1, L2, L3, L4, L5, L6, L7, I0, I1, I2, I3, I4, I5, G3, I7};
// Max number of bytes we can process before having to take the mod
// 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
unsigned long NMAX = 0x15B0;
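// Sanity check (illustrative, with BASE = 65521): n = 5552 gives
// 255*5552*5553/2 + 5553*65520 = 4294690200 <= 4294967295 = 2^32-1,
// while n = 5553 gives 4296171735, which would overflow 32 bits.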
// Zero-out the upper bits of len
__ clruwu(len);
// Create the mask 0xFFFF
__ set64(0x00FFFF, O4, O5); // O5 is the temp register
// s1 is initialized to the lower 16 bits of adler
// s2 is initialized to the upper 16 bits of adler
__ srlx(O0, 16, O5); // adler >> 16
__ and3(O0, O4, s1); // s1 = (adler & 0xFFFF)
__ and3(O5, O4, s2); // s2 = ((adler >> 16) & 0xFFFF)
// The pipelined loop needs at least 16 elements for 1 iteration
// It does check this itself, but it is more efficient to skip to the cleanup loop
// Set up the constant for cutoff checking
__ mov(15, O4);
// Check if we are above the cutoff, if not go to the cleanup loop immediately
__ cmp_and_br_short(len, O4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_loop_check);
// Free up some registers for our use
for (int i = 0; i < ADLER32_NUM_TEMPS; i++) {
__ movxtod(temp[i], as_FloatRegister(2*i));
}
// Loop maintenance stuff is done at the end of the loop, so skip to there
__ ba_short(L_main_loop_check);
__ BIND(L_main_loop);
// Prologue for inner loop
__ ldub(buff, 0, L0);
__ dec(O5);
for (int i = 1; i < 8; i++) {
__ ldub(buff, i, temp[i]);
}
__ inc(buff, 8);
// Inner loop processes 16 elements at a time; it might never execute if only 16 elements
// are to be processed by the outer loop
__ ba_short(L_inner_loop_check);
__ BIND(L_inner_loop);
for (int i = 0; i < 8; i++) {
__ ldub(buff, (2*i), temp[(8+(2*i)) % ADLER32_NUM_TEMPS]);
__ add(s1, temp[i], s1);
__ ldub(buff, (2*i)+1, temp[(8+(2*i)+1) % ADLER32_NUM_TEMPS]);
__ add(s2, s1, s2);
}
// Original temp 0-7 used and new loads to temp 0-7 issued
// temp 8-15 ready to be consumed
__ add(s1, I0, s1);
__ dec(O5);
__ add(s2, s1, s2);
__ add(s1, I1, s1);
__ inc(buff, 16);
__ add(s2, s1, s2);
for (int i = 0; i < 6; i++) {
__ add(s1, temp[10+i], s1);
__ add(s2, s1, s2);
}
__ BIND(L_inner_loop_check);
__ nop();
__ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_inner_loop);
// Epilogue
for (int i = 0; i < 4; i++) {
__ ldub(buff, (2*i), temp[8+(2*i)]);
__ add(s1, temp[i], s1);
__ ldub(buff, (2*i)+1, temp[8+(2*i)+1]);
__ add(s2, s1, s2);
}
__ add(s1, temp[4], s1);
__ inc(buff, 8);
for (int i = 0; i < 11; i++) {
__ add(s2, s1, s2);
__ add(s1, temp[5+i], s1);
}
__ add(s2, s1, s2);
// Take the mod for s1 and s2
__ set64(0xFFF1, L0, L1);
__ udivx(s1, L0, L1);
__ udivx(s2, L0, L2);
__ mulx(L0, L1, L1);
__ mulx(L0, L2, L2);
__ sub(s1, L1, s1);
__ sub(s2, L2, s2);
// Make sure there is something left to process
__ BIND(L_main_loop_check);
__ set64(NMAX, L0, L1);
// k = len < NMAX ? len : NMAX
__ cmp_and_br_short(len, L0, Assembler::greaterEqualUnsigned, Assembler::pt, L_nmax_check_done);
__ andn(len, 0x0F, L0); // only loop a multiple of 16 times
__ BIND(L_nmax_check_done);
__ mov(L0, O5);
__ sub(len, L0, len); // len -= k
__ srlx(O5, 4, O5); // multiples of 16
__ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_main_loop);
// Restore anything we used, take the mod one last time, combine and return
// Restore any registers we saved
for (int i = 0; i < ADLER32_NUM_TEMPS; i++) {
__ movdtox(as_FloatRegister(2*i), temp[i]);
}
// There might be nothing left to process
__ ba_short(L_cleanup_loop_check);
__ BIND(L_cleanup_loop);
__ ldub(buff, 0, O4); // load single byte from buffer
__ inc(buff); // buff++
__ add(s1, O4, s1); // s1 += *buff++;
__ dec(len); // len--
__ add(s1, s2, s2); // s2 += s1;
__ BIND(L_cleanup_loop_check);
__ nop();
__ cmp_and_br_short(len, 0, Assembler::notEqual, Assembler::pt, L_cleanup_loop);
// Take the mod one last time
__ set64(0xFFF1, O1, O2);
__ udivx(s1, O1, O2);
__ udivx(s2, O1, O5);
__ mulx(O1, O2, O2);
__ mulx(O1, O5, O5);
__ sub(s1, O2, s1);
__ sub(s2, O5, s2);
// Combine lower bits and higher bits
__ sllx(s2, 16, s2); // s2 = s2 << 16
__ or3(s1, s2, s1); // adler = s2 | s1
// Final return value is in O0
__ retl();
__ delayed()->nop();
return start;
}
void generate_initial() {
// Generates all stubs and initializes the entry points
@ -5206,6 +5388,11 @@ class StubGenerator: public StubCodeGenerator {
if (UseCRC32CIntrinsics) {
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
}
// generate Adler32 intrinsics code
if (UseAdler32Intrinsics) {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
}
}


@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc) {
enum /* platform_dependent_constants */ {
// %%%%%%%% May be able to shrink this a lot
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
code_size2 = 27000 // simply increase if too small (assembler will crash if too small)
};
class Sparc {


@ -91,7 +91,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm,
}
break;
#endif // INCLUDE_ALL_GCS
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
{
if (index == noreg ) {


@ -85,27 +85,6 @@ void VM_Version::initialize() {
_supports_cx8 = has_v9();
_supports_atomic_getset4 = true; // swap instruction
// There are Fujitsu Sparc64 CPUs which support blk_init as well so
// we have to take this check out of the 'is_niagara()' block below.
if (has_blk_init()) {
// When using CMS or G1, we cannot use memset() in BOT updates
// because the sun4v/CMT version in libc_psr uses BIS which
// exposes "phantom zeros" to concurrent readers. See 6948537.
if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) {
FLAG_SET_DEFAULT(UseMemSetInBOT, false);
}
// Issue a stern warning if the user has explicitly set
// UseMemSetInBOT (it is known to cause issues), but allow
// use for experimentation and debugging.
if (UseConcMarkSweepGC || UseG1GC) {
if (UseMemSetInBOT) {
assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error");
warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability"
" on sun4v; please understand that you are using at your own risk!");
}
}
}
if (is_niagara()) {
// Indirect branch is the same cost as direct
if (FLAG_IS_DEFAULT(UseInlineCaches)) {
@ -377,6 +356,15 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
}
if (UseVIS > 2) {
if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
FLAG_SET_DEFAULT(UseAdler32Intrinsics, true);
}
} else if (UseAdler32Intrinsics) {
warning("SPARC Adler32 intrinsics require VIS3 instruction support. Intrinsics will be disabled.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
}
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
(cache_line_size > ContendedPaddingWidth))
ContendedPaddingWidth = cache_line_size;

File diff suppressed because it is too large


@ -438,7 +438,9 @@ class ArrayAddress VALUE_OBJ_CLASS_SPEC {
};
const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512*2 / wordSize);
// 64-bit reflects the fxsave size, which is 512 bytes, plus the new xsave area on EVEX, which is another 2176 bytes
// See fxsave and xsave(EVEX enabled) documentation for layout
const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);
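// (512 bytes of fxsave area + 2176 bytes of EVEX xsave extension = 2688)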
// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
@ -594,11 +596,16 @@ class Assembler : public AbstractAssembler {
private:
int evex_encoding;
int input_size_in_bits;
int avx_vector_len;
int tuple_type;
bool is_evex_instruction;
int _evex_encoding;
int _input_size_in_bits;
int _avx_vector_len;
int _tuple_type;
bool _is_evex_instruction;
bool _legacy_mode_bw;
bool _legacy_mode_dq;
bool _legacy_mode_vl;
bool _legacy_mode_vlbw;
bool _instruction_uses_vl;
// 64bit prefixes
int prefix_and_encode(int reg_enc, bool byteinst = false);
@ -972,11 +979,16 @@ private:
// belong in macro assembler but there is no need for both varieties to exist
void init_attributes(void) {
evex_encoding = 0;
input_size_in_bits = 0;
avx_vector_len = AVX_NoVec;
tuple_type = EVEX_ETUP;
is_evex_instruction = false;
_evex_encoding = 0;
_input_size_in_bits = 0;
_avx_vector_len = AVX_NoVec;
_tuple_type = EVEX_ETUP;
_is_evex_instruction = false;
_legacy_mode_bw = (VM_Version::supports_avx512bw() == false);
_legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
_legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
_legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
_instruction_uses_vl = false;
}
void lea(Register dst, Address src);
@ -1344,8 +1356,10 @@ private:
void fxch(int i = 1);
void fxrstor(Address src);
void xrstor(Address src);
void fxsave(Address dst);
void xsave(Address dst);
void fyl2x();
void frndint();
@ -1479,11 +1493,12 @@ private:
void movb(Address dst, int imm8);
void movb(Register dst, Address src);
void kmovq(KRegister dst, KRegister src);
void kmovql(KRegister dst, KRegister src);
void kmovql(KRegister dst, Register src);
void kmovdl(KRegister dst, Register src);
void kmovq(Address dst, KRegister src);
void kmovq(KRegister dst, Address src);
void kmovwl(KRegister dst, Register src);
void kmovql(Address dst, KRegister src);
void kmovql(KRegister dst, Address src);
void movdl(XMMRegister dst, Register src);
void movdl(Register dst, XMMRegister src);
@ -1509,9 +1524,12 @@ private:
void vmovdqu(XMMRegister dst, XMMRegister src);
// Move Unaligned 512bit Vector
void evmovdqu(Address dst, XMMRegister src, int vector_len);
void evmovdqu(XMMRegister dst, Address src, int vector_len);
void evmovdqu(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdqul(Address dst, XMMRegister src, int vector_len);
void evmovdqul(XMMRegister dst, Address src, int vector_len);
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
void evmovdquq(Address dst, XMMRegister src, int vector_len);
void evmovdquq(XMMRegister dst, Address src, int vector_len);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
// Move lower 64bit to high 64bit in 128bit register
void movlhps(XMMRegister dst, XMMRegister src);
@ -1643,6 +1661,7 @@ private:
// Permutation of 64bit words
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void pause();
@ -1920,6 +1939,10 @@ private:
void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// Sqrt Packed Floating-Point Values - Double precision only
void vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len);
void vsqrtpd(XMMRegister dst, Address src, int vector_len);
// Bitwise Logical AND of Packed Floating-Point Values
void andpd(XMMRegister dst, XMMRegister src);
void andps(XMMRegister dst, XMMRegister src);
@ -2057,6 +2080,9 @@ private:
void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
void vextractf32x4h(Address dst, XMMRegister src, int value);
void vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
void vinsertf32x4h(XMMRegister dst, Address src, int value);
// duplicate 4-byte integer data from src into 8 locations in dest
void vpbroadcastd(XMMRegister dst, XMMRegister src);


@ -3798,16 +3798,24 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
__ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
}
__ xorps(dest->as_xmm_float_reg(),
ExternalAddress((address)float_signflip_pool));
if (UseAVX > 1) {
__ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(),
ExternalAddress((address)float_signflip_pool));
} else {
__ xorps(dest->as_xmm_float_reg(),
ExternalAddress((address)float_signflip_pool));
}
} else if (dest->is_double_xmm()) {
if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
__ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
}
__ xorpd(dest->as_xmm_double_reg(),
ExternalAddress((address)double_signflip_pool));
if (UseAVX > 1) {
__ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(),
ExternalAddress((address)double_signflip_pool));
} else {
__ xorpd(dest->as_xmm_double_reg(),
ExternalAddress((address)double_signflip_pool));
}
} else if (left->is_single_fpu() || left->is_double_fpu()) {
assert(left->fpu() == 0, "arg must be on TOS");
assert(dest->fpu() == 0, "dest must be TOS");


@ -401,11 +401,9 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args,
} else if (UseSSE == 1) {
int xmm_off = xmm_regs_as_doubles_off;
for (int n = 0; n < FrameMap::nof_xmm_regs; n++) {
if (n < xmm_bypass_limit) {
VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
}
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
xmm_off += 2;
}
assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
@ -452,14 +450,11 @@ static OopMap* save_live_registers(StubAssembler* sasm, int num_rt_args,
__ frstor(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size));
// Save the FPU registers in de-opt-able form
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + 0));
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + 8));
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + 16));
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + 24));
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + 32));
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + 40));
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + 48));
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + 56));
int offset = 0;
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
__ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
offset += 8;
}
}
if (UseSSE >= 2) {
@ -468,52 +463,26 @@ static OopMap* save_live_registers(StubAssembler* sasm, int num_rt_args,
// so always save them as doubles.
// note that float values are _not_ converted automatically, so for float values
// the second word contains only garbage data.
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 0), xmm0);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 8), xmm1);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 16), xmm2);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 24), xmm3);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 32), xmm4);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 40), xmm5);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 48), xmm6);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 56), xmm7);
int xmm_bypass_limit = FrameMap::nof_xmm_regs;
int offset = 0;
#ifdef _LP64
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 64), xmm8);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 72), xmm9);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 80), xmm10);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 88), xmm11);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 96), xmm12);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 104), xmm13);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 112), xmm14);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 120), xmm15);
if (UseAVX > 2) {
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 128), xmm16);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 136), xmm17);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 144), xmm18);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 152), xmm19);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 160), xmm20);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 168), xmm21);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 176), xmm22);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 184), xmm23);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 192), xmm24);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 200), xmm25);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 208), xmm26);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 216), xmm27);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 224), xmm28);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 232), xmm29);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 240), xmm30);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 248), xmm31);
if (UseAVX < 3) {
xmm_bypass_limit = xmm_bypass_limit / 2;
}
#endif
for (int n = 0; n < xmm_bypass_limit; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
__ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
offset += 8;
}
#endif // _LP64
} else if (UseSSE == 1) {
// save XMM registers as float because double not supported without SSE2
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 0), xmm0);
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 8), xmm1);
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 16), xmm2);
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 24), xmm3);
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 32), xmm4);
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 40), xmm5);
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 48), xmm6);
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 56), xmm7);
// save XMM registers as float because double is not supported without SSE2 (num MMX == num fpu)
int offset = 0;
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
__ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name);
offset += 8;
}
}
}
@ -528,52 +497,26 @@ static void restore_fpu(StubAssembler* sasm, bool restore_fpu_registers = true)
if (restore_fpu_registers) {
if (UseSSE >= 2) {
// restore XMM registers
__ movdbl(xmm0, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 0));
__ movdbl(xmm1, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 8));
__ movdbl(xmm2, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 16));
__ movdbl(xmm3, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 24));
__ movdbl(xmm4, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 32));
__ movdbl(xmm5, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 40));
__ movdbl(xmm6, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 48));
__ movdbl(xmm7, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 56));
int xmm_bypass_limit = FrameMap::nof_xmm_regs;
#ifdef _LP64
__ movdbl(xmm8, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 64));
__ movdbl(xmm9, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 72));
__ movdbl(xmm10, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 80));
__ movdbl(xmm11, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 88));
__ movdbl(xmm12, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 96));
__ movdbl(xmm13, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 104));
__ movdbl(xmm14, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 112));
__ movdbl(xmm15, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 120));
if (UseAVX > 2) {
__ movdbl(xmm16, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 128));
__ movdbl(xmm17, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 136));
__ movdbl(xmm18, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 144));
__ movdbl(xmm19, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 152));
__ movdbl(xmm20, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 160));
__ movdbl(xmm21, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 168));
__ movdbl(xmm22, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 176));
__ movdbl(xmm23, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 184));
__ movdbl(xmm24, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 192));
__ movdbl(xmm25, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 200));
__ movdbl(xmm26, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 208));
__ movdbl(xmm27, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 216));
__ movdbl(xmm28, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 224));
__ movdbl(xmm29, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 232));
__ movdbl(xmm30, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 240));
__ movdbl(xmm31, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 248));
if (UseAVX < 3) {
xmm_bypass_limit = xmm_bypass_limit / 2;
}
#endif
int offset = 0;
for (int n = 0; n < xmm_bypass_limit; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
__ movdbl(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
offset += 8;
}
#endif // _LP64
} else if (UseSSE == 1) {
// restore XMM registers
__ movflt(xmm0, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 0));
__ movflt(xmm1, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 8));
__ movflt(xmm2, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 16));
__ movflt(xmm3, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 24));
__ movflt(xmm4, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 32));
__ movflt(xmm5, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 40));
__ movflt(xmm6, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 48));
__ movflt(xmm7, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 56));
// restore XMM registers (num MMX == num fpu)
int offset = 0;
for (int n = 0; n < FrameMap::nof_fpu_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
__ movflt(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset));
offset += 8;
}
}
if (UseSSE < 2) {

View File

@ -3751,8 +3751,31 @@ void MacroAssembler::pop_CPU_state() {
}
void MacroAssembler::pop_FPU_state() {
NOT_LP64(frstor(Address(rsp, 0));)
LP64_ONLY(fxrstor(Address(rsp, 0));)
#ifndef _LP64
frstor(Address(rsp, 0));
#else
// AVX will continue to use the fxsave area.
// EVEX needs to utilize the xsave area, which is under different
// management.
if (VM_Version::supports_evex()) {
// EDX:EAX describe the XSAVE header and are obtained while fetching
// info for XCR0 via cpuid. Together they form the 64-bit header mask,
// of which bits 62:10 are currently reserved for future implementations
// and unused. Bit 63 is unused by our implementation because we do not
// utilize compressed XSAVE areas. Bits 9..8 are currently ignored because
// we do not use PKRU state or MSR tracing.
// Ergo we are primarily concerned with bits 7..0, which identify the ISA
// extensions and features enabled for a given machine. They are described
// by the XemXcr0Eax union and are used to map the XSAVE area when
// restoring registers, as directed by XCR0.
movl(rdx,VM_Version::get_xsave_header_upper_segment());
movl(rax,VM_Version::get_xsave_header_lower_segment());
xrstor(Address(rsp, 0));
} else {
fxrstor(Address(rsp, 0));
}
#endif
addptr(rsp, FPUStateSizeInWords * wordSize);
}
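// A minimal user-space sketch (not part of this commit) of the XCR0 decoding
// the comments above rely on. Bits 7..0 of XCR0 select the state components
// that XSAVE/XRSTOR manage; the field names mirror the XemXcr0Eax union later
// in this change. Assumes a compiler providing _xgetbv (GCC/Clang, -mxsave).
#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t xcr0 = _xgetbv(0);                       // XGETBV with ECX = 0 reads XCR0
  printf("x87    : %d\n", (int)( xcr0       & 1));  // bit 0
  printf("sse    : %d\n", (int)((xcr0 >> 1) & 1));  // bit 1: XMM state
  printf("ymm    : %d\n", (int)((xcr0 >> 2) & 1));  // bit 2: upper YMM halves
  printf("opmask : %d\n", (int)((xcr0 >> 5) & 1));  // bit 5: k0..k7 mask registers
  printf("zmm512 : %d\n", (int)((xcr0 >> 6) & 1));  // bit 6: upper halves of zmm0..zmm15
  printf("zmm32  : %d\n", (int)((xcr0 >> 7) & 1));  // bit 7: zmm16..zmm31
  return 0;
}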
@ -3769,13 +3792,49 @@ void MacroAssembler::push_CPU_state() {
push_FPU_state();
}
#ifdef _LP64
#define XSTATE_BV 0x200
#endif
void MacroAssembler::push_FPU_state() {
subptr(rsp, FPUStateSizeInWords * wordSize);
#ifndef _LP64
fnsave(Address(rsp, 0));
fwait();
#else
fxsave(Address(rsp, 0));
// AVX will continue to use the fxsave area.
// EVEX needs to utilize the xsave area, which is under different
// management.
if (VM_Version::supports_evex()) {
// Save a copy of EAX and EDX
push(rax);
push(rdx);
// EDX:EAX describe the XSAVE header and are obtained while fetching
// info for XCR0 via cpuid. Together they form the 64-bit header mask,
// of which bits 62:10 are currently reserved for future implementations
// and unused. Bit 63 is unused by our implementation because we do not
// utilize compressed XSAVE areas. Bits 9..8 are currently ignored because
// we do not use PKRU state or MSR tracing.
// Ergo we are primarily concerned with bits 7..0, which identify the ISA
// extensions and features enabled for a given machine. They are described
// by the XemXcr0Eax union and are used to program the XSAVE area for
// saving the required registers as defined in XCR0.
int xcr0_edx = VM_Version::get_xsave_header_upper_segment();
int xcr0_eax = VM_Version::get_xsave_header_lower_segment();
movl(rdx,xcr0_edx);
movl(rax,xcr0_eax);
xsave(Address(rsp, wordSize*2));
// Now apply the control bits and clear bytes 8..23 of the header
pop(rdx);
pop(rax);
movl(Address(rsp, XSTATE_BV), xcr0_eax);
movl(Address(rsp, XSTATE_BV+4), xcr0_edx);
andq(Address(rsp, XSTATE_BV+8), 0);
andq(Address(rsp, XSTATE_BV+16), 0);
} else {
fxsave(Address(rsp, 0));
}
#endif // _LP64
}
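// Offset arithmetic behind XSTATE_BV, as a standalone sketch (struct name is
// illustrative): the XSAVE area opens with a 512-byte FXSAVE-compatible
// legacy region, so the 64-byte XSAVE header starts at byte 0x200 and its
// first 8 bytes are XSTATE_BV; the andq stores above zero header bytes 8..23.
#include <cstddef>

struct XSaveAreaSketch {
  unsigned char      legacy_region[512]; // FXSAVE-compatible region
  unsigned long long xstate_bv;          // header bytes 0..7 (XSTATE_BV)
  unsigned long long xcomp_bv;           // header bytes 8..15, zeroed above
  unsigned long long reserved[6];        // header bytes 16..63, first 8 zeroed above
};
static_assert(offsetof(XSaveAreaSketch, xstate_bv) == 0x200,
              "XSTATE_BV lives at offset 0x200 in the XSAVE area");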
@ -4082,6 +4141,84 @@ void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src
}
}
void MacroAssembler::vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
int nds_enc = nds->encoding();
int dst_enc = dst->encoding();
bool dst_upper_bank = (dst_enc > 15);
bool nds_upper_bank = (nds_enc > 15);
if (VM_Version::supports_avx512novl() &&
(nds_upper_bank || dst_upper_bank)) {
if (dst_upper_bank) {
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
movflt(xmm0, nds);
if (reachable(src)) {
vxorps(xmm0, xmm0, as_Address(src), Assembler::AVX_128bit);
} else {
lea(rscratch1, src);
vxorps(xmm0, xmm0, Address(rscratch1, 0), Assembler::AVX_128bit);
}
movflt(dst, xmm0);
evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
} else {
movflt(dst, nds);
if (reachable(src)) {
vxorps(dst, dst, as_Address(src), Assembler::AVX_128bit);
} else {
lea(rscratch1, src);
vxorps(dst, dst, Address(rscratch1, 0), Assembler::AVX_128bit);
}
}
} else {
if (reachable(src)) {
vxorps(dst, nds, as_Address(src), Assembler::AVX_128bit);
} else {
lea(rscratch1, src);
vxorps(dst, nds, Address(rscratch1, 0), Assembler::AVX_128bit);
}
}
}
void MacroAssembler::vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
int nds_enc = nds->encoding();
int dst_enc = dst->encoding();
bool dst_upper_bank = (dst_enc > 15);
bool nds_upper_bank = (nds_enc > 15);
if (VM_Version::supports_avx512novl() &&
(nds_upper_bank || dst_upper_bank)) {
if (dst_upper_bank) {
subptr(rsp, 64);
evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
movdbl(xmm0, nds);
if (reachable(src)) {
vxorps(xmm0, xmm0, as_Address(src), Assembler::AVX_128bit);
} else {
lea(rscratch1, src);
vxorps(xmm0, xmm0, Address(rscratch1, 0), Assembler::AVX_128bit);
}
movdbl(dst, xmm0);
evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
addptr(rsp, 64);
} else {
movdbl(dst, nds);
if (reachable(src)) {
vxorps(dst, dst, as_Address(src), Assembler::AVX_128bit);
} else {
lea(rscratch1, src);
vxorps(dst, dst, Address(rscratch1, 0), Assembler::AVX_128bit);
}
}
} else {
if (reachable(src)) {
vxorpd(dst, nds, as_Address(src), Assembler::AVX_128bit);
} else {
lea(rscratch1, src);
vxorpd(dst, nds, Address(rscratch1, 0), Assembler::AVX_128bit);
}
}
}
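// What vnegatess/vnegatesd compute, reduced to scalar C++ (a sketch, assuming
// the AddressLiteral src points at a constant whose low element is the sign
// mask 0x80000000 / 0x8000000000000000): negation is an XOR of the sign bit.
#include <cstdint>
#include <cstring>

static double negate_via_sign_xor(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits ^= UINT64_C(0x8000000000000000);  // flip only the sign bit
  std::memcpy(&x, &bits, sizeof x);
  return x;
}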
void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
if (reachable(src)) {
vxorpd(dst, nds, as_Address(src), vector_len);
@ -4318,9 +4455,10 @@ void MacroAssembler::store_check(Register obj, Address dst) {
void MacroAssembler::store_check(Register obj) {
// Does a store check for the oop in register obj. The content of
// register obj is destroyed afterwards.
BarrierSet* bs = Universe::heap()->barrier_set();
assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
assert(bs->kind() == BarrierSet::CardTableForRS ||
bs->kind() == BarrierSet::CardTableExtension,
"Wrong barrier set kind");
CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
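// The card-table write barrier emitted here, sketched in C++ (the names and
// the 512-byte card size are the usual HotSpot defaults, shown for
// illustration): the heap is divided into cards, one byte each, and
// byte_map_base is biased so that (addr >> card_shift) indexes it directly.
#include <cstdint>

static const int kCardShift = 9;            // 512-byte cards
extern volatile uint8_t* byte_map_base;     // assumed set up by the collector

static void store_check_sketch(const void* field_addr) {
  uintptr_t card = (uintptr_t)field_addr >> kCardShift;
  byte_map_base[card] = 0;                  // 0 is HotSpot's dirty_card value
}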
@ -4570,69 +4708,58 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
// if we are coming from c1, xmm registers may be live
int off = 0;
int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
if (UseAVX > 2) {
num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
}
if (UseSSE == 1) {
subptr(rsp, sizeof(jdouble)*8);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
for (int n = 0; n < 8; n++) {
movflt(Address(rsp, off++*sizeof(jdouble)), as_XMMRegister(n));
}
} else if (UseSSE >= 2) {
if (UseAVX > 2) {
push(rbx);
movl(rbx, 0xffff);
#ifdef _LP64
kmovql(k1, rbx);
#else
kmovdl(k1, rbx);
#endif
kmovwl(k1, rbx);
pop(rbx);
}
#ifdef COMPILER2
if (MaxVectorSize > 16) {
assert(UseAVX > 0, "256bit vectors are supported only with AVX");
if (UseAVX > 2) {
// Save upper half of ZMM registers
subptr(rsp, 32*num_xmm_regs);
for (int n = 0; n < num_xmm_regs; n++) {
vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n));
}
off = 0;
}
assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
// Save upper half of YMM registers
subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
vextractf128h(Address(rsp, 0),xmm0);
vextractf128h(Address(rsp, 16),xmm1);
vextractf128h(Address(rsp, 32),xmm2);
vextractf128h(Address(rsp, 48),xmm3);
vextractf128h(Address(rsp, 64),xmm4);
vextractf128h(Address(rsp, 80),xmm5);
vextractf128h(Address(rsp, 96),xmm6);
vextractf128h(Address(rsp,112),xmm7);
#ifdef _LP64
vextractf128h(Address(rsp,128),xmm8);
vextractf128h(Address(rsp,144),xmm9);
vextractf128h(Address(rsp,160),xmm10);
vextractf128h(Address(rsp,176),xmm11);
vextractf128h(Address(rsp,192),xmm12);
vextractf128h(Address(rsp,208),xmm13);
vextractf128h(Address(rsp,224),xmm14);
vextractf128h(Address(rsp,240),xmm15);
#endif
subptr(rsp, 16*num_xmm_regs);
for (int n = 0; n < num_xmm_regs; n++) {
vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
}
}
#endif
// Save whole 128-bit (16 bytes) XMM registers
subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
movdqu(Address(rsp,off++*16),xmm0);
movdqu(Address(rsp,off++*16),xmm1);
movdqu(Address(rsp,off++*16),xmm2);
movdqu(Address(rsp,off++*16),xmm3);
movdqu(Address(rsp,off++*16),xmm4);
movdqu(Address(rsp,off++*16),xmm5);
movdqu(Address(rsp,off++*16),xmm6);
movdqu(Address(rsp,off++*16),xmm7);
// Save whole 128-bit (16 bytes) XMM registers
subptr(rsp, 16*num_xmm_regs);
off = 0;
#ifdef _LP64
movdqu(Address(rsp,off++*16),xmm8);
movdqu(Address(rsp,off++*16),xmm9);
movdqu(Address(rsp,off++*16),xmm10);
movdqu(Address(rsp,off++*16),xmm11);
movdqu(Address(rsp,off++*16),xmm12);
movdqu(Address(rsp,off++*16),xmm13);
movdqu(Address(rsp,off++*16),xmm14);
movdqu(Address(rsp,off++*16),xmm15);
if (VM_Version::supports_avx512novl()) {
for (int n = 0; n < num_xmm_regs; n++) {
vextractf32x4h(Address(rsp, off++*16), as_XMMRegister(n), 0);
}
} else {
for (int n = 0; n < num_xmm_regs; n++) {
movdqu(Address(rsp, off++*16), as_XMMRegister(n));
}
}
#else
for (int n = 0; n < num_xmm_regs; n++) {
movdqu(Address(rsp, off++*16), as_XMMRegister(n));
}
#endif
}
@ -4687,7 +4814,7 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
movsd(Address(rsp, 0), xmm0);
fld_d(Address(rsp, 0));
#endif // _LP64
addptr(rsp, sizeof(jdouble) * nb_args);
addptr(rsp, sizeof(jdouble)*nb_args);
if (num_fpu_regs_in_use > 1) {
// Must save return value to stack and then restore entire FPU
// stack except incoming arguments
@ -4697,63 +4824,50 @@ void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int
addptr(rsp, sizeof(jdouble));
}
fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
addptr(rsp, sizeof(jdouble) * nb_args);
addptr(rsp, sizeof(jdouble)*nb_args);
}
off = 0;
if (UseSSE == 1) {
movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
for (int n = 0; n < 8; n++) {
movflt(as_XMMRegister(n), Address(rsp, off++*sizeof(jdouble)));
}
addptr(rsp, sizeof(jdouble)*8);
} else if (UseSSE >= 2) {
// Restore whole 128-bit (16 bytes) XMM registers
movdqu(xmm0, Address(rsp,off++*16));
movdqu(xmm1, Address(rsp,off++*16));
movdqu(xmm2, Address(rsp,off++*16));
movdqu(xmm3, Address(rsp,off++*16));
movdqu(xmm4, Address(rsp,off++*16));
movdqu(xmm5, Address(rsp,off++*16));
movdqu(xmm6, Address(rsp,off++*16));
movdqu(xmm7, Address(rsp,off++*16));
#ifdef _LP64
movdqu(xmm8, Address(rsp,off++*16));
movdqu(xmm9, Address(rsp,off++*16));
movdqu(xmm10, Address(rsp,off++*16));
movdqu(xmm11, Address(rsp,off++*16));
movdqu(xmm12, Address(rsp,off++*16));
movdqu(xmm13, Address(rsp,off++*16));
movdqu(xmm14, Address(rsp,off++*16));
movdqu(xmm15, Address(rsp,off++*16));
if (VM_Version::supports_avx512novl()) {
for (int n = 0; n < num_xmm_regs; n++) {
vinsertf32x4h(as_XMMRegister(n), Address(rsp, off++*16), 0);
}
} else {
for (int n = 0; n < num_xmm_regs; n++) {
movdqu(as_XMMRegister(n), Address(rsp, off++*16));
}
}
#else
for (int n = 0; n < num_xmm_regs; n++) {
movdqu(as_XMMRegister(n), Address(rsp, off++ * 16));
}
#endif
addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
addptr(rsp, 16*num_xmm_regs);
#ifdef COMPILER2
if (MaxVectorSize > 16) {
// Restore upper half of YMM registers.
vinsertf128h(xmm0, Address(rsp, 0));
vinsertf128h(xmm1, Address(rsp, 16));
vinsertf128h(xmm2, Address(rsp, 32));
vinsertf128h(xmm3, Address(rsp, 48));
vinsertf128h(xmm4, Address(rsp, 64));
vinsertf128h(xmm5, Address(rsp, 80));
vinsertf128h(xmm6, Address(rsp, 96));
vinsertf128h(xmm7, Address(rsp,112));
#ifdef _LP64
vinsertf128h(xmm8, Address(rsp,128));
vinsertf128h(xmm9, Address(rsp,144));
vinsertf128h(xmm10, Address(rsp,160));
vinsertf128h(xmm11, Address(rsp,176));
vinsertf128h(xmm12, Address(rsp,192));
vinsertf128h(xmm13, Address(rsp,208));
vinsertf128h(xmm14, Address(rsp,224));
vinsertf128h(xmm15, Address(rsp,240));
#endif
addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
off = 0;
for (int n = 0; n < num_xmm_regs; n++) {
vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
}
addptr(rsp, 16*num_xmm_regs);
if (UseAVX > 2) {
off = 0;
for (int n = 0; n < num_xmm_regs; n++) {
vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32));
}
addptr(rsp, 32*num_xmm_regs);
}
}
#endif
}
@ -7093,11 +7207,7 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
if (UseAVX > 2) {
movl(rtmp, 0xffff);
#ifdef _LP64
kmovql(k1, rtmp);
#else
kmovdl(k1, rtmp);
#endif
kmovwl(k1, rtmp);
}
movdl(xtmp, value);
if (UseAVX > 2 && UseUnalignedLoadStores) {
@ -7110,7 +7220,7 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
align(16);
BIND(L_fill_64_bytes_loop);
evmovdqu(Address(to, 0), xtmp, Assembler::AVX_512bit);
evmovdqul(Address(to, 0), xtmp, Assembler::AVX_512bit);
addptr(to, 64);
subl(count, 16 << shift);
jcc(Assembler::greaterEqual, L_fill_64_bytes_loop);
@ -7118,7 +7228,7 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
BIND(L_check_fill_32_bytes);
addl(count, 8 << shift);
jccb(Assembler::less, L_check_fill_8_bytes);
evmovdqu(Address(to, 0), xtmp, Assembler::AVX_256bit);
evmovdqul(Address(to, 0), xtmp, Assembler::AVX_256bit);
addptr(to, 32);
subl(count, 8 << shift);
@ -8397,6 +8507,14 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
Label L_tail, L_tail_restore, L_tail_loop, L_exit, L_align_loop, L_aligned;
Label L_fold_tail, L_fold_128b, L_fold_512b, L_fold_512b_loop, L_fold_tail_loop;
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
movl(tmp, 0xffff);
kmovwl(k1, tmp);
}
lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
notl(crc); // ~crc
cmpl(len, 16);

View File

@ -1069,6 +1069,9 @@ public:
void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); }
void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
void vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src);
void vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
// AVX Vector instructions
void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }

View File

@ -115,6 +115,7 @@ class RegisterSaver {
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
int* total_frame_words, bool verify_fpu, bool save_vectors) {
int vect_words = 0;
int num_xmm_regs = XMMRegisterImpl::number_of_registers;
#ifdef COMPILER2
if (save_vectors) {
assert(UseAVX > 0, "512bit vectors are supported only with EVEX");
@ -173,59 +174,50 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
}
int off = st0_off;
int delta = st1_off - off;
// Save the FPU registers in de-opt-able form
for (int n = 0; n < FloatRegisterImpl::number_of_registers; n++) {
__ fstp_d(Address(rsp, off*wordSize));
off += delta;
}
__ fstp_d(Address(rsp, st0_off*wordSize)); // st(0)
__ fstp_d(Address(rsp, st1_off*wordSize)); // st(1)
__ fstp_d(Address(rsp, st2_off*wordSize)); // st(2)
__ fstp_d(Address(rsp, st3_off*wordSize)); // st(3)
__ fstp_d(Address(rsp, st4_off*wordSize)); // st(4)
__ fstp_d(Address(rsp, st5_off*wordSize)); // st(5)
__ fstp_d(Address(rsp, st6_off*wordSize)); // st(6)
__ fstp_d(Address(rsp, st7_off*wordSize)); // st(7)
if( UseSSE == 1 ) { // Save the XMM state
__ movflt(Address(rsp,xmm0_off*wordSize),xmm0);
__ movflt(Address(rsp,xmm1_off*wordSize),xmm1);
__ movflt(Address(rsp,xmm2_off*wordSize),xmm2);
__ movflt(Address(rsp,xmm3_off*wordSize),xmm3);
__ movflt(Address(rsp,xmm4_off*wordSize),xmm4);
__ movflt(Address(rsp,xmm5_off*wordSize),xmm5);
__ movflt(Address(rsp,xmm6_off*wordSize),xmm6);
__ movflt(Address(rsp,xmm7_off*wordSize),xmm7);
} else if( UseSSE >= 2 ) {
off = xmm0_off;
delta = xmm1_off - off;
if (UseSSE == 1) { // Save the XMM state
for (int n = 0; n < num_xmm_regs; n++) {
__ movflt(Address(rsp, off*wordSize), as_XMMRegister(n));
off += delta;
}
} else if (UseSSE >= 2) {
// Save whole 128-bit (16 bytes) XMM registers
__ movdqu(Address(rsp,xmm0_off*wordSize),xmm0);
__ movdqu(Address(rsp,xmm1_off*wordSize),xmm1);
__ movdqu(Address(rsp,xmm2_off*wordSize),xmm2);
__ movdqu(Address(rsp,xmm3_off*wordSize),xmm3);
__ movdqu(Address(rsp,xmm4_off*wordSize),xmm4);
__ movdqu(Address(rsp,xmm5_off*wordSize),xmm5);
__ movdqu(Address(rsp,xmm6_off*wordSize),xmm6);
__ movdqu(Address(rsp,xmm7_off*wordSize),xmm7);
if (VM_Version::supports_avx512novl()) {
for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf32x4h(Address(rsp, off*wordSize), as_XMMRegister(n), 0);
off += delta;
}
} else {
for (int n = 0; n < num_xmm_regs; n++) {
__ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n));
off += delta;
}
}
}
if (vect_words > 0) {
assert(vect_words*wordSize == 128, "");
__ subptr(rsp, 128); // Save upper half of YMM registers
__ vextractf128h(Address(rsp, 0),xmm0);
__ vextractf128h(Address(rsp, 16),xmm1);
__ vextractf128h(Address(rsp, 32),xmm2);
__ vextractf128h(Address(rsp, 48),xmm3);
__ vextractf128h(Address(rsp, 64),xmm4);
__ vextractf128h(Address(rsp, 80),xmm5);
__ vextractf128h(Address(rsp, 96),xmm6);
__ vextractf128h(Address(rsp,112),xmm7);
off = 0;
for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
}
if (UseAVX > 2) {
__ subptr(rsp, 256); // Save upper half of ZMM registers
__ vextractf64x4h(Address(rsp, 0), xmm0);
__ vextractf64x4h(Address(rsp, 32), xmm1);
__ vextractf64x4h(Address(rsp, 64), xmm2);
__ vextractf64x4h(Address(rsp, 96), xmm3);
__ vextractf64x4h(Address(rsp, 128), xmm4);
__ vextractf64x4h(Address(rsp, 160), xmm5);
__ vextractf64x4h(Address(rsp, 192), xmm6);
__ vextractf64x4h(Address(rsp, 224), xmm7);
off = 0;
for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf64x4h(Address(rsp, off++*32), as_XMMRegister(n));
}
}
}
@ -238,58 +230,40 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
OopMap* map = new OopMap( frame_words, 0 );
#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
map->set_callee_saved(STACK_OFFSET( rax_off), rax->as_VMReg());
map->set_callee_saved(STACK_OFFSET( rcx_off), rcx->as_VMReg());
map->set_callee_saved(STACK_OFFSET( rdx_off), rdx->as_VMReg());
map->set_callee_saved(STACK_OFFSET( rbx_off), rbx->as_VMReg());
// rbp, location is known implicitly, no oopMap
map->set_callee_saved(STACK_OFFSET( rsi_off), rsi->as_VMReg());
map->set_callee_saved(STACK_OFFSET( rdi_off), rdi->as_VMReg());
map->set_callee_saved(STACK_OFFSET(st0_off), as_FloatRegister(0)->as_VMReg());
map->set_callee_saved(STACK_OFFSET(st1_off), as_FloatRegister(1)->as_VMReg());
map->set_callee_saved(STACK_OFFSET(st2_off), as_FloatRegister(2)->as_VMReg());
map->set_callee_saved(STACK_OFFSET(st3_off), as_FloatRegister(3)->as_VMReg());
map->set_callee_saved(STACK_OFFSET(st4_off), as_FloatRegister(4)->as_VMReg());
map->set_callee_saved(STACK_OFFSET(st5_off), as_FloatRegister(5)->as_VMReg());
map->set_callee_saved(STACK_OFFSET(st6_off), as_FloatRegister(6)->as_VMReg());
map->set_callee_saved(STACK_OFFSET(st7_off), as_FloatRegister(7)->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm0_off), xmm0->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm1_off), xmm1->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm2_off), xmm2->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm3_off), xmm3->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm4_off), xmm4->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm5_off), xmm5->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm6_off), xmm6->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm7_off), xmm7->as_VMReg());
// %%% This is really a waste but we'll keep things as they were for now
if (true) {
#define NEXTREG(x) (x)->as_VMReg()->next()
map->set_callee_saved(STACK_OFFSET(st0H_off), NEXTREG(as_FloatRegister(0)));
map->set_callee_saved(STACK_OFFSET(st1H_off), NEXTREG(as_FloatRegister(1)));
map->set_callee_saved(STACK_OFFSET(st2H_off), NEXTREG(as_FloatRegister(2)));
map->set_callee_saved(STACK_OFFSET(st3H_off), NEXTREG(as_FloatRegister(3)));
map->set_callee_saved(STACK_OFFSET(st4H_off), NEXTREG(as_FloatRegister(4)));
map->set_callee_saved(STACK_OFFSET(st5H_off), NEXTREG(as_FloatRegister(5)));
map->set_callee_saved(STACK_OFFSET(st6H_off), NEXTREG(as_FloatRegister(6)));
map->set_callee_saved(STACK_OFFSET(st7H_off), NEXTREG(as_FloatRegister(7)));
map->set_callee_saved(STACK_OFFSET(xmm0H_off), NEXTREG(xmm0));
map->set_callee_saved(STACK_OFFSET(xmm1H_off), NEXTREG(xmm1));
map->set_callee_saved(STACK_OFFSET(xmm2H_off), NEXTREG(xmm2));
map->set_callee_saved(STACK_OFFSET(xmm3H_off), NEXTREG(xmm3));
map->set_callee_saved(STACK_OFFSET(xmm4H_off), NEXTREG(xmm4));
map->set_callee_saved(STACK_OFFSET(xmm5H_off), NEXTREG(xmm5));
map->set_callee_saved(STACK_OFFSET(xmm6H_off), NEXTREG(xmm6));
map->set_callee_saved(STACK_OFFSET(xmm7H_off), NEXTREG(xmm7));
map->set_callee_saved(STACK_OFFSET(rax_off), rax->as_VMReg());
map->set_callee_saved(STACK_OFFSET(rcx_off), rcx->as_VMReg());
map->set_callee_saved(STACK_OFFSET(rdx_off), rdx->as_VMReg());
map->set_callee_saved(STACK_OFFSET(rbx_off), rbx->as_VMReg());
// rbp, location is known implicitly, no oopMap
map->set_callee_saved(STACK_OFFSET(rsi_off), rsi->as_VMReg());
map->set_callee_saved(STACK_OFFSET(rdi_off), rdi->as_VMReg());
// %%% This is really a waste but we'll keep things as they were for now for the upper component
off = st0_off;
delta = st1_off - off;
for (int n = 0; n < FloatRegisterImpl::number_of_registers; n++) {
FloatRegister freg_name = as_FloatRegister(n);
map->set_callee_saved(STACK_OFFSET(off), freg_name->as_VMReg());
map->set_callee_saved(STACK_OFFSET(off+1), NEXTREG(freg_name));
off += delta;
}
off = xmm0_off;
delta = xmm1_off - off;
for (int n = 0; n < num_xmm_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
map->set_callee_saved(STACK_OFFSET(off+1), NEXTREG(xmm_name));
off += delta;
}
#undef NEXTREG
#undef STACK_OFFSET
}
return map;
}
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
int num_xmm_regs = XMMRegisterImpl::number_of_registers;
// Recover XMM & FPU state
int additional_frame_bytes = 0;
#ifdef COMPILER2
@ -301,52 +275,43 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
#else
assert(!restore_vectors, "vectors are generated only by C2");
#endif
int off = xmm0_off;
int delta = xmm1_off - off;
if (UseSSE == 1) {
assert(additional_frame_bytes == 0, "");
__ movflt(xmm0,Address(rsp,xmm0_off*wordSize));
__ movflt(xmm1,Address(rsp,xmm1_off*wordSize));
__ movflt(xmm2,Address(rsp,xmm2_off*wordSize));
__ movflt(xmm3,Address(rsp,xmm3_off*wordSize));
__ movflt(xmm4,Address(rsp,xmm4_off*wordSize));
__ movflt(xmm5,Address(rsp,xmm5_off*wordSize));
__ movflt(xmm6,Address(rsp,xmm6_off*wordSize));
__ movflt(xmm7,Address(rsp,xmm7_off*wordSize));
for (int n = 0; n < num_xmm_regs; n++) {
__ movflt(as_XMMRegister(n), Address(rsp, off*wordSize));
off += delta;
}
} else if (UseSSE >= 2) {
#define STACK_ADDRESS(x) Address(rsp,(x)*wordSize + additional_frame_bytes)
__ movdqu(xmm0,STACK_ADDRESS(xmm0_off));
__ movdqu(xmm1,STACK_ADDRESS(xmm1_off));
__ movdqu(xmm2,STACK_ADDRESS(xmm2_off));
__ movdqu(xmm3,STACK_ADDRESS(xmm3_off));
__ movdqu(xmm4,STACK_ADDRESS(xmm4_off));
__ movdqu(xmm5,STACK_ADDRESS(xmm5_off));
__ movdqu(xmm6,STACK_ADDRESS(xmm6_off));
__ movdqu(xmm7,STACK_ADDRESS(xmm7_off));
#undef STACK_ADDRESS
if (VM_Version::supports_avx512novl()) {
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf32x4h(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes), 0);
off += delta;
}
} else {
for (int n = 0; n < num_xmm_regs; n++) {
__ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes));
off += delta;
}
}
}
if (restore_vectors) {
if (UseAVX > 2) {
off = 0;
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf64x4h(as_XMMRegister(n), Address(rsp, off++*32));
}
__ addptr(rsp, additional_frame_bytes*2); // Pop the ZMM upper-half save area
}
// Restore upper half of YMM registes.
assert(additional_frame_bytes == 128, "");
__ vinsertf128h(xmm0, Address(rsp, 0));
__ vinsertf128h(xmm1, Address(rsp, 16));
__ vinsertf128h(xmm2, Address(rsp, 32));
__ vinsertf128h(xmm3, Address(rsp, 48));
__ vinsertf128h(xmm4, Address(rsp, 64));
__ vinsertf128h(xmm5, Address(rsp, 80));
__ vinsertf128h(xmm6, Address(rsp, 96));
__ vinsertf128h(xmm7, Address(rsp,112));
__ addptr(rsp, additional_frame_bytes);
if (UseAVX > 2) {
additional_frame_bytes = 256;
__ vinsertf64x4h(xmm0, Address(rsp, 0));
__ vinsertf64x4h(xmm1, Address(rsp, 32));
__ vinsertf64x4h(xmm2, Address(rsp, 64));
__ vinsertf64x4h(xmm3, Address(rsp, 96));
__ vinsertf64x4h(xmm4, Address(rsp, 128));
__ vinsertf64x4h(xmm5, Address(rsp, 160));
__ vinsertf64x4h(xmm6, Address(rsp, 192));
__ vinsertf64x4h(xmm7, Address(rsp, 224));
__ addptr(rsp, additional_frame_bytes);
off = 0;
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
}
__ addptr(rsp, additional_frame_bytes); // Pop the YMM upper-half save area
}
__ pop_FPU_state();
__ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers

View File

@ -69,7 +69,9 @@ class SimpleRuntimeFrame {
class RegisterSaver {
// Capture info about frame layout. Layout offsets are in jint
// units because compiler frame slots are jints.
#define HALF_ZMM_BANK_WORDS 128
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
#define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
enum layout {
fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
xmm_off = fpu_state_off + 160/BytesPerInt, // offset in fxsave save area
@ -89,23 +91,24 @@ class RegisterSaver {
DEF_XMM_OFFS(13),
DEF_XMM_OFFS(14),
DEF_XMM_OFFS(15),
DEF_XMM_OFFS(16),
DEF_XMM_OFFS(17),
DEF_XMM_OFFS(18),
DEF_XMM_OFFS(19),
DEF_XMM_OFFS(20),
DEF_XMM_OFFS(21),
DEF_XMM_OFFS(22),
DEF_XMM_OFFS(23),
DEF_XMM_OFFS(24),
DEF_XMM_OFFS(25),
DEF_XMM_OFFS(26),
DEF_XMM_OFFS(27),
DEF_XMM_OFFS(28),
DEF_XMM_OFFS(29),
DEF_XMM_OFFS(30),
DEF_XMM_OFFS(31),
fpu_state_end = fpu_state_off + ((FPUStateSizeInWords - 1)*wordSize / BytesPerInt),
zmm_off = fpu_state_off + ((FPUStateSizeInWords - (HALF_ZMM_BANK_WORDS + 1))*wordSize / BytesPerInt),
DEF_ZMM_OFFS(16),
DEF_ZMM_OFFS(17),
DEF_ZMM_OFFS(18),
DEF_ZMM_OFFS(19),
DEF_ZMM_OFFS(20),
DEF_ZMM_OFFS(21),
DEF_ZMM_OFFS(22),
DEF_ZMM_OFFS(23),
DEF_ZMM_OFFS(24),
DEF_ZMM_OFFS(25),
DEF_ZMM_OFFS(26),
DEF_ZMM_OFFS(27),
DEF_ZMM_OFFS(28),
DEF_ZMM_OFFS(29),
DEF_ZMM_OFFS(30),
DEF_ZMM_OFFS(31),
fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
fpu_stateH_end,
r15_off, r15H_off,
r14_off, r14H_off,
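// A compilable sketch of the offset macros above, with an illustrative
// xmm_off placeholder (names are prefixed to avoid clashing with the real
// enum): each DEF_XMM_OFFS(n) defines xmmN_off four jints (16 bytes) past its
// predecessor plus an xmmNH_off for the high half, while DEF_ZMM_OFFS(n) lays
// out 64-byte slots in the xsave-managed extended region.
enum { SketchBytesPerInt = 4, sketch_xmm_off = 40 };
#define SKETCH_XMM_OFFS(regnum) \
  sk_xmm##regnum##_off = sketch_xmm_off + (regnum)*16/SketchBytesPerInt, sk_xmm##regnum##H_off
enum sketch_layout { SKETCH_XMM_OFFS(0), SKETCH_XMM_OFFS(1) };
static_assert(sk_xmm1_off - sk_xmm0_off == 16 / SketchBytesPerInt,
              "consecutive XMM save slots are 16 bytes (4 jints) apart");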
@ -155,9 +158,10 @@ class RegisterSaver {
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
int vect_words = 0;
int num_xmm_regs = 16;
if (UseAVX > 2) {
num_xmm_regs = 32;
int off = 0;
int num_xmm_regs = XMMRegisterImpl::number_of_registers;
if (UseAVX < 3) {
num_xmm_regs = num_xmm_regs/2;
}
#ifdef COMPILER2
if (save_vectors) {
@ -165,9 +169,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
// Save upper half of YMM registers
vect_words = 16 * num_xmm_regs / wordSize;
additional_frame_words += vect_words;
if (UseAVX > 2) {
// Save upper half of ZMM registers as well
if (UseAVX < 3) {
additional_frame_words += vect_words;
}
}
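// The frame-size arithmetic above, worked through as a standalone sketch:
// each register contributes one 16-byte YMM upper half, so with an 8-byte
// wordSize the save area is 16*num_xmm_regs/8 words; on EVEX (UseAVX > 2) the
// xsave-managed frame already covers these halves, so nothing extra is added.
#include <cstdio>

int main() {
  const int wordSize = 8;
  int reg_counts[] = {16, 32};
  for (int num_xmm_regs : reg_counts) {
    int vect_words = 16 * num_xmm_regs / wordSize;
    // prints: 16 regs -> 32 words (256 bytes), 32 regs -> 64 words (512 bytes)
    printf("%2d regs -> %2d words (%3d bytes)\n",
           num_xmm_regs, vect_words, vect_words * wordSize);
  }
  return 0;
}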
@ -195,77 +197,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
__ enter(); // rsp becomes 16-byte aligned here
__ push_CPU_state(); // Push a multiple of 16 bytes
if (vect_words > 0) {
// push_CPU_state handles this on EVEX-enabled targets
if ((vect_words > 0) && (UseAVX < 3)) {
assert(vect_words*wordSize >= 256, "");
__ subptr(rsp, 256); // Save upper half of YMM registers (0..15)
__ vextractf128h(Address(rsp, 0), xmm0);
__ vextractf128h(Address(rsp, 16), xmm1);
__ vextractf128h(Address(rsp, 32), xmm2);
__ vextractf128h(Address(rsp, 48), xmm3);
__ vextractf128h(Address(rsp, 64), xmm4);
__ vextractf128h(Address(rsp, 80), xmm5);
__ vextractf128h(Address(rsp, 96), xmm6);
__ vextractf128h(Address(rsp, 112), xmm7);
__ vextractf128h(Address(rsp, 128), xmm8);
__ vextractf128h(Address(rsp, 144), xmm9);
__ vextractf128h(Address(rsp, 160), xmm10);
__ vextractf128h(Address(rsp, 176), xmm11);
__ vextractf128h(Address(rsp, 192), xmm12);
__ vextractf128h(Address(rsp, 208), xmm13);
__ vextractf128h(Address(rsp, 224), xmm14);
__ vextractf128h(Address(rsp, 240), xmm15);
if (UseAVX > 2) {
__ subptr(rsp, 256); // Save upper half of YMM registers (16..31)
__ vextractf128h(Address(rsp, 0), xmm16);
__ vextractf128h(Address(rsp, 16), xmm17);
__ vextractf128h(Address(rsp, 32), xmm18);
__ vextractf128h(Address(rsp, 48), xmm19);
__ vextractf128h(Address(rsp, 64), xmm20);
__ vextractf128h(Address(rsp, 80), xmm21);
__ vextractf128h(Address(rsp, 96), xmm22);
__ vextractf128h(Address(rsp, 112), xmm23);
__ vextractf128h(Address(rsp, 128), xmm24);
__ vextractf128h(Address(rsp, 144), xmm25);
__ vextractf128h(Address(rsp, 160), xmm26);
__ vextractf128h(Address(rsp, 176), xmm27);
__ vextractf128h(Address(rsp, 192), xmm28);
__ vextractf128h(Address(rsp, 208), xmm29);
__ vextractf128h(Address(rsp, 224), xmm30);
__ vextractf128h(Address(rsp, 240), xmm31);
// Now handle the ZMM registers (0..31)
__ subptr(rsp, 1024); // Save upper half of ZMM registers
__ vextractf64x4h(Address(rsp, 0), xmm0);
__ vextractf64x4h(Address(rsp, 32), xmm1);
__ vextractf64x4h(Address(rsp, 64), xmm2);
__ vextractf64x4h(Address(rsp, 96), xmm3);
__ vextractf64x4h(Address(rsp, 128), xmm4);
__ vextractf64x4h(Address(rsp, 160), xmm5);
__ vextractf64x4h(Address(rsp, 192), xmm6);
__ vextractf64x4h(Address(rsp, 224), xmm7);
__ vextractf64x4h(Address(rsp, 256), xmm8);
__ vextractf64x4h(Address(rsp, 288), xmm9);
__ vextractf64x4h(Address(rsp, 320), xmm10);
__ vextractf64x4h(Address(rsp, 352), xmm11);
__ vextractf64x4h(Address(rsp, 384), xmm12);
__ vextractf64x4h(Address(rsp, 416), xmm13);
__ vextractf64x4h(Address(rsp, 448), xmm14);
__ vextractf64x4h(Address(rsp, 480), xmm15);
__ vextractf64x4h(Address(rsp, 512), xmm16);
__ vextractf64x4h(Address(rsp, 544), xmm17);
__ vextractf64x4h(Address(rsp, 576), xmm18);
__ vextractf64x4h(Address(rsp, 608), xmm19);
__ vextractf64x4h(Address(rsp, 640), xmm20);
__ vextractf64x4h(Address(rsp, 672), xmm21);
__ vextractf64x4h(Address(rsp, 704), xmm22);
__ vextractf64x4h(Address(rsp, 736), xmm23);
__ vextractf64x4h(Address(rsp, 768), xmm24);
__ vextractf64x4h(Address(rsp, 800), xmm25);
__ vextractf64x4h(Address(rsp, 832), xmm26);
__ vextractf64x4h(Address(rsp, 864), xmm27);
__ vextractf64x4h(Address(rsp, 896), xmm28);
__ vextractf64x4h(Address(rsp, 928), xmm29);
__ vextractf64x4h(Address(rsp, 960), xmm30);
__ vextractf64x4h(Address(rsp, 992), xmm31);
// Save upper half of YMM registers (0..num_xmm_regs)
__ subptr(rsp, num_xmm_regs*16);
for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf128h(Address(rsp, off++*16), as_XMMRegister(n));
}
}
if (frame::arg_reg_save_area_bytes != 0) {
@ -299,39 +237,24 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
map->set_callee_saved(STACK_OFFSET( r13_off ), r13->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r14_off ), r14->as_VMReg());
map->set_callee_saved(STACK_OFFSET( r15_off ), r15->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm0_off ), xmm0->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm1_off ), xmm1->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm2_off ), xmm2->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm3_off ), xmm3->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm4_off ), xmm4->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm5_off ), xmm5->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm6_off ), xmm6->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm7_off ), xmm7->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm8_off ), xmm8->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm9_off ), xmm9->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm10_off), xmm10->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm11_off), xmm11->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm12_off), xmm12->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm13_off), xmm13->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm14_off), xmm14->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm15_off), xmm15->as_VMReg());
if (UseAVX > 2) {
map->set_callee_saved(STACK_OFFSET(xmm16_off), xmm16->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm17_off), xmm17->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm18_off), xmm18->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm19_off), xmm19->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm20_off), xmm20->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm21_off), xmm21->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm22_off), xmm22->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm23_off), xmm23->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm24_off), xmm24->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm25_off), xmm25->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm26_off), xmm26->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm27_off), xmm27->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm28_off), xmm28->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm29_off), xmm29->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm30_off), xmm30->as_VMReg());
map->set_callee_saved(STACK_OFFSET(xmm31_off), xmm31->as_VMReg());
// For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15;
// on EVEX-enabled targets it is included in the xsave area.
off = xmm0_off;
int delta = xmm1_off - off;
for (int n = 0; n < 16; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
off += delta;
}
if (UseAVX > 2) {
// Obtain xmm16..xmm31 from the XSAVE area on EVEX-enabled targets
off = zmm16_off;
delta = zmm17_off - off;
for (int n = 16; n < num_xmm_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg());
off += delta;
}
}
// %%% These should all be a waste but we'll keep things as they were for now
@ -351,39 +274,24 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
map->set_callee_saved(STACK_OFFSET( r13H_off ), r13->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r14H_off ), r14->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET( r15H_off ), r15->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm0H_off ), xmm0->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm1H_off ), xmm1->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm2H_off ), xmm2->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm3H_off ), xmm3->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm4H_off ), xmm4->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm5H_off ), xmm5->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm6H_off ), xmm6->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm7H_off ), xmm7->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm8H_off ), xmm8->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm9H_off ), xmm9->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm10H_off), xmm10->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm11H_off), xmm11->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm12H_off), xmm12->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm13H_off), xmm13->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next());
// For both AVX and EVEX we will use the legacy FXSAVE area for xmm0..xmm15;
// on EVEX-enabled targets it is included in the xsave area.
off = xmm0H_off;
delta = xmm1H_off - off;
for (int n = 0; n < 16; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
off += delta;
}
if (UseAVX > 2) {
map->set_callee_saved(STACK_OFFSET(xmm16H_off), xmm16->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm17H_off), xmm17->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm18H_off), xmm18->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm19H_off), xmm19->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm20H_off), xmm20->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm21H_off), xmm21->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm22H_off), xmm22->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm23H_off), xmm23->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm24H_off), xmm24->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm25H_off), xmm25->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm26H_off), xmm26->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm27H_off), xmm27->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm28H_off), xmm28->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm29H_off), xmm29->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm30H_off), xmm30->as_VMReg()->next());
map->set_callee_saved(STACK_OFFSET(xmm31H_off), xmm31->as_VMReg()->next());
// Obtain xmm16..xmm31 from the XSAVE area on EVEX-enabled targets
off = zmm16H_off;
delta = zmm17H_off - off;
for (int n = 16; n < num_xmm_regs; n++) {
XMMRegister xmm_name = as_XMMRegister(n);
map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()->next());
off += delta;
}
}
}
@ -391,86 +299,25 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
int num_xmm_regs = XMMRegisterImpl::number_of_registers;
if (UseAVX < 3) {
num_xmm_regs = num_xmm_regs/2;
}
if (frame::arg_reg_save_area_bytes != 0) {
// Pop arg register save area
__ addptr(rsp, frame::arg_reg_save_area_bytes);
}
#ifdef COMPILER2
if (restore_vectors) {
// Restore upper half of YMM registers (0..15)
assert(UseAVX > 0, "512bit vectors are supported only with AVX");
assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
__ vinsertf128h(xmm0, Address(rsp, 0));
__ vinsertf128h(xmm1, Address(rsp, 16));
__ vinsertf128h(xmm2, Address(rsp, 32));
__ vinsertf128h(xmm3, Address(rsp, 48));
__ vinsertf128h(xmm4, Address(rsp, 64));
__ vinsertf128h(xmm5, Address(rsp, 80));
__ vinsertf128h(xmm6, Address(rsp, 96));
__ vinsertf128h(xmm7, Address(rsp,112));
__ vinsertf128h(xmm8, Address(rsp,128));
__ vinsertf128h(xmm9, Address(rsp,144));
__ vinsertf128h(xmm10, Address(rsp,160));
__ vinsertf128h(xmm11, Address(rsp,176));
__ vinsertf128h(xmm12, Address(rsp,192));
__ vinsertf128h(xmm13, Address(rsp,208));
__ vinsertf128h(xmm14, Address(rsp,224));
__ vinsertf128h(xmm15, Address(rsp,240));
__ addptr(rsp, 256);
if (UseAVX > 2) {
// Restore upper half of YMM registers (16..31)
__ vinsertf128h(xmm16, Address(rsp, 0));
__ vinsertf128h(xmm17, Address(rsp, 16));
__ vinsertf128h(xmm18, Address(rsp, 32));
__ vinsertf128h(xmm19, Address(rsp, 48));
__ vinsertf128h(xmm20, Address(rsp, 64));
__ vinsertf128h(xmm21, Address(rsp, 80));
__ vinsertf128h(xmm22, Address(rsp, 96));
__ vinsertf128h(xmm23, Address(rsp,112));
__ vinsertf128h(xmm24, Address(rsp,128));
__ vinsertf128h(xmm25, Address(rsp,144));
__ vinsertf128h(xmm26, Address(rsp,160));
__ vinsertf128h(xmm27, Address(rsp,176));
__ vinsertf128h(xmm28, Address(rsp,192));
__ vinsertf128h(xmm29, Address(rsp,208));
__ vinsertf128h(xmm30, Address(rsp,224));
__ vinsertf128h(xmm31, Address(rsp,240));
__ addptr(rsp, 256);
// Restore upper half of ZMM registers.
__ vinsertf64x4h(xmm0, Address(rsp, 0));
__ vinsertf64x4h(xmm1, Address(rsp, 32));
__ vinsertf64x4h(xmm2, Address(rsp, 64));
__ vinsertf64x4h(xmm3, Address(rsp, 96));
__ vinsertf64x4h(xmm4, Address(rsp, 128));
__ vinsertf64x4h(xmm5, Address(rsp, 160));
__ vinsertf64x4h(xmm6, Address(rsp, 192));
__ vinsertf64x4h(xmm7, Address(rsp, 224));
__ vinsertf64x4h(xmm8, Address(rsp, 256));
__ vinsertf64x4h(xmm9, Address(rsp, 288));
__ vinsertf64x4h(xmm10, Address(rsp, 320));
__ vinsertf64x4h(xmm11, Address(rsp, 352));
__ vinsertf64x4h(xmm12, Address(rsp, 384));
__ vinsertf64x4h(xmm13, Address(rsp, 416));
__ vinsertf64x4h(xmm14, Address(rsp, 448));
__ vinsertf64x4h(xmm15, Address(rsp, 480));
__ vinsertf64x4h(xmm16, Address(rsp, 512));
__ vinsertf64x4h(xmm17, Address(rsp, 544));
__ vinsertf64x4h(xmm18, Address(rsp, 576));
__ vinsertf64x4h(xmm19, Address(rsp, 608));
__ vinsertf64x4h(xmm20, Address(rsp, 640));
__ vinsertf64x4h(xmm21, Address(rsp, 672));
__ vinsertf64x4h(xmm22, Address(rsp, 704));
__ vinsertf64x4h(xmm23, Address(rsp, 736));
__ vinsertf64x4h(xmm24, Address(rsp, 768));
__ vinsertf64x4h(xmm25, Address(rsp, 800));
__ vinsertf64x4h(xmm26, Address(rsp, 832));
__ vinsertf64x4h(xmm27, Address(rsp, 864));
__ vinsertf64x4h(xmm28, Address(rsp, 896));
__ vinsertf64x4h(xmm29, Address(rsp, 928));
__ vinsertf64x4h(xmm30, Address(rsp, 960));
__ vinsertf64x4h(xmm31, Address(rsp, 992));
__ addptr(rsp, 1024);
// On EVEX-enabled targets everything is handled in pop_FPU_state
if ((restore_vectors) && (UseAVX < 3)) {
assert(UseAVX > 0, "256/512-bit vectors are supported only with AVX");
assert(MaxVectorSize == 64, "up to 512bit vectors are supported now");
int off = 0;
// Restore upper half of YMM registers (0..num_xmm_regs)
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf128h(as_XMMRegister(n), Address(rsp, off++*16));
}
__ addptr(rsp, num_xmm_regs*16);
}
#else
assert(!restore_vectors, "vectors are generated only by C2");

View File

@ -722,7 +722,7 @@ class StubGenerator: public StubCodeGenerator {
__ popa();
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
@ -754,7 +754,7 @@ class StubGenerator: public StubCodeGenerator {
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
{
CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
@ -795,6 +795,12 @@ class StubGenerator: public StubCodeGenerator {
void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
assert( UseSSE >= 2, "supported cpu only" );
Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
if (UseAVX > 2) {
__ push(rbx);
__ movl(rbx, 0xffff);
__ kmovdl(k1, rbx);
__ pop(rbx);
}
// Copy 64-byte chunks
__ jmpb(L_copy_64_bytes);
__ align(OptoLoopAlignment);
@ -802,8 +808,8 @@ class StubGenerator: public StubCodeGenerator {
if (UseUnalignedLoadStores) {
if (UseAVX > 2) {
__ evmovdqu(xmm0, Address(from, 0), Assembler::AVX_512bit);
__ evmovdqu(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm0, Address(from, 0), Assembler::AVX_512bit);
__ evmovdqul(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
} else if (UseAVX == 2) {
__ vmovdqu(xmm0, Address(from, 0));
__ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0);
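// The copy kernel above, restated with intrinsics (a sketch, not this file's
// code): one 512-bit unaligned load/store pair moves each 64-byte chunk when
// AVX-512 is available, which is what the evmovdqul pair expresses.
#include <immintrin.h>
#include <cstddef>

static void copy_64byte_chunks(char* dst, const char* src, size_t nbytes) {
  size_t i = 0;
  for (; i + 64 <= nbytes; i += 64) {
    __m512i v = _mm512_loadu_si512((const void*)(src + i));
    _mm512_storeu_si512((void*)(dst + i), v);
  }
  for (; i < nbytes; i++) {   // byte tail
    dst[i] = src[i];
  }
}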
@ -2217,6 +2223,15 @@ class StubGenerator: public StubCodeGenerator {
const XMMRegister xmm_temp4 = xmm5;
__ enter(); // required for proper stackwalking of RuntimeStub frame
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
__ movl(rdx, 0xffff);
__ kmovdl(k1, rdx);
}
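// Why k1 = 0xffff suffices here, shown with intrinsics (a sketch under the
// AVX-512BW+VL assumption): a 128-bit operation has at most 16 byte lanes, so
// an all-ones 16-bit mask makes EVEX merge-masking a no-op and the masked and
// unmasked forms produce identical results.
#include <immintrin.h>

static __m128i add_bytes_all_lanes(__m128i a, __m128i b) {
  __mmask16 k = 0xffff;                  // every byte lane enabled
  return _mm_mask_add_epi8(a, k, a, b);  // identical to _mm_add_epi8(a, b)
}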
__ movptr(from, from_param);
__ movptr(key, key_param);
@ -2315,6 +2330,15 @@ class StubGenerator: public StubCodeGenerator {
const XMMRegister xmm_temp4 = xmm5;
__ enter(); // required for proper stackwalking of RuntimeStub frame
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
__ movl(rdx, 0xffff);
__ kmovdl(k1, rdx);
}
__ movptr(from, from_param);
__ movptr(key, key_param);
@ -2441,6 +2465,14 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
handleSOERegisters(true /*saving*/);
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
__ movl(rdx, 0xffff);
__ kmovdl(k1, rdx);
}
// load registers from incoming parameters
const Address from_param(rbp, 8+0);
const Address to_param (rbp, 8+4);
@ -2602,6 +2634,14 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
handleSOERegisters(true /*saving*/);
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
__ movl(rdx, 0xffff);
__ kmovdl(k1, rdx);
}
// load registers from incoming parameters
const Address from_param(rbp, 8+0);
const Address to_param (rbp, 8+4);
@ -2782,6 +2822,14 @@ class StubGenerator: public StubCodeGenerator {
__ enter();
handleSOERegisters(true); // Save registers
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
__ movl(rdx, 0xffff);
__ kmovdl(k1, rdx);
}
__ movptr(state, state_param);
__ movptr(subkeyH, subkeyH_param);
__ movptr(data, data_param);

View File

@ -269,12 +269,16 @@ class StubGenerator: public StubCodeGenerator {
__ kmovql(k1, rbx);
}
#ifdef _WIN64
int last_reg = 15;
if (UseAVX > 2) {
for (int i = 6; i <= 31; i++) {
__ movdqu(xmm_save(i), as_XMMRegister(i));
last_reg = 31;
}
if (VM_Version::supports_avx512novl()) {
for (int i = xmm_save_first; i <= last_reg; i++) {
__ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0);
}
} else {
for (int i = 6; i <= 15; i++) {
for (int i = xmm_save_first; i <= last_reg; i++) {
__ movdqu(xmm_save(i), as_XMMRegister(i));
}
}
@ -367,28 +371,34 @@ class StubGenerator: public StubCodeGenerator {
#ifdef ASSERT
// verify that threads correspond
{
Label L, S;
Label L1, L2, L3;
__ cmpptr(r15_thread, thread);
__ jcc(Assembler::notEqual, S);
__ jcc(Assembler::equal, L1);
__ stop("StubRoutines::call_stub: r15_thread is corrupted");
__ bind(L1);
__ get_thread(rbx);
__ cmpptr(r15_thread, thread);
__ jcc(Assembler::equal, L2);
__ stop("StubRoutines::call_stub: r15_thread is modified by call");
__ bind(L2);
__ cmpptr(r15_thread, rbx);
__ jcc(Assembler::equal, L);
__ bind(S);
__ jcc(Assembler::equal, L);
__ jcc(Assembler::equal, L3);
__ stop("StubRoutines::call_stub: threads must correspond");
__ bind(L);
__ bind(L3);
}
#endif
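// The label sequence above, read as straight-line pseudocode (current_thread()
// stands in for the get_thread(rbx) reload):
//   if (r15_thread != thread) stop("r15_thread is corrupted");
//   rbx = current_thread();
//   if (r15_thread != thread) stop("r15_thread is modified by call");
//   if (r15_thread != rbx)    stop("threads must correspond");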
// restore regs belonging to calling function
#ifdef _WIN64
int xmm_ub = 15;
if (UseAVX > 2) {
xmm_ub = 31;
}
// emit the restores for xmm regs
for (int i = 6; i <= xmm_ub; i++) {
__ movdqu(as_XMMRegister(i), xmm_save(i));
if (VM_Version::supports_avx512novl()) {
for (int i = xmm_save_first; i <= last_reg; i++) {
__ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0);
}
} else {
for (int i = xmm_save_first; i <= last_reg; i++) {
__ movdqu(as_XMMRegister(i), xmm_save(i));
}
}
#endif
__ movptr(r15, r15_save);
@ -450,15 +460,20 @@ class StubGenerator: public StubCodeGenerator {
#ifdef ASSERT
// verify that threads correspond
{
Label L, S;
Label L1, L2, L3;
__ cmpptr(r15_thread, thread);
__ jcc(Assembler::notEqual, S);
__ jcc(Assembler::equal, L1);
__ stop("StubRoutines::catch_exception: r15_thread is corrupted");
__ bind(L1);
__ get_thread(rbx);
__ cmpptr(r15_thread, thread);
__ jcc(Assembler::equal, L2);
__ stop("StubRoutines::catch_exception: r15_thread is modified by call");
__ bind(L2);
__ cmpptr(r15_thread, rbx);
__ jcc(Assembler::equal, L);
__ bind(S);
__ jcc(Assembler::equal, L3);
__ stop("StubRoutines::catch_exception: threads must correspond");
__ bind(L);
__ bind(L3);
}
#endif
@ -1244,7 +1259,7 @@ class StubGenerator: public StubCodeGenerator {
__ popa();
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
@ -1284,7 +1299,7 @@ class StubGenerator: public StubCodeGenerator {
__ popa();
}
break;
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
{
CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
@ -1333,11 +1348,15 @@ class StubGenerator: public StubCodeGenerator {
__ align(OptoLoopAlignment);
if (UseUnalignedLoadStores) {
Label L_end;
if (UseAVX > 2) {
__ movl(to, 0xffff);
__ kmovql(k1, to);
}
// Copy 64-bytes per iteration
__ BIND(L_loop);
if (UseAVX > 2) {
__ evmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
__ evmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
__ evmovdqul(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
} else if (UseAVX == 2) {
__ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
@ -1413,11 +1432,15 @@ class StubGenerator: public StubCodeGenerator {
__ align(OptoLoopAlignment);
if (UseUnalignedLoadStores) {
Label L_end;
if (UseAVX > 2) {
__ movl(to, 0xffff);
__ kmovql(k1, to);
}
// Copy 64-bytes per iteration
__ BIND(L_loop);
if (UseAVX > 2) {
__ evmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32), Assembler::AVX_512bit);
__ evmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 32), Assembler::AVX_512bit);
__ evmovdqul(Address(dest, qword_count, Address::times_8, 32), xmm0, Assembler::AVX_512bit);
} else if (UseAVX == 2) {
__ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
__ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
@ -3097,6 +3120,14 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
__ movl(rax, 0xffff);
__ kmovql(k1, rax);
}
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
@ -3191,6 +3222,14 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
__ movl(rax, 0xffff);
__ kmovql(k1, rax);
}
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
@ -3303,6 +3342,14 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
__ movl(rax, 0xffff);
__ kmovql(k1, rax);
}
#ifdef _WIN64
// on win64, fill len_reg from stack position
__ movl(len_reg, len_mem);
@ -3499,6 +3546,14 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
__ movl(rax, 0xffff);
__ kmovql(k1, rax);
}
#ifdef _WIN64
// on win64, fill len_reg from stack position
__ movl(len_reg, len_mem);
@ -3737,6 +3792,14 @@ class StubGenerator: public StubCodeGenerator {
__ enter();
// For EVEX with VL and BW, provide a standard mask; VL = 128 will guide the merge
// context for the registers used, since all instructions below use 128-bit mode.
// On EVEX without VL and BW, these instructions will all be AVX.
if (VM_Version::supports_avx512vlbw()) {
__ movl(rax, 0xffff);
__ kmovql(k1, rax);
}
#ifdef _WIN64
// save the xmm registers which must be preserved 6-10
__ subptr(rsp, -rsp_after_call_off * wordSize);

View File

@ -31,7 +31,7 @@
enum platform_dependent_constants {
code_size1 = 9000, // simply increase if too small (assembler will crash if too small)
code_size2 = 22000 // simply increase if too small (assembler will crash if too small)
code_size2 = 30000 // simply increase if too small (assembler will crash if too small)
};
class x86 {

View File

@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _
enum platform_dependent_constants {
code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
code_size2 = 32000 // simply increase if too small (assembler will crash if too small)
};
class x86 {

View File

@ -200,7 +200,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm,
}
break;
#endif // INCLUDE_ALL_GCS
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
{
if (val == noreg) {

View File

@ -367,16 +367,12 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ movl(rcx, VM_Version::ymm_test_value());
__ movdl(xmm0, rcx);
__ movl(rcx, 0xffff);
#ifdef _LP64
__ kmovql(k1, rcx);
#else
__ kmovdl(k1, rcx);
#endif
__ kmovwl(k1, rcx);
__ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
__ evmovdqu(xmm7, xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
__ evmovdqu(xmm8, xmm0, Assembler::AVX_512bit);
__ evmovdqu(xmm31, xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
__ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
VM_Version::clean_cpuFeatures();
__ jmp(save_restore_except);
@ -427,11 +423,11 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
UseAVX = 3;
UseSSE = 2;
__ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
__ evmovdqu(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
__ evmovdqu(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
__ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
__ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
__ evmovdqu(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
__ evmovdqu(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
__ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
__ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif
VM_Version::clean_cpuFeatures();
UseAVX = saved_useavx;
@ -714,6 +710,11 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
}
if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
}
// Adjust RTM (Restricted Transactional Memory) flags
if (!supports_rtm() && UseRTMLocking) {
// Can't continue because UseRTMLocking affects UseBiasedLocking flag

View File

@ -227,14 +227,15 @@ public:
union XemXcr0Eax {
uint32_t value;
struct {
uint32_t x87 : 1,
sse : 1,
ymm : 1,
: 2,
opmask : 1,
zmm512 : 1,
zmm32 : 1,
: 24;
uint32_t x87 : 1,
sse : 1,
ymm : 1,
bndregs : 1,
bndcsr : 1,
opmask : 1,
zmm512 : 1,
zmm32 : 1,
: 24;
} bits;
};
@ -703,6 +704,7 @@ public:
static bool supports_avx512bw() { return (_cpuFeatures & CPU_AVX512BW) != 0; }
static bool supports_avx512vl() { return (_cpuFeatures & CPU_AVX512VL) != 0; }
static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
// Intel features
static bool is_intel_family_core() { return is_intel() &&
extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
@ -817,6 +819,12 @@ public:
intx count = PrefetchFieldsAhead;
return count >= 0 ? count : 1;
}
static uint32_t get_xsave_header_lower_segment() {
return _cpuid_info.xem_xcr0_eax.value;
}
static uint32_t get_xsave_header_upper_segment() {
return _cpuid_info.xem_xcr0_edx;
}
};
#endif // CPU_X86_VM_VM_VERSION_X86_HPP
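A standalone sketch of how the XCR0 bits mapped by the XemXcr0Eax union above can be read and decoded; it assumes a compiler that exposes the _xgetbv intrinsic (e.g. GCC/Clang with -mxsave, or MSVC) and is illustrative, not HotSpot's cpuid path.

#include <immintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t xcr0 = _xgetbv(0);  // lower 32 bits correspond to xem_xcr0_eax.value
  const char* names[8] = { "x87", "sse", "ymm", "bndregs", "bndcsr",
                           "opmask", "zmm512", "zmm32" };
  for (int bit = 0; bit < 8; bit++)
    printf("%-7s = %d\n", names[bit], (int)((xcr0 >> bit) & 1));
  // AVX-512 state is usable when opmask, zmm512 and zmm32 are all enabled.
  return 0;
}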

View File

@ -1661,46 +1661,55 @@ const bool Matcher::match_rule_supported(int opcode) {
if (!has_match_rule(opcode))
return false;
bool ret_value = true;
switch (opcode) {
case Op_PopCountI:
case Op_PopCountL:
if (!UsePopCountInstruction)
return false;
break;
ret_value = false;
break;
case Op_MulVI:
if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
return false;
break;
ret_value = false;
break;
case Op_MulVL:
case Op_MulReductionVL:
if (VM_Version::supports_avx512dq() == false)
return false;
ret_value = false;
break;
case Op_AddReductionVL:
if (UseAVX < 3) // only EVEX : vector connectivity becomes an issue here
return false;
ret_value = false;
break;
case Op_AddReductionVI:
if (UseSSE < 3) // requires at least SSE3
return false;
ret_value = false;
break;
case Op_MulReductionVI:
if (UseSSE < 4) // requires at least SSE4
return false;
ret_value = false;
break;
case Op_AddReductionVF:
case Op_AddReductionVD:
case Op_MulReductionVF:
case Op_MulReductionVD:
if (UseSSE < 1) // requires at least SSE
return false;
break;
ret_value = false;
break;
case Op_SqrtVD:
if (UseAVX < 1) // enabled for AVX only
ret_value = false;
break;
case Op_CompareAndSwapL:
#ifdef _LP64
case Op_CompareAndSwapP:
#endif
if (!VM_Version::supports_cx8())
return false;
break;
ret_value = false;
break;
}
return true; // By default, match rules are supported.
return ret_value; // By default, match rules are supported.
}
// Max vector size in bytes. 0 if not supported.
@ -1721,14 +1730,24 @@ const int Matcher::vector_width_in_bytes(BasicType bt) {
case T_DOUBLE:
case T_LONG:
if (size < 16) return 0;
break;
case T_FLOAT:
case T_INT:
if (size < 8) return 0;
break;
case T_BOOLEAN:
case T_BYTE:
if (size < 4) return 0;
break;
case T_CHAR:
if (size < 4) return 0;
break;
case T_BYTE:
if (size < 4) return 0;
if ((size > 32) && !VM_Version::supports_avx512bw()) return 0;
break;
case T_SHORT:
if (size < 4) return 0;
if ((size > 16) && !VM_Version::supports_avx512bw()) return 0;
break;
default:
ShouldNotReachHere();
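The width table above can be summarized with a small pure-logic sketch (illustrative names, not HotSpot's): with AVX-512 but no AVX512BW, byte vectors above 32 bytes and short vectors above 16 bytes are rejected, mirroring the two new checks.

#include <cstdio>

// size = platform maximum vector width in bytes (e.g. 64 with AVX-512)
int vector_width_in_bytes(char type, int size, bool avx512bw) {
  switch (type) {
    case 'D': case 'J': if (size < 16) return 0; break;   // double, long
    case 'F': case 'I': if (size <  8) return 0; break;   // float, int
    case 'Z': case 'C': if (size <  4) return 0; break;   // boolean, char
    case 'B': if (size <  4) return 0;                    // byte
              if (size > 32 && !avx512bw) return 0; break;
    case 'S': if (size <  4) return 0;                    // short
              if (size > 16 && !avx512bw) return 0; break;
    default:  return 0;
  }
  return size;
}

int main() {
  printf("%d %d\n", vector_width_in_bytes('S', 64, false),  // 0: no 64-byte short vectors without BW
                    vector_width_in_bytes('S', 64, true));  // 64
}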
@ -1800,7 +1819,7 @@ static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
break;
case Op_VecZ:
__ evmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
__ evmovdqul(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
break;
default:
ShouldNotReachHere();
@ -1855,7 +1874,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
break;
case Op_VecZ:
__ evmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
__ evmovdqul(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
break;
default:
ShouldNotReachHere();
@ -1875,7 +1894,7 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
break;
case Op_VecZ:
__ evmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
__ evmovdqul(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
break;
default:
ShouldNotReachHere();
@ -1929,9 +1948,40 @@ static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
}
#endif
}
int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
bool is_single_byte = false;
int vec_len = 0;
if ((UseAVX > 2) && (stack_offset != 0)) {
switch (ireg) {
case Op_VecS:
case Op_VecD:
case Op_VecX:
break;
case Op_VecY:
vec_len = 1;
break;
case Op_VecZ:
vec_len = 2;
break;
}
is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, Assembler::EVEX_FVM, Assembler::EVEX_32bit, 0);
}
int offset_size = 0;
int size = 5;
if (UseAVX > 2 ) {
if ((VM_Version::supports_avx512vl() == false) && (vec_len == 2)) {
offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
size += 2; // Need an additional two bytes for EVEX encoding
} else if ((VM_Version::supports_avx512vl() == false) && (vec_len < 2)) {
offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
} else {
offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
size += 2; // Need an additional two bytes for EVEX encoding
}
} else {
offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
}
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
return 5+offset_size;
return size+offset_size;
}
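The sizing logic above hinges on EVEX's compressed disp8*N displacements; here is a hedged pure-logic sketch of the underlying rule (the real decision lives in Assembler::query_compressed_disp_byte):

#include <cstdio>

// With EVEX, an 8-bit displacement is implicitly scaled by the tuple width N,
// so an offset encodes in one byte iff it is a multiple of N and the scaled
// value fits in a signed byte; otherwise a full 4-byte disp32 is emitted.
bool fits_disp8(int offset, int tuple_bytes) {
  if (offset % tuple_bytes != 0) return false;
  int scaled = offset / tuple_bytes;
  return scaled >= -128 && scaled <= 127;
}

int main() {
  printf("%d %d %d\n",
         fits_disp8(4096, 64),    // 1: 4096 = 64 * 64 -> disp8 of 64
         fits_disp8(100, 64),     // 0: not a multiple of N
         fits_disp8(16384, 64));  // 0: scaled value 256 overflows int8
}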
static inline jfloat replicate4_imm(int con, int width) {
@ -2675,11 +2725,10 @@ instruct negF_reg_reg(regF dst, regF src) %{
predicate(UseAVX > 0);
match(Set dst (NegF src));
ins_cost(150);
format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
ins_encode %{
int vector_len = 0;
__ vxorps($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(float_signflip()), vector_len);
__ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(float_signflip()));
%}
ins_pipe(pipe_slow);
%}
@ -2700,12 +2749,11 @@ instruct negD_reg_reg(regD dst, regD src) %{
predicate(UseAVX > 0);
match(Set dst (NegD src));
ins_cost(150);
format %{ "vxorpd $dst, $src, [0x8000000000000000]\t"
format %{ "vnegatess $dst, $src, [0x8000000000000000]\t"
"# neg double by sign flipping" %}
ins_encode %{
int vector_len = 0;
__ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(double_signflip()), vector_len);
__ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
ExternalAddress(double_signflip()));
%}
ins_pipe(pipe_slow);
%}
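For reference, the sign-flipping trick behind vnegatess/vnegatesd can be sketched standalone with SSE intrinsics (any x86-64 compiler): XOR-ing in the sign-bit mask negates the value without touching the other bits.

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128 x    = _mm_set_ss(3.5f);
  __m128 mask = _mm_castsi128_ps(_mm_set1_epi32((int)0x80000000));  // float_signflip
  __m128 neg  = _mm_xor_ps(x, mask);   // flips only the sign bit
  printf("%f\n", _mm_cvtss_f32(neg));  // -3.500000
  return 0;
}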
@ -2838,7 +2886,7 @@ instruct loadV64(vecZ dst, memory mem) %{
format %{ "vmovdqu $dst k0,$mem\t! load vector (64 bytes)" %}
ins_encode %{
int vector_len = 2;
__ evmovdqu($dst$$XMMRegister, $mem$$Address, vector_len);
__ evmovdqul($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
@ -2895,7 +2943,7 @@ instruct storeV64(memory mem, vecZ src) %{
format %{ "vmovdqu $mem k0,$src\t! store vector (64 bytes)" %}
ins_encode %{
int vector_len = 2;
__ evmovdqu($mem$$Address, $src$$XMMRegister, vector_len);
__ evmovdqul($mem$$Address, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@ -3315,6 +3363,37 @@ instruct Repl8F_mem(vecY dst, memory mem) %{
ins_pipe( pipe_slow );
%}
instruct Repl2F_zero(vecD dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
match(Set dst (ReplicateF zero));
format %{ "xorps $dst,$dst\t! replicate2F zero" %}
ins_encode %{
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl4F_zero(vecX dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
match(Set dst (ReplicateF zero));
format %{ "xorps $dst,$dst\t! replicate4F zero" %}
ins_encode %{
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl8F_zero(vecY dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 8 && UseAVX < 3);
match(Set dst (ReplicateF zero));
format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
ins_encode %{
int vector_len = 1;
__ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl2D_mem(vecX dst, memory mem) %{
predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
match(Set dst (ReplicateD (LoadD mem)));
@ -3349,6 +3428,28 @@ instruct Repl4D_mem(vecY dst, memory mem) %{
ins_pipe( pipe_slow );
%}
// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
match(Set dst (ReplicateD zero));
format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
ins_encode %{
__ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl4D_zero(vecY dst, immD0 zero) %{
predicate(n->as_Vector()->length() == 4 && UseAVX < 3);
match(Set dst (ReplicateD zero));
format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
ins_encode %{
int vector_len = 1;
__ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
// ====================GENERIC REPLICATE==========================================
// Replicate byte scalar to be vector
@ -3680,38 +3781,6 @@ instruct Repl4F(vecX dst, regF src) %{
ins_pipe( pipe_slow );
%}
// Replicate float (4 byte) scalar zero to be vector
instruct Repl2F_zero(vecD dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 2);
match(Set dst (ReplicateF zero));
format %{ "xorps $dst,$dst\t! replicate2F zero" %}
ins_encode %{
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl4F_zero(vecX dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 4);
match(Set dst (ReplicateF zero));
format %{ "xorps $dst,$dst\t! replicate4F zero" %}
ins_encode %{
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl8F_zero(vecY dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 8);
match(Set dst (ReplicateF zero));
format %{ "vxorps $dst,$dst,$dst\t! replicate8F zero" %}
ins_encode %{
int vector_len = 1;
__ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
// Replicate double (8 bytes) scalar to be vector
instruct Repl2D(vecX dst, regD src) %{
predicate(n->as_Vector()->length() == 2);
@ -3723,28 +3792,6 @@ instruct Repl2D(vecX dst, regD src) %{
ins_pipe( pipe_slow );
%}
// Replicate double (8 byte) scalar zero to be vector
instruct Repl2D_zero(vecX dst, immD0 zero) %{
predicate(n->as_Vector()->length() == 2);
match(Set dst (ReplicateD zero));
format %{ "xorpd $dst,$dst\t! replicate2D zero" %}
ins_encode %{
__ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl4D_zero(vecY dst, immD0 zero) %{
predicate(n->as_Vector()->length() == 4);
match(Set dst (ReplicateD zero));
format %{ "vxorpd $dst,$dst,$dst,vect256\t! replicate4D zero" %}
ins_encode %{
int vector_len = 1;
__ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
// ====================EVEX REPLICATE=============================================
instruct Repl4B_mem_evex(vecS dst, memory mem) %{
@ -3814,7 +3861,7 @@ instruct Repl32B_mem_evex(vecY dst, memory mem) %{
%}
instruct Repl64B_evex(vecZ dst, rRegI src) %{
predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateB src));
format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %}
ins_encode %{
@ -3825,7 +3872,7 @@ instruct Repl64B_evex(vecZ dst, rRegI src) %{
%}
instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vlbw());
predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateB (LoadB mem)));
format %{ "vpbroadcastb $dst,$mem\t! replicate64B" %}
ins_encode %{
@ -3862,7 +3909,7 @@ instruct Repl32B_imm_evex(vecY dst, immI con) %{
%}
instruct Repl64B_imm_evex(vecZ dst, immI con) %{
predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateB con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastb $dst,$dst\t! upper replicate64B" %}
@ -3953,7 +4000,7 @@ instruct Repl16S_mem_evex(vecY dst, memory mem) %{
%}
instruct Repl32S_evex(vecZ dst, rRegI src) %{
predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateS src));
format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
ins_encode %{
@ -3964,7 +4011,7 @@ instruct Repl32S_evex(vecZ dst, rRegI src) %{
%}
instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateS (LoadS mem)));
format %{ "vpbroadcastw $dst,$mem\t! replicate32S" %}
ins_encode %{
@ -4001,7 +4048,7 @@ instruct Repl16S_imm_evex(vecY dst, immI con) %{
%}
instruct Repl32S_imm_evex(vecZ dst, immI con) %{
predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
match(Set dst (ReplicateS con));
format %{ "movq $dst,[$constantaddress]\n\t"
"vpbroadcastw $dst,$dst\t! replicate32S" %}
@ -4318,13 +4365,50 @@ instruct Repl16F_mem_evex(vecZ dst, memory mem) %{
ins_pipe( pipe_slow );
%}
instruct Repl2F_zero_evex(vecD dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
match(Set dst (ReplicateF zero));
format %{ "vpxor $dst k0,$dst,$dst\t! replicate2F zero" %}
ins_encode %{
// Use vpxor in place of vxorps since EVEX vxorps requires AVX512DQ: this is a 512-bit operation
int vector_len = 2;
__ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl4F_zero_evex(vecX dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
match(Set dst (ReplicateF zero));
format %{ "vpxor $dst k0,$dst,$dst\t! replicate4F zero" %}
ins_encode %{
// Use vpxor in place of vxorps since EVEX vxorps requires AVX512DQ: this is a 512-bit operation
int vector_len = 2;
__ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl8F_zero_evex(vecY dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
match(Set dst (ReplicateF zero));
format %{ "vpxor $dst k0,$dst,$dst\t! replicate8F zero" %}
ins_encode %{
// Use vpxor in place of vxorps since EVEX vxorps requires AVX512DQ: this is a 512-bit operation
int vector_len = 2;
__ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl16F_zero_evex(vecZ dst, immF0 zero) %{
predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
match(Set dst (ReplicateF zero));
format %{ "vxorps $dst k0,$dst,$dst\t! replicate16F zero" %}
format %{ "vpxor $dst k0,$dst,$dst\t! replicate16F zero" %}
ins_encode %{
// Use vpxor in place of vxorps since EVEX vxorps requires AVX512DQ: this is a 512-bit operation
int vector_len = 2;
__ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
__ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
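A standalone sketch (AVX-512F intrinsics, -mavx512f) of the vpxor choice the comments above explain: a 512-bit integer XOR is plain AVX512F, so it zeroes an FP register even on hardware where the EVEX forms of vxorps/vxorpd would require AVX512DQ.

#include <immintrin.h>
#include <cstdio>

int main() {
  __m512i v  = _mm512_set1_epi32(123);
  __m512i z  = _mm512_xor_si512(v, v);   // vpxord: AVX512F only
  __m512  zf = _mm512_castsi512_ps(z);   // reinterpret as an all-zero float vector
  printf("%f\n", _mm512_cvtss_f32(zf));  // 0.000000
  return 0;
}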
@ -4373,13 +4457,38 @@ instruct Repl8D_mem_evex(vecZ dst, memory mem) %{
ins_pipe( pipe_slow );
%}
instruct Repl2D_zero_evex(vecX dst, immD0 zero) %{
predicate(n->as_Vector()->length() == 2 && UseAVX > 2);
match(Set dst (ReplicateD zero));
format %{ "vpxor $dst k0,$dst,$dst\t! replicate2D zero" %}
ins_encode %{
// Use vpxor in place of vxorpd since EVEX vxorpd requires AVX512DQ: this is a 512-bit operation
int vector_len = 2;
__ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl4D_zero_evex(vecY dst, immD0 zero) %{
predicate(n->as_Vector()->length() == 4 && UseAVX > 2);
match(Set dst (ReplicateD zero));
format %{ "vpxor $dst k0,$dst,$dst\t! replicate4D zero" %}
ins_encode %{
// Use vpxor in place of vxorpd since EVEX vxorpd requires AVX512DQ: this is a 512-bit operation
int vector_len = 2;
__ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
instruct Repl8D_zero_evex(vecZ dst, immD0 zero) %{
predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
match(Set dst (ReplicateD zero));
format %{ "vxorpd $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
format %{ "vpxor $dst k0,$dst,$dst,vect512\t! replicate8D zero" %}
ins_encode %{
// Use vpxor in place of vxorpd since EVEX vxorpd requires AVX512DQ: this is a 512-bit operation
int vector_len = 2;
__ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
__ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
%}
ins_pipe( fpu_reg_reg );
%}
@ -7474,6 +7583,75 @@ instruct vshiftcnt(vecS dst, rRegI cnt) %{
ins_pipe( pipe_slow );
%}
// --------------------------------- Sqrt --------------------------------------
// Floating point vector sqrt - double precision only
instruct vsqrt2D_reg(vecX dst, vecX src) %{
predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (SqrtVD src));
format %{ "vsqrtpd $dst,$src\t! sqrt packed2D" %}
ins_encode %{
int vector_len = 0;
__ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsqrt2D_mem(vecX dst, memory mem) %{
predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
match(Set dst (SqrtVD (LoadVector mem)));
format %{ "vsqrtpd $dst,$mem\t! sqrt packed2D" %}
ins_encode %{
int vector_len = 0;
__ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsqrt4D_reg(vecY dst, vecY src) %{
predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (SqrtVD src));
format %{ "vsqrtpd $dst,$src\t! sqrt packed4D" %}
ins_encode %{
int vector_len = 1;
__ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsqrt4D_mem(vecY dst, memory mem) %{
predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
match(Set dst (SqrtVD (LoadVector mem)));
format %{ "vsqrtpd $dst,$mem\t! sqrt packed4D" %}
ins_encode %{
int vector_len = 1;
__ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsqrt8D_reg(vecZ dst, vecZ src) %{
predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
match(Set dst (SqrtVD src));
format %{ "vsqrtpd $dst,$src\t! sqrt packed8D" %}
ins_encode %{
int vector_len = 2;
__ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vsqrt8D_mem(vecZ dst, memory mem) %{
predicate(UseAVX > 2 && n->as_Vector()->length() == 8);
match(Set dst (SqrtVD (LoadVector mem)));
format %{ "vsqrtpd $dst,$mem\t! sqrt packed8D" %}
ins_encode %{
int vector_len = 2;
__ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
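The new SqrtVD rules map straight onto vsqrtpd at the three VEX/EVEX widths; a standalone sketch of the 256-bit case (AVX intrinsics, -mavx):

#include <immintrin.h>
#include <cstdio>

int main() {
  double in[4] = {1.0, 4.0, 9.0, 16.0};
  double out[4];
  __m256d v = _mm256_loadu_pd(in);
  _mm256_storeu_pd(out, _mm256_sqrt_pd(v));  // vsqrtpd ymm (vector_len = 1)
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1 2 3 4
  return 0;
}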
// ------------------------------ LeftShift -----------------------------------
// Shorts/Chars vector left shift

View File

@ -1004,10 +1004,10 @@ static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_off
__ vmovdqu(Address(rsp, dst_offset), xmm0);
__ vmovdqu(xmm0, Address(rsp, -32));
case Op_VecZ:
__ evmovdqu(Address(rsp, -64), xmm0, 2);
__ evmovdqu(xmm0, Address(rsp, src_offset), 2);
__ evmovdqu(Address(rsp, dst_offset), xmm0, 2);
__ evmovdqu(xmm0, Address(rsp, -64), 2);
__ evmovdqul(Address(rsp, -64), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, src_offset), 2);
__ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, -64), 2);
break;
default:
ShouldNotReachHere();

View File

@ -1075,10 +1075,10 @@ static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
__ vmovdqu(Address(rsp, dst_offset), xmm0);
__ vmovdqu(xmm0, Address(rsp, -32));
case Op_VecZ:
__ evmovdqu(Address(rsp, -64), xmm0, 2);
__ evmovdqu(xmm0, Address(rsp, src_offset), 2);
__ evmovdqu(Address(rsp, dst_offset), xmm0, 2);
__ evmovdqu(xmm0, Address(rsp, -64), 2);
__ evmovdqul(Address(rsp, -64), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, src_offset), 2);
__ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
__ evmovdqul(xmm0, Address(rsp, -64), 2);
break;
default:
ShouldNotReachHere();

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -53,6 +53,10 @@ static bool detect_niagara() {
return cpuinfo_field_contains("cpu", "Niagara");
}
static bool detect_M_family() {
return cpuinfo_field_contains("cpu", "SPARC-M");
}
static bool detect_blkinit() {
return cpuinfo_field_contains("cpucaps", "blkinit");
}
@ -66,6 +70,11 @@ int VM_Version::platform_features(int features) {
features = niagara1_m | T_family_m;
}
if (detect_M_family()) {
NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Detected Linux on M family");)
features = sun4v_m | generic_v9_m | M_family_m | T_family_m;
}
if (detect_blkinit()) {
features |= blk_init_instructions_m;
}

View File

@ -1,5 +1,5 @@
#
# Copyright (c) 1997, 1998, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -33,7 +33,7 @@ HotSpot Architecture Description Language. This language is used to describe
the architecture of a processor, and is the input to the ADL Compiler. The
ADL Compiler compiles an ADL file into code which is incorporated into the
Optimizing Just In Time Compiler (OJIT) to generate efficient and correct code
for the target architecture. The ADL describes three bassic different types
for the target architecture. The ADL describes three basic different types
of architectural features. It describes the instruction set (and associated
operands) of the target architecture. It describes the register set of the
target architecture along with relevant information for the register allocator.

View File

@ -4143,6 +4143,7 @@ bool MatchRule::is_vector() const {
"SubVB","SubVS","SubVI","SubVL","SubVF","SubVD",
"MulVS","MulVI","MulVL","MulVF","MulVD",
"DivVF","DivVD",
"SqrtVD",
"AndV" ,"XorV" ,"OrV",
"AddReductionVI", "AddReductionVL",
"AddReductionVF", "AddReductionVD",

View File

@ -32,7 +32,6 @@
#include "c1/c1_Runtime1.hpp"
#include "c1/c1_ValueType.hpp"
#include "compiler/compileBroker.hpp"
#include "compiler/compilerOracle.hpp"
#include "interpreter/linkResolver.hpp"
#include "memory/allocation.hpp"
#include "memory/allocation.inline.hpp"

View File

@ -4212,7 +4212,7 @@ void GraphBuilder::print_inlining(ciMethod* callee, const char* msg, bool succes
if (!PrintInlining && !compilation()->method()->has_option("PrintInlining")) {
return;
}
CompileTask::print_inlining(callee, scope()->level(), bci(), msg);
CompileTask::print_inlining_tty(callee, scope()->level(), bci(), msg);
if (success && CIPrintMethodCodes) {
callee->print_codes();
}

View File

@ -1425,7 +1425,7 @@ void LIRGenerator::pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val,
G1SATBCardTableModRef_pre_barrier(addr_opr, pre_val, do_load, patch, info);
break;
#endif // INCLUDE_ALL_GCS
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
// No pre barriers
break;
@ -1445,7 +1445,7 @@ void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
G1SATBCardTableModRef_post_barrier(addr, new_val);
break;
#endif // INCLUDE_ALL_GCS
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
CardTableModRef_post_barrier(addr, new_val);
break;

View File

@ -1447,7 +1447,6 @@ BCEscapeAnalyzer::BCEscapeAnalyzer(ciMethod* method, BCEscapeAnalyzer* parent)
if (methodData() == NULL)
return;
bool printit = _method->should_print_assembly();
if (methodData()->has_escape_info()) {
TRACE_BCEA(2, tty->print_cr("[EA] Reading previous results for %s.%s",
method->holder()->name()->as_utf8(),

View File

@ -28,8 +28,8 @@
#include "classfile/classLoader.hpp"
#include "classfile/classLoaderData.inline.hpp"
#include "classfile/classLoaderExt.hpp"
#include "classfile/imageFile.hpp"
#include "classfile/javaClasses.hpp"
#include "classfile/jimage.hpp"
#include "classfile/systemDictionary.hpp"
#include "classfile/vmSymbols.hpp"
#include "compiler/compileBroker.hpp"
@ -58,6 +58,7 @@
#include "runtime/os.hpp"
#include "runtime/threadCritical.hpp"
#include "runtime/timer.hpp"
#include "runtime/vm_version.hpp"
#include "services/management.hpp"
#include "services/threadService.hpp"
#include "utilities/events.hpp"
@ -68,7 +69,7 @@
#include "classfile/sharedPathsMiscInfo.hpp"
#endif
// Entry points in zip.dll for loading zip/jar file entries and image file entries
// Entry points in zip.dll for loading zip/jar file entries
typedef void * * (JNICALL *ZipOpen_t)(const char *name, char **pmsg);
typedef void (JNICALL *ZipClose_t)(jzfile *zip);
@ -89,6 +90,15 @@ static canonicalize_fn_t CanonicalizeEntry = NULL;
static ZipInflateFully_t ZipInflateFully = NULL;
static Crc32_t Crc32 = NULL;
// Entry points for jimage.dll for loading jimage file entries
static JImageOpen_t JImageOpen = NULL;
static JImageClose_t JImageClose = NULL;
static JImagePackageToModule_t JImagePackageToModule = NULL;
static JImageFindResource_t JImageFindResource = NULL;
static JImageGetResource_t JImageGetResource = NULL;
static JImageResourceIterator_t JImageResourceIterator = NULL;
// Globals
PerfCounter* ClassLoader::_perf_accumulated_time = NULL;
@ -141,6 +151,15 @@ bool string_starts_with(const char* str, const char* str_to_find) {
return (strncmp(str, str_to_find, str_to_find_len) == 0);
}
static const char* get_jimage_version_string() {
static char version_string[10] = "";
if (version_string[0] == '\0') {
jio_snprintf(version_string, sizeof(version_string), "%d.%d",
Abstract_VM_Version::vm_minor_version(), Abstract_VM_Version::vm_micro_version());
}
return (const char*)version_string;
}
bool string_ends_with(const char* str, const char* str_to_find) {
size_t str_len = strlen(str);
size_t str_to_find_len = strlen(str_to_find);
@ -272,97 +291,113 @@ void ClassPathZipEntry::contents_do(void f(const char* name, void* context), voi
}
}
ClassPathImageEntry::ClassPathImageEntry(ImageFileReader* image) :
ClassPathImageEntry::ClassPathImageEntry(JImageFile* jimage, const char* name) :
ClassPathEntry(),
_image(image),
_module_data(NULL) {
guarantee(image != NULL, "image file is null");
char module_data_name[JVM_MAXPATHLEN];
ImageModuleData::module_data_name(module_data_name, _image->name());
_module_data = new ImageModuleData(_image, module_data_name);
_jimage(jimage) {
guarantee(jimage != NULL, "jimage file is null");
guarantee(name != NULL, "jimage file name is null");
size_t len = strlen(name) + 1;
_name = NEW_C_HEAP_ARRAY(const char, len, mtClass);
strncpy((char *)_name, name, len);
}
ClassPathImageEntry::~ClassPathImageEntry() {
if (_module_data != NULL) {
delete _module_data;
_module_data = NULL;
if (_name != NULL) {
FREE_C_HEAP_ARRAY(const char, _name);
_name = NULL;
}
if (_image != NULL) {
ImageFileReader::close(_image);
_image = NULL;
if (_jimage != NULL) {
(*JImageClose)(_jimage);
_jimage = NULL;
}
}
const char* ClassPathImageEntry::name() {
return _image ? _image->name() : "";
void ClassPathImageEntry::name_to_package(const char* name, char* buffer, int length) {
const char *pslash = strrchr(name, '/');
if (pslash == NULL) {
buffer[0] = '\0';
return;
}
int len = pslash - name;
#if INCLUDE_CDS
if (len <= 0 && DumpSharedSpaces) {
buffer[0] = '\0';
return;
}
#endif
assert(len > 0, "Bad length for package name");
if (len >= length) {
buffer[0] = '\0';
return;
}
// drop name after last slash (including slash)
// Ex., "java/lang/String.class" => "java/lang"
strncpy(buffer, name, len);
// ensure string termination (strncpy does not guarantee)
buffer[len] = '\0';
}
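A standalone sketch of the contract above (same behavior, minus the CDS special case): trim a resource name at its last '/' so "java/lang/String.class" yields "java/lang".

#include <cstring>
#include <cstdio>

void name_to_package(const char* name, char* buffer, int length) {
  const char* pslash = strrchr(name, '/');
  int len = pslash ? (int)(pslash - name) : 0;
  if (len <= 0 || len >= length) { buffer[0] = '\0'; return; }
  memcpy(buffer, name, len);
  buffer[len] = '\0';  // explicit NUL, since strncpy alone does not guarantee one
}

int main() {
  char pkg[64];
  name_to_package("java/lang/String.class", pkg, (int)sizeof(pkg));
  printf("%s\n", pkg);  // java/lang
}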
// For a class in a named module, look it up in the jimage file using this syntax:
// /<module-name>/<package-name>/<base-class>
//
// Assumptions:
// 1. There are no unnamed modules in the jimage file.
// 2. A package is in at most one module in the jimage file.
//
ClassFileStream* ClassPathImageEntry::open_stream(const char* name, TRAPS) {
ImageLocation location;
bool found = _image->find_location(name, location);
jlong size;
JImageLocationRef location = (*JImageFindResource)(_jimage, "", get_jimage_version_string(), name, &size);
if (!found) {
const char *pslash = strrchr(name, '/');
int len = pslash - name;
// NOTE: IMAGE_MAX_PATH is used here since this path is internal to the jimage
// (effectively unlimited.) There are several JCK tests that use paths over
// 1024 characters long, the limit on Windows systems.
if (pslash && 0 < len && len < IMAGE_MAX_PATH) {
char path[IMAGE_MAX_PATH];
strncpy(path, name, len);
path[len] = '\0';
const char* moduleName = _module_data->package_to_module(path);
if (moduleName != NULL && (len + strlen(moduleName) + 2) < IMAGE_MAX_PATH) {
jio_snprintf(path, IMAGE_MAX_PATH - 1, "/%s/%s", moduleName, name);
location.clear_data();
found = _image->find_location(path, location);
}
if (location == 0) {
char package[JIMAGE_MAX_PATH];
name_to_package(name, package, JIMAGE_MAX_PATH);
if (package[0] != '\0') {
const char* module = (*JImagePackageToModule)(_jimage, package);
if (module == NULL) {
module = "java.base";
}
location = (*JImageFindResource)(_jimage, module, get_jimage_version_string(), name, &size);
}
}
if (found) {
u8 size = location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
if (location != 0) {
if (UsePerfData) {
ClassLoader::perf_sys_classfile_bytes_read()->inc(size);
}
u1* data = NEW_RESOURCE_ARRAY(u1, size);
_image->get_resource(location, data);
return new ClassFileStream(data, (int)size, _image->name()); // Resource allocated
char* data = NEW_RESOURCE_ARRAY(char, size);
(*JImageGetResource)(_jimage, location, data, size);
return new ClassFileStream((u1*)data, (int)size, _name); // Resource allocated
}
return NULL;
}
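Condensed, the lookup sequence above amounts to the following sketch; it reuses the entry points this file resolves in load_jimage_library(), with signatures inferred from this hunk rather than from jimage.hpp, so it is illustrative, not a drop-in helper.

JImageLocationRef find_class_resource(JImageFile* jimage, const char* name, jlong* size) {
  // First try the unnamed-module form of the path.
  JImageLocationRef loc = (*JImageFindResource)(jimage, "", get_jimage_version_string(), name, size);
  if (loc == 0) {
    // Fall back to mapping the package to its module and retrying.
    char package[JIMAGE_MAX_PATH];
    ClassPathImageEntry::name_to_package(name, package, JIMAGE_MAX_PATH);
    if (package[0] != '\0') {
      const char* module = (*JImagePackageToModule)(jimage, package);
      if (module == NULL) module = "java.base";  // same fallback as open_stream()
      loc = (*JImageFindResource)(jimage, module, get_jimage_version_string(), name, size);
    }
  }
  return loc;
}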
#ifndef PRODUCT
bool ctw_visitor(JImageFile* jimage,
const char* module_name, const char* version, const char* package,
const char* name, const char* extension, void* arg) {
if (strcmp(extension, "class") == 0) {
Thread* THREAD = Thread::current();
char path[JIMAGE_MAX_PATH];
jio_snprintf(path, JIMAGE_MAX_PATH - 1, "%s/%s.class", package, name);
ClassLoader::compile_the_world_in(path, *(Handle*)arg, THREAD);
return !HAS_PENDING_EXCEPTION;
}
return true;
}
void ClassPathImageEntry::compile_the_world(Handle loader, TRAPS) {
tty->print_cr("CompileTheWorld : Compiling all classes in %s", name());
tty->cr();
const ImageStrings strings = _image->get_strings();
// Retrieve each path component string.
u4 length = _image->table_length();
for (u4 i = 0; i < length; i++) {
u1* location_data = _image->get_location_data(i);
if (location_data != NULL) {
ImageLocation location(location_data);
char path[IMAGE_MAX_PATH];
_image->location_path(location, path, IMAGE_MAX_PATH);
ClassLoader::compile_the_world_in(path, loader, CHECK);
}
}
(*JImageResourceIterator)(_jimage, (JImageResourceVisitor_t)ctw_visitor, (void *)&loader);
if (HAS_PENDING_EXCEPTION) {
if (PENDING_EXCEPTION->is_a(SystemDictionary::OutOfMemoryError_klass())) {
CLEAR_PENDING_EXCEPTION;
tty->print_cr("\nCompileTheWorld : Ran out of memory\n");
tty->print_cr("Increase class metadata storage if a limit was set");
} else {
tty->print_cr("\nCompileTheWorld : Unexpected exception occurred\n");
}
}
}
@ -490,7 +525,7 @@ ClassPathEntry* ClassLoader::create_class_path_entry(const char *path, const str
JavaThread* thread = JavaThread::current();
ClassPathEntry* new_entry = NULL;
if ((st->st_mode & S_IFREG) == S_IFREG) {
// Regular file, should be a zip or image file
// Regular file, should be a zip or jimage file
// Canonicalized filename
char canonical_path[JVM_MAXPATHLEN];
if (!get_canonical_path(path, canonical_path, JVM_MAXPATHLEN)) {
@ -501,9 +536,10 @@ ClassPathEntry* ClassLoader::create_class_path_entry(const char *path, const str
return NULL;
}
}
ImageFileReader* image = ImageFileReader::open(canonical_path);
if (image != NULL) {
new_entry = new ClassPathImageEntry(image);
jint error;
JImageFile* jimage =(*JImageOpen)(canonical_path, &error);
if (jimage != NULL) {
new_entry = new ClassPathImageEntry(jimage, canonical_path);
} else {
char* error_msg = NULL;
jzfile* zip;
@ -682,6 +718,35 @@ void ClassLoader::load_zip_library() {
// This lookup only works on 1.3. Do not check for non-null here
}
void ClassLoader::load_jimage_library() {
// First make sure native library is loaded
os::native_java_library();
// Load jimage library
char path[JVM_MAXPATHLEN];
char ebuf[1024];
void* handle = NULL;
if (os::dll_build_name(path, sizeof(path), Arguments::get_dll_dir(), "jimage")) {
handle = os::dll_load(path, ebuf, sizeof ebuf);
}
if (handle == NULL) {
vm_exit_during_initialization("Unable to load jimage library", path);
}
// Lookup jimage entry points
JImageOpen = CAST_TO_FN_PTR(JImageOpen_t, os::dll_lookup(handle, "JIMAGE_Open"));
guarantee(JImageOpen != NULL, "function JIMAGE_Open not found");
JImageClose = CAST_TO_FN_PTR(JImageClose_t, os::dll_lookup(handle, "JIMAGE_Close"));
guarantee(JImageClose != NULL, "function JIMAGE_Close not found");
JImagePackageToModule = CAST_TO_FN_PTR(JImagePackageToModule_t, os::dll_lookup(handle, "JIMAGE_PackageToModule"));
guarantee(JImagePackageToModule != NULL, "function JIMAGE_PackageToModule not found");
JImageFindResource = CAST_TO_FN_PTR(JImageFindResource_t, os::dll_lookup(handle, "JIMAGE_FindResource"));
guarantee(JImageFindResource != NULL, "function JIMAGE_FindResource not found");
JImageGetResource = CAST_TO_FN_PTR(JImageGetResource_t, os::dll_lookup(handle, "JIMAGE_GetResource"));
guarantee(JImageGetResource != NULL, "function JIMAGE_GetResource not found");
JImageResourceIterator = CAST_TO_FN_PTR(JImageResourceIterator_t, os::dll_lookup(handle, "JIMAGE_ResourceIterator"));
guarantee(JImageResourceIterator != NULL, "function JIMAGE_ResourceIterator not found");
}
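The load-then-resolve pattern above, reduced to a standalone POSIX sketch (dlopen/dlsym instead of the os:: wrappers; the function signature is illustrative, not the real JIMAGE_Open prototype):

#include <dlfcn.h>
#include <cstdio>

typedef void* (*JImageOpen_t)(const char* name, int* error);  // illustrative only

int main() {
  void* handle = dlopen("libjimage.so", RTLD_NOW);
  if (handle == NULL) { fprintf(stderr, "load failed: %s\n", dlerror()); return 1; }
  JImageOpen_t open_fn = (JImageOpen_t)dlsym(handle, "JIMAGE_Open");
  if (open_fn == NULL) { fprintf(stderr, "JIMAGE_Open not found\n"); return 1; }
  printf("resolved JIMAGE_Open\n");  // HotSpot guarantee()s this instead of printing
  dlclose(handle);
  return 0;
}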
jboolean ClassLoader::decompress(void *in, u8 inSize, void *out, u8 outSize, char **pmsg) {
return (*ZipInflateFully)(in, inSize, out, outSize, pmsg);
}
@ -1086,6 +1151,8 @@ void ClassLoader::initialize() {
// lookup zip library entry points
load_zip_library();
// lookup jimage library entry points
load_jimage_library();
#if INCLUDE_CDS
// initialize search path
if (DumpSharedSpaces) {

View File

@ -37,8 +37,7 @@
// Class path entry (directory or zip file)
class ImageFileReader;
class ImageModuleData;
class JImageFile;
class ClassPathEntry: public CHeapObj<mtClass> {
private:
@ -52,7 +51,7 @@ class ClassPathEntry: public CHeapObj<mtClass> {
}
virtual bool is_jar_file() = 0;
virtual const char* name() = 0;
virtual ImageFileReader* image() = 0;
virtual JImageFile* jimage() = 0;
// Constructor
ClassPathEntry();
// Attempt to locate file_name through this class path entry.
@ -70,7 +69,7 @@ class ClassPathDirEntry: public ClassPathEntry {
public:
bool is_jar_file() { return false; }
const char* name() { return _dir; }
ImageFileReader* image() { return NULL; }
JImageFile* jimage() { return NULL; }
ClassPathDirEntry(const char* dir);
ClassFileStream* open_stream(const char* name, TRAPS);
// Debugging
@ -100,7 +99,7 @@ class ClassPathZipEntry: public ClassPathEntry {
public:
bool is_jar_file() { return true; }
const char* name() { return _zip_name; }
ImageFileReader* image() { return NULL; }
JImageFile* jimage() { return NULL; }
ClassPathZipEntry(jzfile* zip, const char* zip_name);
~ClassPathZipEntry();
u1* open_entry(const char* name, jint* filesize, bool nul_terminate, TRAPS);
@ -115,16 +114,16 @@ class ClassPathZipEntry: public ClassPathEntry {
// For java image files
class ClassPathImageEntry: public ClassPathEntry {
private:
ImageFileReader* _image;
ImageModuleData* _module_data;
JImageFile* _jimage;
const char* _name;
public:
bool is_jar_file() { return false; }
bool is_open() { return _image != NULL; }
const char* name();
ImageFileReader* image() { return _image; }
ImageModuleData* module_data() { return _module_data; }
ClassPathImageEntry(ImageFileReader* image);
bool is_open() { return _jimage != NULL; }
const char* name() { return _name == NULL ? "" : _name; }
JImageFile* jimage() { return _jimage; }
ClassPathImageEntry(JImageFile* jimage, const char* name);
~ClassPathImageEntry();
static void name_to_package(const char* name, char* buffer, int length);
ClassFileStream* open_stream(const char* name, TRAPS);
// Debugging
@ -206,6 +205,7 @@ class ClassLoader: AllStatic {
static void setup_search_path(const char *class_path);
static void load_zip_library();
static void load_jimage_library();
static ClassPathEntry* create_class_path_entry(const char *path, const struct stat* st,
bool throw_exception, TRAPS);

View File

@ -1,121 +0,0 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "runtime/thread.inline.hpp"
#include "classfile/imageDecompressor.hpp"
#include "runtime/thread.hpp"
#include "utilities/bytes.hpp"
/*
* Allocate in the C heap, not in a resource area; otherwise the JVM crashes.
* This array's lifetime is the VM's lifetime. The array is never freed and
* is not expected to contain more than a few references.
*/
GrowableArray<ImageDecompressor*>* ImageDecompressor::_decompressors =
new(ResourceObj::C_HEAP, mtInternal) GrowableArray<ImageDecompressor*>(2, true);
static Symbol* createSymbol(const char* str) {
Thread* THREAD = Thread::current();
Symbol* sym = SymbolTable::lookup(str, (int) strlen(str), THREAD);
if (HAS_PENDING_EXCEPTION) {
warning("can't create symbol\n");
CLEAR_PENDING_EXCEPTION;
return NULL;
}
return sym;
}
/*
* Initialize the array of decompressors.
*/
bool image_decompressor_init() {
Symbol* zipSymbol = createSymbol("zip");
if (zipSymbol == NULL) {
return false;
}
ImageDecompressor::add_decompressor(new ZipDecompressor(zipSymbol));
return true;
}
/*
* Decompression entry point. Called from ImageFileReader::get_resource.
*/
void ImageDecompressor::decompress_resource(u1* compressed, u1* uncompressed,
u4 uncompressed_size, const ImageStrings* strings, bool is_C_heap) {
bool has_header = false;
u1* decompressed_resource = compressed;
u1* compressed_resource = compressed;
// Resource could have been transformed by a stack of decompressors.
// Iterate and decompress resources until there is no more header.
do {
ResourceHeader _header;
memcpy(&_header, compressed_resource, sizeof (ResourceHeader));
has_header = _header._magic == ResourceHeader::resource_header_magic;
if (has_header) {
// The decompressed_resource array contains the result of decompression.
// When a resource content is terminal, it is an actual resource rather than
// an intermediate, partially decompressed content. In this case
// the resource is allocated as mtClass, otherwise as mtOther.
decompressed_resource = is_C_heap && _header._is_terminal ?
NEW_C_HEAP_ARRAY(u1, _header._uncompressed_size, mtClass) :
NEW_C_HEAP_ARRAY(u1, _header._uncompressed_size, mtOther);
// Retrieve the decompressor name
const char* decompressor_name = strings->get(_header._decompressor_name_offset);
if (decompressor_name == NULL) warning("image decompressor not found\n");
guarantee(decompressor_name, "image decompressor not found");
// Retrieve the decompressor instance
ImageDecompressor* decompressor = get_decompressor(decompressor_name);
if (decompressor == NULL) {
warning("image decompressor %s not found\n", decompressor_name);
}
guarantee(decompressor, "image decompressor not found");
u1* compressed_resource_base = compressed_resource;
compressed_resource += ResourceHeader::resource_header_length;
// Ask the decompressor to decompress the compressed content
decompressor->decompress_resource(compressed_resource, decompressed_resource,
&_header, strings);
if (compressed_resource_base != compressed) {
FREE_C_HEAP_ARRAY(char, compressed_resource_base);
}
compressed_resource = decompressed_resource;
}
} while (has_header);
memcpy(uncompressed, decompressed_resource, uncompressed_size);
}
// Zip decompressor
void ZipDecompressor::decompress_resource(u1* data, u1* uncompressed,
ResourceHeader* header, const ImageStrings* strings) {
char* msg = NULL;
jboolean res = ClassLoader::decompress(data, header->_size, uncompressed,
header->_uncompressed_size, &msg);
if (!res) warning("decompression failed due to %s\n", msg);
guarantee(res, "decompression failed");
}
// END Zip Decompressor

View File

@ -1,136 +0,0 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_CLASSFILE_IMAGEDECOMPRESSOR_HPP
#define SHARE_VM_CLASSFILE_IMAGEDECOMPRESSOR_HPP
#include "runtime/thread.inline.hpp"
#include "classfile/classLoader.hpp"
#include "classfile/imageFile.hpp"
#include "classfile/symbolTable.hpp"
#include "oops/symbol.hpp"
#include "utilities/growableArray.hpp"
/*
* Compressed resources located in an image have a header.
* This header contains:
* - _magic: A magic u4, required to locate the header in the compressed content
* - _size: The size of the compressed resource.
* - _uncompressed_size: The uncompressed size of the compressed resource.
* - _decompressor_name_offset: The ImageDecompressor instance name StringsTable offset.
* - _decompressor_config_offset: StringsTable offset of configuration that may be needed by
* the decompressor in order to decompress.
* - _is_terminal: 1: the compressed content is terminal; uncompressing it yields
* the actual resource. 0: the compressed content is not terminal; uncompressing it
* yields another compressed content to be decompressed. (This occurs when a stack
* of compressors has been used to compress the resource.)
*/
struct ResourceHeader {
/* Length of header, needed to retrieve content offset */
static const u1 resource_header_length = 21;
/* magic bytes that identifies a compressed resource header*/
static const u4 resource_header_magic = 0xCAFEFAFA;
u4 _magic; // Resource header
u4 _size; // Resource size
u4 _uncompressed_size; // Expected uncompressed size
u4 _decompressor_name_offset; // Strings table decompressor offset
u4 _decompressor_config_offset; // Strings table config offset
u1 _is_terminal; // Last decompressor 1, otherwise 0.
};
/*
* Resources located in a jimage file can be compressed. Compression occurs at
* jimage file creation time. When compressed, a resource is prefixed with a
* header that names the compressor that compressed it.
* Various compression strategies can be applied to compress a resource.
* The same resource can even be compressed multiple times by a stack of compressors.
* At runtime, a resource is decompressed in a loop until no header remains,
* meaning that the resource is equivalent to the uncompressed resource.
* In each iteration, the name of the compressor located in the current header
* is used to retrieve the associated instance of ImageDecompressor.
* For example, zip is the name of the compressor that compresses resources
* using the zip algorithm. The ZipDecompressor class name is also zip.
* ImageDecompressor instances are retrieved from a static array in which
* they are registered.
*/
class ImageDecompressor: public CHeapObj<mtClass> {
private:
const Symbol* _name;
/*
* Array of concrete decompressors. This array is used to retrieve the decompressor
* that can handle resource decompression.
*/
static GrowableArray<ImageDecompressor*>* _decompressors;
/*
* Identifier of a decompressor. This name is the key used to retrieve the
* decompressor named in a resource header.
*/
inline const Symbol* get_name() const { return _name; }
protected:
ImageDecompressor(const Symbol* name) : _name(name) {
}
virtual void decompress_resource(u1* data, u1* uncompressed,
ResourceHeader* header, const ImageStrings* strings) = 0;
public:
inline static void add_decompressor(ImageDecompressor* decompressor) {
_decompressors->append(decompressor);
}
inline static ImageDecompressor* get_decompressor(const char * decompressor_name) {
Thread* THREAD = Thread::current();
TempNewSymbol sym = SymbolTable::new_symbol(decompressor_name,
(int) strlen(decompressor_name), CHECK_NULL);
if (HAS_PENDING_EXCEPTION) {
warning("can't create symbol\n");
CLEAR_PENDING_EXCEPTION;
return NULL;
}
for (int i = 0; i < _decompressors->length(); i++) {
ImageDecompressor* decompressor = _decompressors->at(i);
if (decompressor->get_name()->fast_compare(sym) == 0) {
return decompressor;
}
}
guarantee(false, "No decompressor found.");
return NULL;
}
static void decompress_resource(u1* compressed, u1* uncompressed,
u4 uncompressed_size, const ImageStrings* strings, bool is_C_heap);
};
/**
* Zip decompressor.
*/
class ZipDecompressor : public ImageDecompressor {
public:
ZipDecompressor(const Symbol* sym) : ImageDecompressor(sym) { }
void decompress_resource(u1* data, u1* uncompressed, ResourceHeader* header,
const ImageStrings* strings);
};
#endif // SHARE_VM_CLASSFILE_IMAGEDECOMPRESSOR_HPP

View File

@ -1,546 +0,0 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "classfile/imageDecompressor.hpp"
#include "classfile/imageFile.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/os.inline.hpp"
#include "utilities/endian.hpp"
#include "utilities/growableArray.hpp"
// Image files are an alternate file format for storing classes and resources. The
// goal is to supply file access which is faster, and storage which is smaller, than the jar format.
//
// (More detailed notes in the header.)
//
// Compute the Perfect Hashing hash code for the supplied UTF-8 string.
s4 ImageStrings::hash_code(const char* string, s4 seed) {
// Access bytes as unsigned.
u1* bytes = (u1*)string;
// Compute hash code.
for (u1 byte = *bytes++; byte; byte = *bytes++) {
seed = (seed * HASH_MULTIPLIER) ^ byte;
}
// Ensure the result is not signed.
return seed & 0x7FFFFFFF;
}
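A standalone sketch of the hash above; HASH_MULTIPLIER's value is not shown in this hunk, so the FNV-style constant below is an assumption, and unsigned arithmetic is used to keep the overflow well defined.

#include <cstdio>

static const unsigned HASH_MULTIPLIER = 0x01000193u;  // assumed value

int hash_code(const char* s, unsigned seed) {
  for (const unsigned char* p = (const unsigned char*)s; *p; p++)
    seed = (seed * HASH_MULTIPLIER) ^ *p;  // same update as above
  return (int)(seed & 0x7FFFFFFF);         // clear the sign bit
}

int main() {
  printf("%d\n", hash_code("java/lang", HASH_MULTIPLIER));  // multiplier as a plausible default seed
  return 0;
}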
// Match up a string in a perfect hash table. Result still needs validation
// for precise match (false positive.)
s4 ImageStrings::find(Endian* endian, const char* name, s4* redirect, u4 length) {
// If the table is empty, then short cut.
if (redirect == NULL || length == 0) {
return NOT_FOUND;
}
// Compute the basic perfect hash for name.
s4 hash_code = ImageStrings::hash_code(name);
// Modulo table size.
s4 index = hash_code % length;
// Get redirect entry.
// value == 0 then not found
// value < 0 then -1 - value is true index
// value > 0 then value is seed for recomputing hash.
s4 value = endian->get(redirect[index]);
// if recompute is required.
if (value > 0) {
// Entry collision value, need to recompute hash.
hash_code = ImageStrings::hash_code(name, value);
// Modulo table size.
return hash_code % length;
} else if (value < 0) {
// Compute direct index.
return -1 - value;
}
// No entry found.
return NOT_FOUND;
}
// Test to see if UTF-8 string begins with the start UTF-8 string. If so,
// return non-NULL address of remaining portion of string. Otherwise, return
// NULL. Used to test sections of a path without copying from image string
// table.
const char* ImageStrings::starts_with(const char* string, const char* start) {
char ch1, ch2;
// Match up the strings the best we can.
while ((ch1 = *string) && (ch2 = *start)) {
if (ch1 != ch2) {
// Mismatch, return NULL.
return NULL;
}
// Next characters.
string++, start++;
}
// Return remainder of string.
return string;
}
// Inflates the attribute stream into individual values stored in the long
// array _attributes. This allows an attribute value to be quickly accessed by
// direct indexing. Unspecified values default to zero (from constructor.)
void ImageLocation::set_data(u1* data) {
// Deflate the attribute stream into an array of attributes.
u1 byte;
// Repeat until end header is found.
while ((byte = *data)) {
// Extract kind from header byte.
u1 kind = attribute_kind(byte);
guarantee(kind < ATTRIBUTE_COUNT, "invalid image location attribute");
// Extract length of data (in bytes).
u1 n = attribute_length(byte);
// Read value (most significant first.)
_attributes[kind] = attribute_value(data + 1, n);
// Position to next attribute by skipping attribute header and data bytes.
data += n + 1;
}
}
// Zero all attribute values.
void ImageLocation::clear_data() {
// Set defaults to zero.
memset(_attributes, 0, sizeof(_attributes));
}
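The attribute stream decoded by set_data() above can be sketched standalone; the header-byte split (kind in the high five bits, length-1 in the low three) is an assumption here, since attribute_kind()/attribute_length() live in the deleted imageFile.hpp.

#include <cstdint>
#include <cstdio>

int main() {
  enum { ATTRIBUTE_COUNT = 8 };
  // One attribute: kind 2, two value bytes (0x01 0x00 -> 256), then the end byte 0.
  uint8_t stream[] = { (2 << 3) | (2 - 1), 0x01, 0x00, 0 };
  uint64_t attributes[ATTRIBUTE_COUNT] = {0};
  for (uint8_t* data = stream; *data; ) {
    uint8_t byte = *data;
    uint8_t kind = byte >> 3;          // assumed attribute_kind()
    uint8_t n    = (byte & 0x7) + 1;   // assumed attribute_length()
    uint64_t value = 0;
    for (uint8_t i = 0; i < n; i++)    // most significant byte first
      value = (value << 8) | data[1 + i];
    attributes[kind] = value;
    data += n + 1;                     // skip header byte plus data bytes
  }
  printf("%llu\n", (unsigned long long)attributes[2]);  // 256
  return 0;
}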
// ImageModuleData constructor maps out sub-tables for faster access.
ImageModuleData::ImageModuleData(const ImageFileReader* image_file,
const char* module_data_name) :
_image_file(image_file),
_endian(image_file->endian()),
_strings(image_file->get_strings()) {
// Retrieve the resource containing the module data for the image file.
ImageLocation location;
bool found = image_file->find_location(module_data_name, location);
guarantee(found, "missing module data");
u8 data_size = location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
_data = (u1*)NEW_C_HEAP_ARRAY(char, data_size, mtClass);
_image_file->get_resource(location, _data);
// Map out the header.
_header = (Header*)_data;
// Get the package to module entry count.
u4 ptm_count = _header->ptm_count(_endian);
// Get the module to package entry count.
u4 mtp_count = _header->mtp_count(_endian);
// Compute the offset of the package to module perfect hash redirect.
u4 ptm_redirect_offset = sizeof(Header);
// Compute the offset of the package to module data.
u4 ptm_data_offset = ptm_redirect_offset + ptm_count * sizeof(s4);
// Compute the offset of the module to package perfect hash redirect.
u4 mtp_redirect_offset = ptm_data_offset + ptm_count * sizeof(PTMData);
// Compute the offset of the module to package data.
u4 mtp_data_offset = mtp_redirect_offset + mtp_count * sizeof(s4);
// Compute the offset of the module to package tables.
u4 mtp_packages_offset = mtp_data_offset + mtp_count * sizeof(MTPData);
// Compute the address of the package to module perfect hash redirect.
_ptm_redirect = (s4*)(_data + ptm_redirect_offset);
// Compute the address of the package to module data.
_ptm_data = (PTMData*)(_data + ptm_data_offset);
// Compute the address of the module to package perfect hash redirect.
_mtp_redirect = (s4*)(_data + mtp_redirect_offset);
// Compute the address of the module to package data.
_mtp_data = (MTPData*)(_data + mtp_data_offset);
// Compute the address of the module to package tables.
_mtp_packages = (s4*)(_data + mtp_packages_offset);
}
// Release module data resource.
ImageModuleData::~ImageModuleData() {
if (_data != NULL) {
FREE_C_HEAP_ARRAY(u1, _data);
}
}
// Return the name of the module data resource. Ex. "./lib/modules/file.jimage"
// yields "file.jdata"
void ImageModuleData::module_data_name(char* buffer, const char* image_file_name) {
// Locate the last slash in the file name path.
const char* slash = strrchr(image_file_name, os::file_separator()[0]);
// Trim the path to name and extension.
const char* name = slash != NULL ? slash + 1 : (char *)image_file_name;
// Locate the extension period.
const char* dot = strrchr(name, '.');
guarantee(dot, "missing extension on jimage name");
// Trim to only base name.
int length = dot - name;
strncpy(buffer, name, length);
buffer[length] = '\0';
// Append extension.
strcat(buffer, ".jdata");
}
// Return the module in which a package resides. Returns NULL if not found.
const char* ImageModuleData::package_to_module(const char* package_name) {
// Search the package to module table.
s4 index = ImageStrings::find(_endian, package_name, _ptm_redirect,
_header->ptm_count(_endian));
// If entry is found.
if (index != ImageStrings::NOT_FOUND) {
// Retrieve the package to module entry.
PTMData* data = _ptm_data + index;
// Verify that it is the correct data.
if (strcmp(package_name, get_string(data->name_offset(_endian))) != 0) {
return NULL;
}
// Return the module name.
return get_string(data->module_name_offset(_endian));
}
return NULL;
}
// Returns all the package names in a module. Returns NULL if module not found.
GrowableArray<const char*>* ImageModuleData::module_to_packages(const char* module_name) {
// Search the module to package table.
s4 index = ImageStrings::find(_endian, module_name, _mtp_redirect,
_header->mtp_count(_endian));
// If entry is found.
if (index != ImageStrings::NOT_FOUND) {
// Retrieve the module to package entry.
MTPData* data = _mtp_data + index;
// Verify that it is the correct data.
if (strcmp(module_name, get_string(data->name_offset(_endian))) != 0) {
return NULL;
}
// Construct an array of all the package entries.
GrowableArray<const char*>* packages = new GrowableArray<const char*>();
s4 package_offset = data->package_offset(_endian);
for (u4 i = 0; i < data->package_count(_endian); i++) {
u4 package_name_offset = mtp_package(package_offset + i);
const char* package_name = get_string(package_name_offset);
packages->append(package_name);
}
return packages;
}
return NULL;
}
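// Illustrative use, assuming an open ImageFileReader* named 'reader'
// (IMAGE_MAX_PATH is defined in imageFile.hpp):
//   char data_name[IMAGE_MAX_PATH];
//   ImageModuleData::module_data_name(data_name, reader->name());
//   ImageModuleData module_data(reader, data_name);
//   const char* module = module_data.package_to_module("java/lang");
//   // e.g. "java.base", or NULL if the package is not in this image.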
// Table to manage multiple opens of an image file.
GrowableArray<ImageFileReader*>* ImageFileReader::_reader_table =
new(ResourceObj::C_HEAP, mtInternal) GrowableArray<ImageFileReader*>(2, true);
// Open an image file, reuse structure if file already open.
ImageFileReader* ImageFileReader::open(const char* name, bool big_endian) {
// Lock out _reader_table.
MutexLocker ml(ImageFileReaderTable_lock);
ImageFileReader* reader;
// Search for an existing image file.
for (int i = 0; i < _reader_table->length(); i++) {
// Retrieve table entry.
reader = _reader_table->at(i);
// If name matches, then reuse (bump up use count.)
if (strcmp(reader->name(), name) == 0) {
reader->inc_use();
return reader;
}
}
// Need a new image reader.
reader = new ImageFileReader(name, big_endian);
bool opened = reader->open();
// If failed to open.
if (!opened) {
delete reader;
return NULL;
}
// Bump use count and add to table.
reader->inc_use();
_reader_table->append(reader);
return reader;
}
// Close an image file if the file is not in use elsewhere.
void ImageFileReader::close(ImageFileReader *reader) {
// Lock out _reader_table.
MutexLocker ml(ImageFileReaderTable_lock);
// If last use then remove from table and then close.
if (reader->dec_use()) {
_reader_table->remove(reader);
delete reader;
}
}
// Return an id for the specified ImageFileReader.
u8 ImageFileReader::readerToID(ImageFileReader *reader) {
// ID is just the cloaked reader address.
return (u8)reader;
}
// Validate the image id.
bool ImageFileReader::idCheck(u8 id) {
// Make sure the ID is a managed (_reader_table) reader.
MutexLocker ml(ImageFileReaderTable_lock);
return _reader_table->contains((ImageFileReader*)id);
}
// Return the ImageFileReader for the supplied id.
ImageFileReader* ImageFileReader::idToReader(u8 id) {
#ifdef PRODUCT
// Fast convert.
return (ImageFileReader*)id;
#else
// Do a slow check before fast convert.
return idCheck(id) ? (ImageFileReader*)id : NULL;
#endif
}
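// Sketch of the id round trip ('reader' obtained from open() above):
//   u8 id = ImageFileReader::readerToID(reader);
//   ...
//   ImageFileReader* same = ImageFileReader::idToReader(id);
// In PRODUCT builds the id is cast back directly; otherwise idCheck first
// verifies it still names a managed reader and NULL is returned if not.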
// Constructor initializes to a closed state.
ImageFileReader::ImageFileReader(const char* name, bool big_endian) {
// Copy the image file name.
_name = NEW_C_HEAP_ARRAY(char, strlen(name) + 1, mtClass);
strcpy(_name, name);
// Initialize for a closed file.
_fd = -1;
_endian = Endian::get_handler(big_endian);
_index_data = NULL;
}
// Close image and free up data structures.
ImageFileReader::~ImageFileReader() {
// Ensure file is closed.
close();
// Free up name.
if (_name != NULL) {
FREE_C_HEAP_ARRAY(char, _name);
_name = NULL;
}
}
// Open image file for read access.
bool ImageFileReader::open() {
// If file exists open for reading.
struct stat st;
if (os::stat(_name, &st) != 0 ||
(st.st_mode & S_IFREG) != S_IFREG ||
(_fd = os::open(_name, 0, O_RDONLY)) == -1) {
return false;
}
// Retrieve the file size.
_file_size = (u8)st.st_size;
// Read image file header and verify it has a valid header.
size_t header_size = sizeof(ImageHeader);
if (_file_size < header_size ||
!read_at((u1*)&_header, header_size, 0) ||
_header.magic(_endian) != IMAGE_MAGIC ||
_header.major_version(_endian) != MAJOR_VERSION ||
_header.minor_version(_endian) != MINOR_VERSION) {
close();
return false;
}
// Size of image index.
_index_size = index_size();
// Make sure file is large enough to contain the index.
  if (_file_size < _index_size) {
    close();
    return false;
  }
// Determine how much of the image is memory mapped.
off_t map_size = (off_t)(MemoryMapImage ? _file_size : _index_size);
// Memory map image (minimally the index.)
_index_data = (u1*)os::map_memory(_fd, _name, 0, NULL, map_size, true, false);
guarantee(_index_data, "image file not memory mapped");
// Retrieve length of index perfect hash table.
u4 length = table_length();
// Compute offset of the perfect hash table redirect table.
u4 redirect_table_offset = (u4)header_size;
// Compute offset of index attribute offsets.
u4 offsets_table_offset = redirect_table_offset + length * sizeof(s4);
// Compute offset of index location attribute data.
u4 location_bytes_offset = offsets_table_offset + length * sizeof(u4);
// Compute offset of index string table.
u4 string_bytes_offset = location_bytes_offset + locations_size();
// Compute address of the perfect hash table redirect table.
_redirect_table = (s4*)(_index_data + redirect_table_offset);
// Compute address of index attribute offsets.
_offsets_table = (u4*)(_index_data + offsets_table_offset);
// Compute address of index location attribute data.
_location_bytes = _index_data + location_bytes_offset;
// Compute address of index string table.
_string_bytes = _index_data + string_bytes_offset;
// Successful open.
return true;
}
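// Resulting index layout (recap of the offsets computed above): with
// table_length() == N, the redirect table occupies N * sizeof(s4) bytes
// immediately after the header, followed by N * sizeof(u4) attribute
// offsets, then locations_size() bytes of location attribute data, and
// finally the string table.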
// Close image file.
void ImageFileReader::close() {
// Deallocate the index.
if (_index_data != NULL) {
os::unmap_memory((char*)_index_data, _index_size);
_index_data = NULL;
}
// Close file.
if (_fd != -1) {
os::close(_fd);
_fd = -1;
}
}
// Read directly from the file.
bool ImageFileReader::read_at(u1* data, u8 size, u8 offset) const {
return os::read_at(_fd, data, size, offset) == size;
}
// Find the location attributes associated with the path. Returns true if
// the location is found, false otherwise.
bool ImageFileReader::find_location(const char* path, ImageLocation& location) const {
// Locate the entry in the index perfect hash table.
s4 index = ImageStrings::find(_endian, path, _redirect_table, table_length());
// If the entry is found.
if (index != ImageStrings::NOT_FOUND) {
// Get address of first byte of location attribute stream.
u1* data = get_location_data(index);
// Expand location attributes.
location.set_data(data);
// Make sure result is not a false positive.
return verify_location(location, path);
}
return false;
}
// Assemble the location path from the string fragments indicated in the location attributes.
void ImageFileReader::location_path(ImageLocation& location, char* path, size_t max) const {
// Manage the image string table.
ImageStrings strings(_string_bytes, _header.strings_size(_endian));
// Position to first character of the path buffer.
char* next = path;
// Temp for string length.
size_t length;
// Get module string.
const char* module = location.get_attribute(ImageLocation::ATTRIBUTE_MODULE, strings);
// If module string is not empty.
if (*module != '\0') {
// Get length of module name.
length = strlen(module);
// Make sure there is no buffer overflow.
guarantee(next - path + length + 2 < max, "buffer overflow");
// Append '/module/'.
*next++ = '/';
strcpy(next, module); next += length;
*next++ = '/';
}
// Get parent (package) string.
const char* parent = location.get_attribute(ImageLocation::ATTRIBUTE_PARENT, strings);
// If parent string is not empty.
if (*parent != '\0') {
// Get length of parent string.
length = strlen(parent);
// Make sure there is no buffer overflow.
guarantee(next - path + length + 1 < max, "buffer overflow");
// Append 'parent/'.
strcpy(next, parent); next += length;
*next++ = '/';
}
// Get base name string.
const char* base = location.get_attribute(ImageLocation::ATTRIBUTE_BASE, strings);
// Get length of base name.
length = strlen(base);
// Make sure there is no buffer overflow.
guarantee(next - path + length < max, "buffer overflow");
// Append base name.
strcpy(next, base); next += length;
// Get extension string.
const char* extension = location.get_attribute(ImageLocation::ATTRIBUTE_EXTENSION, strings);
// If extension string is not empty.
if (*extension != '\0') {
// Get length of extension string.
length = strlen(extension);
// Make sure there is no buffer overflow.
guarantee(next - path + length + 1 < max, "buffer overflow");
// Append '.extension' .
*next++ = '.';
strcpy(next, extension); next += length;
}
// Make sure there is no buffer overflow.
guarantee((size_t)(next - path) < max, "buffer overflow");
// Terminate string.
*next = '\0';
}
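// Example assembly (illustrative attribute values): module "java.base",
// parent "java/lang", base "String" and extension "class" produce
// "/java.base/java/lang/String.class" in 'path'.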
// Verify that a found location matches the supplied path (without copying.)
bool ImageFileReader::verify_location(ImageLocation& location, const char* path) const {
// Manage the image string table.
ImageStrings strings(_string_bytes, _header.strings_size(_endian));
// Position to first character of the path string.
const char* next = path;
// Get module name string.
const char* module = location.get_attribute(ImageLocation::ATTRIBUTE_MODULE, strings);
// If module string is not empty.
if (*module != '\0') {
// Compare '/module/' .
if (*next++ != '/') return false;
if (!(next = ImageStrings::starts_with(next, module))) return false;
if (*next++ != '/') return false;
}
// Get parent (package) string.
const char* parent = location.get_attribute(ImageLocation::ATTRIBUTE_PARENT, strings);
// If parent string is not empty.
if (*parent != '\0') {
// Compare 'parent/' .
if (!(next = ImageStrings::starts_with(next, parent))) return false;
if (*next++ != '/') return false;
}
// Get base name string.
const char* base = location.get_attribute(ImageLocation::ATTRIBUTE_BASE, strings);
// Compare with base name.
if (!(next = ImageStrings::starts_with(next, base))) return false;
// Get extension string.
const char* extension = location.get_attribute(ImageLocation::ATTRIBUTE_EXTENSION, strings);
// If extension is not empty.
if (*extension != '\0') {
// Compare '.extension' .
if (*next++ != '.') return false;
if (!(next = ImageStrings::starts_with(next, extension))) return false;
}
// True only if complete match and no more characters.
return *next == '\0';
}
// Return the resource data for the supplied location.
void ImageFileReader::get_resource(ImageLocation& location, u1* uncompressed_data) const {
// Retrieve the byte offset and size of the resource.
u8 offset = location.get_attribute(ImageLocation::ATTRIBUTE_OFFSET);
u8 uncompressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
u8 compressed_size = location.get_attribute(ImageLocation::ATTRIBUTE_COMPRESSED);
if (compressed_size != 0) {
ResourceMark rm;
u1* compressed_data;
// If not memory mapped read in bytes.
if (!MemoryMapImage) {
// Allocate buffer for compression.
compressed_data = NEW_RESOURCE_ARRAY(u1, compressed_size);
// Read bytes from offset beyond the image index.
bool is_read = read_at(compressed_data, compressed_size, _index_size + offset);
guarantee(is_read, "error reading from image or short read");
} else {
compressed_data = get_data_address() + offset;
}
// Get image string table.
const ImageStrings strings = get_strings();
// Decompress resource.
ImageDecompressor::decompress_resource(compressed_data, uncompressed_data, uncompressed_size,
&strings, false);
} else {
// Read bytes from offset beyond the image index.
bool is_read = read_at(uncompressed_data, uncompressed_size, _index_size + offset);
guarantee(is_read, "error reading from image or short read");
}
}
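// Typical retrieval sequence (a sketch, assuming an open ImageFileReader*
// named 'reader' and a ResourceMark in scope):
//   ImageLocation location;
//   if (reader->find_location("/java.base/java/lang/String.class", location)) {
//     u8 size = location.get_attribute(ImageLocation::ATTRIBUTE_UNCOMPRESSED);
//     u1* data = NEW_RESOURCE_ARRAY(u1, size);
//     reader->get_resource(location, data);
//   }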

View File

@ -1,602 +0,0 @@
/*
* Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_CLASSFILE_IMAGEFILE_HPP
#define SHARE_VM_CLASSFILE_IMAGEFILE_HPP
#include "classfile/classLoader.hpp"
#include "memory/allocation.hpp"
#include "memory/allocation.inline.hpp"
#include "utilities/endian.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/growableArray.hpp"
// Image files are an alternate file format for storing classes and resources. The
// goal is to supply file access which is faster and smaller than the jar format.
// It should be noted that unlike jars, information stored in an image is in native
// endian format. This allows the image to be mapped into memory without endian
// translation. This also means that images are platform dependent.
//
// Image files are structured as three sections;
//
// +-----------+
// | Header |
// +-----------+
// | |
// | Index |
// | |
// +-----------+
// | |
// | |
// | Resources |
// | |
// | |
// +-----------+
//
// The header contains information related to identification and description of
// contents.
//
// +-------------------------+
// | Magic (0xCAFEDADA) |
// +------------+------------+
// | Major Vers | Minor Vers |
// +------------+------------+
// | Flags |
// +-------------------------+
// | Resource Count |
// +-------------------------+
// | Table Length |
// +-------------------------+
// | Attributes Size |
// +-------------------------+
// | Strings Size |
// +-------------------------+
//
// Magic - means of identifying validity of the file. This avoids requiring a
// special file extension.
// Major vers, minor vers - differences in version numbers indicate structural
// changes in the image.
// Flags - various image wide flags (future).
// Resource count - number of resources in the file.
// Table length - the length of lookup tables used in the index.
// Attributes size - number of bytes in the region used to store location attribute
// streams.
// Strings size - the size of the region used to store strings used by the
// index and meta data.
//
// The index contains information related to resource lookup. The algorithm
// used for lookup is "A Practical Minimal Perfect Hashing Method"
// (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string
// in the form /<module>/<package>/<base>.<extension> return the resource location
// information;
//
// redirectIndex = hash(path, DEFAULT_SEED) % table_length;
// redirect = redirectTable[redirectIndex];
// if (redirect == 0) return not found;
// locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % table_length;
// location = locationTable[locationIndex];
// if (!verify(location, path)) return not found;
// return location;
//
// Note: The hash function takes an initial seed value. A different seed value
// usually returns a different result for strings that would otherwise collide with
// other seeds. The verify function guarantees the found resource location is
// indeed the resource we are looking for.
//
// The following is the format of the index;
//
// +-------------------+
// | Redirect Table |
// +-------------------+
// | Attribute Offsets |
// +-------------------+
// | Attribute Data |
// +-------------------+
// | Strings |
// +-------------------+
//
// Redirect Table - Array of 32-bit signed values representing actions that
// should take place for hashed strings that map to that
// value. Negative values indicate no hash collision and can be
// quickly converted to indices into attribute offsets. Positive
// values represent a new seed for hashing an index into attribute
// offsets. Zero indicates not found.
// Attribute Offsets - Array of 32-bit unsigned values representing offsets into
// attribute data. Attribute offsets can be iterated to do a
// full survey of resources in the image. Offset of zero
// indicates no attributes.
// Attribute Data - Bytes representing compact attribute data for locations. (See
// comments in ImageLocation.)
// Strings - Collection of zero terminated UTF-8 strings used by the index and
// image meta data. Each string is accessed by offset. Each string is
// unique. Offset zero is reserved for the empty string.
//
// Note that the memory mapped index assumes 32 bit alignment of each component
// in the index.
//
// Endianness of an image.
// An image booted by hotspot is always in native endian. However, it is possible
// to read (by the JDK) in alternate endian format. Primarily, this occurs in
// cross-platform scenarios, e.g. where javac needs to read an embedded image
// to access classes for cross compilation.
//
class ImageFileReader; // forward declaration
// Manage image file string table.
class ImageStrings VALUE_OBJ_CLASS_SPEC {
private:
u1* _data; // Data bytes for strings.
u4 _size; // Number of bytes in the string table.
public:
enum {
// Not found result from find routine.
NOT_FOUND = -1,
// Prime used to generate hash for Perfect Hashing.
HASH_MULTIPLIER = 0x01000193
};
ImageStrings(u1* data, u4 size) : _data(data), _size(size) {}
// Return the UTF-8 string beginning at offset.
inline const char* get(u4 offset) const {
guarantee(offset < _size, "offset exceeds string table size");
return (const char*)(_data + offset);
}
// Compute the Perfect Hashing hash code for the supplied UTF-8 string.
inline static u4 hash_code(const char* string) {
return hash_code(string, HASH_MULTIPLIER);
}
// Compute the Perfect Hashing hash code for the supplied string, starting at seed.
static s4 hash_code(const char* string, s4 seed);
// Match up a string in a perfect hash table. Result still needs validation
// for precise match.
static s4 find(Endian* endian, const char* name, s4* redirect, u4 length);
// Test to see if UTF-8 string begins with the start UTF-8 string. If so,
// return non-NULL address of remaining portion of string. Otherwise, return
// NULL. Used to test sections of a path without copying from image string
// table.
static const char* starts_with(const char* string, const char* start);
// Test to see if UTF-8 string begins with start char. If so, return non-NULL
// address of remaining portion of string. Otherwise, return NULL. Used
// to test a character of a path without copying.
inline static const char* starts_with(const char* string, const char ch) {
return *string == ch ? string + 1 : NULL;
}
};
// Manage image file location attribute data. Within an image, a location's
// attributes are compressed into a stream of bytes. An attribute stream is
// composed of individual attribute sequences. Each attribute sequence begins with
// a header byte containing the attribute 'kind' (upper 5 bits of header) and the
// 'length' less 1 (lower 3 bits of header) of bytes that follow containing the
// attribute value. Attribute values present as most significant byte first.
//
// Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x2A
// (kind = 5, length = 3), 0x03, 0x35, 0x62.
//
// An attribute stream is terminated with a header kind of ATTRIBUTE_END (header
// byte of zero.)
//
// ImageLocation inflates the stream into individual values stored in the long
// array _attributes. This allows an attribute value to be quickly accessed by
// direct indexing. Unspecified values default to zero.
//
// Notes:
// - Even though ATTRIBUTE_END is used to mark the end of the attribute stream,
// streams will contain zero byte values to represent lesser significant bits.
// Thus, detecting a zero byte is not sufficient to detect the end of an attribute
// stream.
// - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region
// storing the resources. Thus, in an image this represents the number of bytes
// after the index.
// - Currently, compressed resources are represented by having a non-zero
// ATTRIBUTE_COMPRESSED value. This represents the number of bytes stored in the
// image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the
// inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value
// of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and
// in memory. In the future, additional compression techniques will be used and
// represented differently.
// - Package strings include trailing slash and extensions include prefix period.
//
class ImageLocation VALUE_OBJ_CLASS_SPEC {
public:
enum {
ATTRIBUTE_END, // End of attribute stream marker
ATTRIBUTE_MODULE, // String table offset of module name
ATTRIBUTE_PARENT, // String table offset of resource path parent
ATTRIBUTE_BASE, // String table offset of resource path base
ATTRIBUTE_EXTENSION, // String table offset of resource path extension
ATTRIBUTE_OFFSET, // Container byte offset of resource
ATTRIBUTE_COMPRESSED, // In image byte size of the compressed resource
ATTRIBUTE_UNCOMPRESSED, // In memory byte size of the uncompressed resource
ATTRIBUTE_COUNT // Number of attribute kinds
};
private:
// Values of inflated attributes.
u8 _attributes[ATTRIBUTE_COUNT];
// Return the attribute value number of bytes.
inline static u1 attribute_length(u1 data) {
return (data & 0x7) + 1;
}
// Return the attribute kind.
inline static u1 attribute_kind(u1 data) {
u1 kind = data >> 3;
guarantee(kind < ATTRIBUTE_COUNT, "invalid attribute kind");
return kind;
}
// Return the attribute length.
inline static u8 attribute_value(u1* data, u1 n) {
guarantee(0 < n && n <= 8, "invalid attribute value length");
u8 value = 0;
// Most significant bytes first.
for (u1 i = 0; i < n; i++) {
value <<= 8;
value |= data[i];
}
return value;
}
public:
ImageLocation() {
clear_data();
}
ImageLocation(u1* data) {
clear_data();
set_data(data);
}
// Inflates the attribute stream into individual values stored in the long
// array _attributes. This allows an attribute value to be quickly accessed by
// direct indexing. Unspecified values default to zero.
void set_data(u1* data);
// Zero all attribute values.
void clear_data();
// Retrieve an attribute value from the inflated array.
inline u8 get_attribute(u1 kind) const {
guarantee(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind");
return _attributes[kind];
}
// Retrieve an attribute string value from the inflated array.
inline const char* get_attribute(u4 kind, const ImageStrings& strings) const {
return strings.get((u4)get_attribute(kind));
}
};
//
// NOTE: needs revision.
// Each loader requires set of module meta data to identify which modules and
// packages are managed by that loader. Currently, there is one image file per
// builtin loader, so only one module meta data resource per file.
//
// Each element in the module meta data is a native endian 4 byte integer. Note
// that entries with zero offsets for string table entries should be ignored (
// padding for hash table lookup.)
//
// Format:
// Count of package to module entries
// Count of module to package entries
// Perfect Hash redirect table[Count of package to module entries]
// Package to module entries[Count of package to module entries]
// Offset to package name in string table
// Offset to module name in string table
// Perfect Hash redirect table[Count of module to package entries]
// Module to package entries[Count of module to package entries]
// Offset to module name in string table
// Count of packages in module
// Offset to first package in packages table
// Packages[]
// Offset to package name in string table
//
// Manage the image module meta data.
class ImageModuleData : public CHeapObj<mtClass> {
class Header VALUE_OBJ_CLASS_SPEC {
private:
u4 _ptm_count; // Count of package to module entries
u4 _mtp_count; // Count of module to package entries
public:
inline u4 ptm_count(Endian* endian) const { return endian->get(_ptm_count); }
inline u4 mtp_count(Endian* endian) const { return endian->get(_mtp_count); }
};
// Hashtable entry
class HashData VALUE_OBJ_CLASS_SPEC {
private:
u4 _name_offset; // Name offset in string table
public:
inline s4 name_offset(Endian* endian) const { return endian->get(_name_offset); }
};
// Package to module hashtable entry
class PTMData : public HashData {
private:
u4 _module_name_offset; // Module name offset in string table
public:
inline s4 module_name_offset(Endian* endian) const { return endian->get(_module_name_offset); }
};
// Module to package hashtable entry
class MTPData : public HashData {
private:
u4 _package_count; // Number of packages in module
u4 _package_offset; // Offset in package list
public:
inline u4 package_count(Endian* endian) const { return endian->get(_package_count); }
inline u4 package_offset(Endian* endian) const { return endian->get(_package_offset); }
};
const ImageFileReader* _image_file; // Source image file
Endian* _endian; // Endian handler
ImageStrings _strings; // Image file strings
u1* _data; // Module data resource data
u8 _data_size; // Size of resource data
Header* _header; // Module data header
s4* _ptm_redirect; // Package to module hashtable redirect
PTMData* _ptm_data; // Package to module data
s4* _mtp_redirect; // Module to packages hashtable redirect
MTPData* _mtp_data; // Module to packages data
s4* _mtp_packages; // Package data (name offsets)
// Return a string from the string table.
inline const char* get_string(u4 offset) {
return _strings.get(offset);
}
inline u4 mtp_package(u4 index) {
return _endian->get(_mtp_packages[index]);
}
public:
ImageModuleData(const ImageFileReader* image_file, const char* module_data_name);
~ImageModuleData();
// Return the name of the module data resource.
static void module_data_name(char* buffer, const char* image_file_name);
// Return the module in which a package resides. Returns NULL if not found.
const char* package_to_module(const char* package_name);
// Returns all the package names in a module. Returns NULL if module not found.
GrowableArray<const char*>* module_to_packages(const char* module_name);
};
// Image file header, starting at offset 0.
class ImageHeader VALUE_OBJ_CLASS_SPEC {
private:
u4 _magic; // Image file marker
u4 _version; // Image file major version number
u4 _flags; // Image file flags
u4 _resource_count; // Number of resources in file
u4 _table_length; // Number of slots in index tables
u4 _locations_size; // Number of bytes in attribute table
u4 _strings_size; // Number of bytes in string table
public:
u4 magic() const { return _magic; }
u4 magic(Endian* endian) const { return endian->get(_magic); }
void set_magic(Endian* endian, u4 magic) { return endian->set(_magic, magic); }
u4 major_version(Endian* endian) const { return endian->get(_version) >> 16; }
u4 minor_version(Endian* endian) const { return endian->get(_version) & 0xFFFF; }
void set_version(Endian* endian, u4 major_version, u4 minor_version) {
return endian->set(_version, major_version << 16 | minor_version);
}
u4 flags(Endian* endian) const { return endian->get(_flags); }
void set_flags(Endian* endian, u4 value) { return endian->set(_flags, value); }
u4 resource_count(Endian* endian) const { return endian->get(_resource_count); }
void set_resource_count(Endian* endian, u4 count) { return endian->set(_resource_count, count); }
u4 table_length(Endian* endian) const { return endian->get(_table_length); }
void set_table_length(Endian* endian, u4 count) { return endian->set(_table_length, count); }
u4 locations_size(Endian* endian) const { return endian->get(_locations_size); }
void set_locations_size(Endian* endian, u4 size) { return endian->set(_locations_size, size); }
u4 strings_size(Endian* endian) const { return endian->get(_strings_size); }
void set_strings_size(Endian* endian, u4 size) { return endian->set(_strings_size, size); }
};
// Max path length limit independent of platform. Windows max path is 1024,
// other platforms use 4096. The JCK fails several tests when 1024 is used.
#define IMAGE_MAX_PATH 4096
// Manage the image file.
// ImageFileReader manages the content of an image file.
// Initially, the header of the image file is read for validation. If valid,
// values in the header are used to calculate the size of the image index. The
// index is then memory mapped to allow load on demand and sharing. The
// -XX:+MemoryMapImage flag determines if the entire file is loaded (server use.)
// An image can be used by Hotspot and multiple reference points in the JDK, thus
// it is desirable to share a reader. To accommodate sharing, a share table is
// defined (see ImageFileReaderTable in imageFile.cpp). To track the number of
// uses, ImageFileReader keeps a use count (_use). Use is incremented when
// 'opened' by a reference point and decremented when 'closed'. A use count of zero
// leads the ImageFileReader to be actually closed and discarded.
class ImageFileReader : public CHeapObj<mtClass> {
private:
// Manage a number of image files such that an image can be shared across
// multiple uses (ex. loader.)
static GrowableArray<ImageFileReader*>* _reader_table;
char* _name; // Name of image
s4 _use; // Use count
int _fd; // File descriptor
Endian* _endian; // Endian handler
u8 _file_size; // File size in bytes
ImageHeader _header; // Image header
size_t _index_size; // Total size of index
u1* _index_data; // Raw index data
s4* _redirect_table; // Perfect hash redirect table
u4* _offsets_table; // Location offset table
u1* _location_bytes; // Location attributes
u1* _string_bytes; // String table
ImageFileReader(const char* name, bool big_endian);
~ImageFileReader();
// Compute number of bytes in image file index.
inline u8 index_size() {
return sizeof(ImageHeader) +
table_length() * sizeof(u4) * 2 + locations_size() + strings_size();
}
public:
enum {
// Image file marker.
IMAGE_MAGIC = 0xCAFEDADA,
// Endian inverted Image file marker.
IMAGE_MAGIC_INVERT = 0xDADAFECA,
// Image file major version number.
MAJOR_VERSION = 1,
// Image file minor version number.
MINOR_VERSION = 0
};
// Open an image file, reuse structure if file already open.
static ImageFileReader* open(const char* name, bool big_endian = Endian::is_big_endian());
// Close an image file if the file is not in use elsewhere.
static void close(ImageFileReader *reader);
// Return an id for the specified ImageFileReader.
static u8 readerToID(ImageFileReader *reader);
// Validate the image id.
static bool idCheck(u8 id);
// Return the ImageFileReader for the supplied id.
static ImageFileReader* idToReader(u8 id);
// Open image file for read access.
bool open();
// Close image file.
void close();
// Read directly from the file.
bool read_at(u1* data, u8 size, u8 offset) const;
inline Endian* endian() const { return _endian; }
// Retrieve name of image file.
inline const char* name() const {
return _name;
}
// Retrieve size of image file.
inline u8 file_size() const {
return _file_size;
}
// Return first address of index data.
inline u1* get_index_address() const {
return _index_data;
}
// Return first address of resource data.
inline u1* get_data_address() const {
return _index_data + _index_size;
}
// Get the size of the index data.
size_t get_index_size() const {
return _index_size;
}
inline u4 table_length() const {
return _header.table_length(_endian);
}
inline u4 locations_size() const {
return _header.locations_size(_endian);
}
inline u4 strings_size() const {
return _header.strings_size(_endian);
}
inline u4* offsets_table() const {
return _offsets_table;
}
// Increment use count.
inline void inc_use() {
_use++;
}
// Decrement use count.
inline bool dec_use() {
return --_use == 0;
}
// Return a string table accessor.
inline const ImageStrings get_strings() const {
return ImageStrings(_string_bytes, _header.strings_size(_endian));
}
// Return location attribute stream at offset.
inline u1* get_location_offset_data(u4 offset) const {
guarantee((u4)offset < _header.locations_size(_endian),
"offset exceeds location attributes size");
return offset != 0 ? _location_bytes + offset : NULL;
}
// Return location attribute stream for location i.
inline u1* get_location_data(u4 index) const {
guarantee((u4)index < _header.table_length(_endian),
"index exceeds location count");
u4 offset = _endian->get(_offsets_table[index]);
return get_location_offset_data(offset);
}
// Find the location attributes associated with the path. Returns true if
// the location is found, false otherwise.
bool find_location(const char* path, ImageLocation& location) const;
// Assemble the location path.
void location_path(ImageLocation& location, char* path, size_t max) const;
// Verify that a found location matches the supplied path.
bool verify_location(ImageLocation& location, const char* path) const;
// Return the resource for the supplied path.
void get_resource(ImageLocation& location, u1* uncompressed_data) const;
};
#endif // SHARE_VM_CLASSFILE_IMAGEFILE_HPP

View File

@ -29,7 +29,6 @@
#include "classfile/vmSymbols.hpp"
#include "code/debugInfo.hpp"
#include "code/pcDesc.hpp"
#include "compiler/compilerOracle.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/oopFactory.hpp"
#include "memory/resourceArea.hpp"

View File

@ -0,0 +1,176 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "jni.h"
// Opaque reference to a JImage file.
class JImageFile;
// Opaque reference to an image file resource location.
typedef jlong JImageLocationRef;
// Max path length limit independent of platform. Windows max path is 1024,
// other platforms use 4096. The JCK fails several tests when 1024 is used.
#define JIMAGE_MAX_PATH 4096
// JImage Error Codes
// The image file is not prefixed with 0xCAFEDADA
#define JIMAGE_BAD_MAGIC (-1)
// The image file does not have a compatible (translatable) version
#define JIMAGE_BAD_VERSION (-2)
// The image file content is malformed
#define JIMAGE_CORRUPTED (-3)
/*
* JImageOpen - Given the supplied full path file name, open an image file. This
* function will also initialize tables and retrieve meta-data necessary to
* satisfy other functions in the API. If the image file has been previously
* opened, a new open request will share memory and resources used by the previous
* open. A call to JImageOpen should be balanced by a call to JImageClose, to
* release memory and resources used. If the image file is not found or cannot
* be opened, then NULL is returned and error will contain a reason for the
* failure; a positive value for a system error number, negative for a jimage
* specific error (see JImage Error Codes.)
*
* Ex.
* jint error;
* JImageFile* jimage = (*JImageOpen)(JAVA_HOME "lib/modules/bootmodules.jimage", &error);
* if (jimage == NULL) {
* tty->print_cr("JImage failed to open: %d", error);
* ...
* }
* ...
*/
extern "C" JImageFile* JIMAGE_Open(const char *name, jint* error);
typedef JImageFile* (*JImageOpen_t)(const char *name, jint* error);
/*
* JImageClose - Given the supplied open image file (see JImageOpen), release
* memory and resources used by the open file and close the file. If the image
* file is shared by other uses, release and close is deferred until the last use
* is also closed.
*
* Ex.
* (*JImageClose)(image);
*/
extern "C" void JIMAGE_Close(JImageFile* jimage);
typedef void (*JImageClose_t)(JImageFile* jimage);
/*
* JImagePackageToModule - Given an open image file (see JImageOpen) and the name
* of a package, return the name of module where the package resides. If the
* package does not exist in the image file, the function returns NULL.
* The resulting string does not need to be released, and should not be
* freed by the caller. All strings are utf-8, zero byte terminated.
*
* Ex.
* const char* package = (*JImagePackageToModule)(image, "java/lang");
* tty->print_cr("%s", package);
* > java.base
*/
extern "C" const char * JIMAGE_PackageToModule(JImageFile* jimage, const char* package_name);
typedef const char* (*JImagePackageToModule_t)(JImageFile* jimage, const char* package_name);
/*
* JImageFindResource - Given an open image file (see JImageOpen), a module
* name, a version string and the name of a class/resource, return location
* information describing the resource and its size. If no resource is found, the
* function returns JIMAGE_NOT_FOUND and the value of size is undefined.
* The version number should be "9.0" and is not used in locating the resource.
* The resulting location does not need to be released, and should not be.
* All strings are utf-8, zero byte terminated.
*
* Ex.
* jlong size;
* JImageLocationRef location = (*JImageFindResource)(image, "java.base", "9.0", "java/lang/String.class", &size);
*/
extern "C" JImageLocationRef JIMAGE_FindResource(JImageFile* jimage,
const char* module_name, const char* version, const char* name,
jlong* size);
typedef JImageLocationRef(*JImageFindResource_t)(JImageFile* jimage,
const char* module_name, const char* version, const char* name,
jlong* size);
/*
* JImageGetResource - Given an open image file (see JImageOpen), a resource's
* location information (see JImageFindResource), a buffer of appropriate
* size, and that size, retrieve the bytes associated with the
* resource. If the size is less than the resource size then the read is truncated.
* If the size is greater than the resource size then the remainder of the buffer
* is zero filled. The function will return the actual size of the resource.
*
* Ex.
* jlong size;
* JImageLocationRef location = (*JImageFindResource)(image, "java.base", "9.0", "java/lang/String.class", &size);
* char* buffer = new char[size];
* (*JImageGetResource)(image, location, buffer, size);
*/
extern "C" jlong JIMAGE_GetResource(JImageFile* jimage, JImageLocationRef location,
char* buffer, jlong size);
typedef jlong(*JImageGetResource_t)(JImageFile* jimage, JImageLocationRef location,
char* buffer, jlong size);
/*
* JImageResourceIterator - Given an open image file (see JImageOpen), a visitor
* function and a visitor argument, iterate through each of the image's resources.
* The visitor function is called with the image file, the module name, the
* package name, the base name, the extension and the visitor argument. The return
* value of the visitor function should be true, unless an early iteration exit is
* required. All strings are utf-8, zero byte terminated.
*
* Ex.
* bool ctw_visitor(JImageFile* jimage, const char* module_name, const char* version, const char* package, const char* name, const char* extension, void* arg) {
* if (strcmp(extension, "class") == 0) {
* char path[JIMAGE_MAX_PATH];
* Thread* THREAD = Thread::current();
* jio_snprintf(path, JIMAGE_MAX_PATH - 1, "/%s/%s", package, name);
* ClassLoader::compile_the_world_in(path, (Handle)arg, THREAD);
* return !HAS_PENDING_EXCEPTION;
* }
* return true;
* }
* (*JImageResourceIterator)(image, ctw_visitor, loader);
*/
typedef bool (*JImageResourceVisitor_t)(JImageFile* jimage,
const char* module_name, const char* version, const char* package,
const char* name, const char* extension, void* arg);
extern "C" void JIMAGE_ResourceIterator(JImageFile* jimage,
JImageResourceVisitor_t visitor, void *arg);
typedef void (*JImageResourceIterator_t)(JImageFile* jimage,
JImageResourceVisitor_t visitor, void* arg);
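/*
 * End-to-end sketch combining the calls above (illustrative only; the
 * direct JIMAGE_* entry points are used instead of the function-pointer
 * typedefs, and a zero location ref is assumed to mean "not found"):
 *
 *   jint error;
 *   JImageFile* jimage = JIMAGE_Open(JAVA_HOME "lib/modules/bootmodules.jimage", &error);
 *   if (jimage == NULL) {
 *     // error > 0: system error number; error < 0: JIMAGE_* code above
 *     return;
 *   }
 *   jlong size;
 *   JImageLocationRef location =
 *       JIMAGE_FindResource(jimage, "java.base", "9.0", "java/lang/String.class", &size);
 *   if (location != 0) {  // assumption: a zero ref means not found
 *     char* buffer = new char[size];
 *     (void)JIMAGE_GetResource(jimage, location, buffer, size);
 *     // ... use the resource bytes ...
 *     delete[] buffer;
 *   }
 *   JIMAGE_Close(jimage);
 */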

View File

@ -625,6 +625,10 @@ bool vmIntrinsics::is_disabled_by_flags(methodHandle method, methodHandle compil
case vmIntrinsics::_updateDirectByteBufferCRC32C:
if (!UseCRC32CIntrinsics) return true;
break;
case vmIntrinsics::_updateBytesAdler32:
case vmIntrinsics::_updateByteBufferAdler32:
if (!UseAdler32Intrinsics) return true;
break;
case vmIntrinsics::_copyMemory:
if (!InlineArrayCopy || !InlineUnsafeOps) return true;
break;

View File

@ -927,6 +927,12 @@
do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_C_name, updateByteBuffer_signature, F_S) \
do_name( updateDirectByteBuffer_C_name, "updateDirectByteBuffer") \
\
/* support for java.util.zip.Adler32 */ \
do_class(java_util_zip_Adler32, "java/util/zip/Adler32") \
do_intrinsic(_updateBytesAdler32, java_util_zip_Adler32, updateBytes_C_name, updateBytes_signature, F_SN) \
do_intrinsic(_updateByteBufferAdler32, java_util_zip_Adler32, updateByteBuffer_A_name, updateByteBuffer_signature, F_SN) \
do_name( updateByteBuffer_A_name, "updateByteBuffer") \
\
/* support for sun.misc.Unsafe */ \
do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \
\

View File

@ -745,13 +745,12 @@ void CodeCache::gc_prologue() {
void CodeCache::gc_epilogue() {
assert_locked_or_safepoint(CodeCache_lock);
NMethodIterator iter;
while(iter.next()) {
nmethod* nm = iter.method();
if (!nm->is_zombie()) {
if (needs_cache_clean()) {
// Clean ICs of unloaded nmethods as well because they may reference other
// unloaded nmethods that may be flushed earlier in the sweeper cycle.
NOT_DEBUG(if (needs_cache_clean())) {
NMethodIterator iter;
while(iter.next_alive()) {
nmethod* nm = iter.method();
assert(!nm->is_unloaded(), "Tautology");
DEBUG_ONLY(if (needs_cache_clean())) {
nm->cleanup_inline_caches();
}
DEBUG_ONLY(nm->verify());

View File

@ -287,6 +287,7 @@ bool CompiledIC::is_call_to_compiled() const {
assert( is_c1_method ||
!is_monomorphic ||
is_optimized() ||
!caller->is_alive() ||
(cached_metadata() != NULL && cached_metadata()->is_klass()), "sanity check");
#endif // ASSERT
return is_monomorphic;
@ -321,7 +322,7 @@ bool CompiledIC::is_call_to_interpreted() const {
}
void CompiledIC::set_to_clean() {
void CompiledIC::set_to_clean(bool in_use) {
assert(SafepointSynchronize::is_at_safepoint() || CompiledIC_lock->is_locked() , "MT-unsafe call");
if (TraceInlineCacheClearing || TraceICs) {
tty->print_cr("IC@" INTPTR_FORMAT ": set to clean", p2i(instruction_address()));
@ -337,7 +338,7 @@ void CompiledIC::set_to_clean() {
// A zombie transition will always be safe, since the metadata has already been set to NULL, so
// we only need to patch the destination
bool safe_transition = is_optimized() || SafepointSynchronize::is_at_safepoint();
bool safe_transition = !in_use || is_optimized() || SafepointSynchronize::is_at_safepoint();
if (safe_transition) {
// Kill any leftover stub we might have too

View File

@ -214,7 +214,7 @@ class CompiledIC: public ResourceObj {
//
// They all takes a TRAP argument, since they can cause a GC if the inline-cache buffer is full.
//
void set_to_clean();
void set_to_clean(bool in_use = true);
void set_to_monomorphic(CompiledICInfo& info);
void clear_ic_stub();

View File

@ -848,10 +848,10 @@ void nmethod::print_on(outputStream* st, const char* msg) const {
if (st != NULL) {
ttyLocker ttyl;
if (WizardMode) {
CompileTask::print_compilation(st, this, msg, /*short_form:*/ true);
CompileTask::print(st, this, msg, /*short_form:*/ true);
st->print_cr(" (" INTPTR_FORMAT ")", this);
} else {
CompileTask::print_compilation(st, this, msg, /*short_form:*/ false);
CompileTask::print(st, this, msg, /*short_form:*/ false);
}
}
}
@ -1050,7 +1050,7 @@ void nmethod::cleanup_inline_caches() {
if( cb != NULL && cb->is_nmethod() ) {
nmethod* nm = (nmethod*)cb;
// Clean inline caches pointing to zombie, non-entrant and unloaded methods
if (!nm->is_in_use() || (nm->method()->code() != nm)) ic->set_to_clean();
if (!nm->is_in_use() || (nm->method()->code() != nm)) ic->set_to_clean(is_alive());
}
break;
}
@ -1150,7 +1150,7 @@ void nmethod::mark_as_seen_on_stack() {
// Tell if a non-entrant method can be converted to a zombie (i.e.,
// there are no activations on the stack, not in use by the VM,
// and not in use by the ServiceThread)
bool nmethod::can_not_entrant_be_converted() {
bool nmethod::can_convert_to_zombie() {
assert(is_not_entrant(), "must be a non-entrant method");
// Since the nmethod sweeper only does partial sweep the sweeper's traversal

View File

@ -577,7 +577,7 @@ public:
// See comment at definition of _last_seen_on_stack
void mark_as_seen_on_stack();
bool can_not_entrant_be_converted();
bool can_convert_to_zombie();
// Evolution support. We make old (discarded) compiled methods point to new Method*s.
void set_method(Method* method) { _method = method; }

View File

@ -157,7 +157,6 @@ long CompileBroker::_peak_compilation_time = 0;
CompileQueue* CompileBroker::_c2_compile_queue = NULL;
CompileQueue* CompileBroker::_c1_compile_queue = NULL;
class CompilationLog : public StringEventLog {
public:
CompilationLog() : StringEventLog("Compilation events") {
@ -167,7 +166,7 @@ class CompilationLog : public StringEventLog {
StringLogMessage lm;
stringStream sstr = lm.stream();
// msg.time_stamp().update_to(tty->time_stamp().ticks());
task->print_compilation(&sstr, NULL, true, false);
task->print(&sstr, NULL, true, false);
log(thread, "%s", (const char*)lm);
}
@ -233,371 +232,6 @@ CompileTaskWrapper::~CompileTaskWrapper() {
}
}
CompileTask* CompileTask::_task_free_list = NULL;
#ifdef ASSERT
int CompileTask::_num_allocated_tasks = 0;
#endif
/**
* Allocate a CompileTask, from the free list if possible.
*/
CompileTask* CompileTask::allocate() {
MutexLocker locker(CompileTaskAlloc_lock);
CompileTask* task = NULL;
if (_task_free_list != NULL) {
task = _task_free_list;
_task_free_list = task->next();
task->set_next(NULL);
} else {
task = new CompileTask();
DEBUG_ONLY(_num_allocated_tasks++;)
assert (WhiteBoxAPI || _num_allocated_tasks < 10000, "Leaking compilation tasks?");
task->set_next(NULL);
task->set_is_free(true);
}
assert(task->is_free(), "Task must be free.");
task->set_is_free(false);
return task;
}
/**
* Add a task to the free list.
*/
void CompileTask::free(CompileTask* task) {
MutexLocker locker(CompileTaskAlloc_lock);
if (!task->is_free()) {
task->set_code(NULL);
assert(!task->lock()->is_locked(), "Should not be locked when freed");
JNIHandles::destroy_global(task->_method_holder);
JNIHandles::destroy_global(task->_hot_method_holder);
task->set_is_free(true);
task->set_next(_task_free_list);
_task_free_list = task;
}
}
void CompileTask::initialize(int compile_id,
methodHandle method,
int osr_bci,
int comp_level,
methodHandle hot_method,
int hot_count,
const char* comment,
bool is_blocking) {
assert(!_lock->is_locked(), "bad locking");
_compile_id = compile_id;
_method = method();
_method_holder = JNIHandles::make_global(method->method_holder()->klass_holder());
_osr_bci = osr_bci;
_is_blocking = is_blocking;
_comp_level = comp_level;
_num_inlined_bytecodes = 0;
_is_complete = false;
_is_success = false;
_code_handle = NULL;
_hot_method = NULL;
_hot_method_holder = NULL;
_hot_count = hot_count;
_time_queued = 0; // tidy
_comment = comment;
_failure_reason = NULL;
if (LogCompilation) {
_time_queued = os::elapsed_counter();
if (hot_method.not_null()) {
if (hot_method == method) {
_hot_method = _method;
} else {
_hot_method = hot_method();
// only add loader or mirror if different from _method_holder
_hot_method_holder = JNIHandles::make_global(hot_method->method_holder()->klass_holder());
}
}
}
_next = NULL;
}
// ------------------------------------------------------------------
// CompileTask::code/set_code
nmethod* CompileTask::code() const {
if (_code_handle == NULL) return NULL;
return _code_handle->code();
}
void CompileTask::set_code(nmethod* nm) {
if (_code_handle == NULL && nm == NULL) return;
guarantee(_code_handle != NULL, "");
_code_handle->set_code(nm);
if (nm == NULL) _code_handle = NULL; // drop the handle also
}
void CompileTask::mark_on_stack() {
// Mark these methods as something redefine classes cannot remove.
_method->set_on_stack(true);
if (_hot_method != NULL) {
_hot_method->set_on_stack(true);
}
}
// RedefineClasses support
void CompileTask::metadata_do(void f(Metadata*)) {
f(method());
if (hot_method() != NULL && hot_method() != method()) {
f(hot_method());
}
}
// ------------------------------------------------------------------
// CompileTask::print_line_on_error
//
// This function is called by fatal error handler when the thread
// causing troubles is a compiler thread.
//
// Do not grab any lock, do not allocate memory.
//
// Otherwise it's the same as CompileTask::print_line()
//
void CompileTask::print_line_on_error(outputStream* st, char* buf, int buflen) {
// print compiler name
st->print("%s:", CompileBroker::compiler_name(comp_level()));
print_compilation(st);
}
// ------------------------------------------------------------------
// CompileTask::print_line
void CompileTask::print_tty() {
ttyLocker ttyl; // keep the following output all in one block
// print compiler name if requested
if (CIPrintCompilerName) tty->print("%s:", CompileBroker::compiler_name(comp_level()));
print_compilation(tty);
}
// ------------------------------------------------------------------
// CompileTask::print_compilation_impl
void CompileTask::print_compilation_impl(outputStream* st, Method* method, int compile_id, int comp_level,
bool is_osr_method, int osr_bci, bool is_blocking,
const char* msg, bool short_form, bool cr) {
if (!short_form) {
st->print("%7d ", (int) st->time_stamp().milliseconds()); // print timestamp
}
st->print("%4d ", compile_id); // print compilation number
// For unloaded methods the transition to zombie occurs after the
// method is cleared so it's impossible to report accurate
// information for that case.
bool is_synchronized = false;
bool has_exception_handler = false;
bool is_native = false;
if (method != NULL) {
is_synchronized = method->is_synchronized();
has_exception_handler = method->has_exception_handler();
is_native = method->is_native();
}
// method attributes
const char compile_type = is_osr_method ? '%' : ' ';
const char sync_char = is_synchronized ? 's' : ' ';
const char exception_char = has_exception_handler ? '!' : ' ';
const char blocking_char = is_blocking ? 'b' : ' ';
const char native_char = is_native ? 'n' : ' ';
// print method attributes
st->print("%c%c%c%c%c ", compile_type, sync_char, exception_char, blocking_char, native_char);
if (TieredCompilation) {
if (comp_level != -1) st->print("%d ", comp_level);
else st->print("- ");
}
st->print(" "); // more indent
if (method == NULL) {
st->print("(method)");
} else {
method->print_short_name(st);
if (is_osr_method) {
st->print(" @ %d", osr_bci);
}
if (method->is_native())
st->print(" (native)");
else
st->print(" (%d bytes)", method->code_size());
}
if (msg != NULL) {
st->print(" %s", msg);
}
if (cr) {
st->cr();
}
}
// ------------------------------------------------------------------
// CompileTask::print_inlining
void CompileTask::print_inlining(outputStream* st, ciMethod* method, int inline_level, int bci, const char* msg) {
// 1234567
st->print(" "); // print timestamp
// 1234
st->print(" "); // print compilation number
// method attributes
if (method->is_loaded()) {
const char sync_char = method->is_synchronized() ? 's' : ' ';
const char exception_char = method->has_exception_handlers() ? '!' : ' ';
const char monitors_char = method->has_monitor_bytecodes() ? 'm' : ' ';
// print method attributes
st->print(" %c%c%c ", sync_char, exception_char, monitors_char);
} else {
// %s!bn
st->print(" "); // print method attributes
}
if (TieredCompilation) {
st->print(" ");
}
st->print(" "); // more indent
st->print(" "); // initial inlining indent
for (int i = 0; i < inline_level; i++) st->print(" ");
st->print("@ %d ", bci); // print bci
method->print_short_name(st);
if (method->is_loaded())
st->print(" (%d bytes)", method->code_size());
else
st->print(" (not loaded)");
if (msg != NULL) {
st->print(" %s", msg);
}
st->cr();
}
// ------------------------------------------------------------------
// CompileTask::print_inline_indent
void CompileTask::print_inline_indent(int inline_level, outputStream* st) {
// 1234567
st->print(" "); // print timestamp
// 1234
st->print(" "); // print compilation number
// %s!bn
st->print(" "); // print method attributes
if (TieredCompilation) {
st->print(" ");
}
st->print(" "); // more indent
st->print(" "); // initial inlining indent
for (int i = 0; i < inline_level; i++) st->print(" ");
}
// ------------------------------------------------------------------
// CompileTask::print_compilation
void CompileTask::print_compilation(outputStream* st, const char* msg, bool short_form, bool cr) {
bool is_osr_method = osr_bci() != InvocationEntryBci;
print_compilation_impl(st, method(), compile_id(), comp_level(), is_osr_method, osr_bci(), is_blocking(), msg, short_form, cr);
}
// ------------------------------------------------------------------
// CompileTask::log_task
void CompileTask::log_task(xmlStream* log) {
Thread* thread = Thread::current();
methodHandle method(thread, this->method());
ResourceMark rm(thread);
// <task compiler='Cx' id='9' method='M' osr_bci='X' level='1' blocking='1' stamp='1.234'>
log->print(" compiler='%s' compile_id='%d'", _comp_level <= CompLevel_full_profile ? "C1" : "C2", _compile_id);
if (_osr_bci != CompileBroker::standard_entry_bci) {
log->print(" compile_kind='osr'"); // same as nmethod::compile_kind
} // else compile_kind='c2c'
if (!method.is_null()) log->method(method);
if (_osr_bci != CompileBroker::standard_entry_bci) {
log->print(" osr_bci='%d'", _osr_bci);
}
if (_comp_level != CompLevel_highest_tier) {
log->print(" level='%d'", _comp_level);
}
if (_is_blocking) {
log->print(" blocking='1'");
}
log->stamp();
}
// ------------------------------------------------------------------
// CompileTask::log_task_queued
void CompileTask::log_task_queued() {
Thread* thread = Thread::current();
ttyLocker ttyl;
ResourceMark rm(thread);
xtty->begin_elem("task_queued");
log_task(xtty);
if (_comment != NULL) {
xtty->print(" comment='%s'", _comment);
}
if (_hot_method != NULL) {
methodHandle hot(thread, _hot_method);
methodHandle method(thread, _method);
if (hot() != method()) {
xtty->method(hot);
}
}
if (_hot_count != 0) {
xtty->print(" hot_count='%d'", _hot_count);
}
xtty->end_elem();
}
// ------------------------------------------------------------------
// CompileTask::log_task_start
void CompileTask::log_task_start(CompileLog* log) {
log->begin_head("task");
log_task(log);
log->end_head();
}
// ------------------------------------------------------------------
// CompileTask::log_task_done
void CompileTask::log_task_done(CompileLog* log) {
Thread* thread = Thread::current();
methodHandle method(thread, this->method());
ResourceMark rm(thread);
if (!_is_success) {
const char* reason = _failure_reason != NULL ? _failure_reason : "unknown";
log->elem("failure reason='%s'", reason);
}
// <task_done ... stamp='1.234'> </task>
nmethod* nm = code();
log->begin_elem("task_done success='%d' nmsize='%d' count='%d'",
_is_success, nm == NULL ? 0 : nm->content_size(),
method->invocation_count());
int bec = method->backedge_count();
if (bec != 0) log->print(" backedge_count='%d'", bec);
// Note: "_is_complete" is about to be set, but is not yet set at this point.
if (_num_inlined_bytecodes != 0) {
log->print(" inlined_bytes='%d'", _num_inlined_bytecodes);
}
log->stamp();
log->end_elem();
log->tail("task");
log->clear_identities(); // next task will have different CI
if (log->unflushed_count() > 2000) {
log->flush();
}
log->mark_file_end();
}
/**
* Add a CompileTask to a CompileQueue.
*/
@ -807,7 +441,7 @@ void CompileQueue::print(outputStream* st) {
st->print_cr("Empty");
} else {
while (task != NULL) {
task->print_compilation(st, NULL, true, true);
task->print(st, NULL, true, true);
task = task->next();
}
}
@ -1349,7 +983,7 @@ nmethod* CompileBroker::compile_method(methodHandle method, int osr_bci,
#ifndef TIERED
// seems like an assert of dubious value
assert(comp_level == CompLevel_highest_tier,
"all OSR compiles are assumed to be at a single compilation lavel");
"all OSR compiles are assumed to be at a single compilation level");
#endif // TIERED
// We accept a higher level osr method
nmethod* nm = method->lookup_osr_nmethod_for(osr_bci, comp_level, false);
@ -2037,7 +1671,7 @@ void CompileBroker::invoke_compiler_on_method(CompileTask* task) {
FormatBufferResource msg = retry_message != NULL ?
err_msg_res("COMPILE SKIPPED: %s (%s)", ci_env.failure_reason(), retry_message) :
err_msg_res("COMPILE SKIPPED: %s", ci_env.failure_reason());
task->print_compilation(tty, msg);
task->print(tty, msg);
}
} else {
task->mark_success();

View File

@ -27,127 +27,12 @@
#include "ci/compilerInterface.hpp"
#include "compiler/abstractCompiler.hpp"
#include "compiler/compileTask.hpp"
#include "runtime/perfData.hpp"
class nmethod;
class nmethodLocker;
// CompileTask
//
// An entry in the compile queue. It represents a pending or current
// compilation.
class CompileTask : public CHeapObj<mtCompiler> {
friend class VMStructs;
private:
static CompileTask* _task_free_list;
#ifdef ASSERT
static int _num_allocated_tasks;
#endif
Monitor* _lock;
uint _compile_id;
Method* _method;
jobject _method_holder;
int _osr_bci;
bool _is_complete;
bool _is_success;
bool _is_blocking;
int _comp_level;
int _num_inlined_bytecodes;
nmethodLocker* _code_handle; // holder of eventual result
CompileTask* _next, *_prev;
bool _is_free;
// Fields used for logging why the compilation was initiated:
jlong _time_queued; // in units of os::elapsed_counter()
Method* _hot_method; // which method actually triggered this task
jobject _hot_method_holder;
int _hot_count; // information about its invocation counter
const char* _comment; // more info about the task
const char* _failure_reason;
public:
CompileTask() {
_lock = new Monitor(Mutex::nonleaf+2, "CompileTaskLock");
}
void initialize(int compile_id, methodHandle method, int osr_bci, int comp_level,
methodHandle hot_method, int hot_count, const char* comment,
bool is_blocking);
static CompileTask* allocate();
static void free(CompileTask* task);
int compile_id() const { return _compile_id; }
Method* method() const { return _method; }
Method* hot_method() const { return _hot_method; }
int osr_bci() const { return _osr_bci; }
bool is_complete() const { return _is_complete; }
bool is_blocking() const { return _is_blocking; }
bool is_success() const { return _is_success; }
nmethodLocker* code_handle() const { return _code_handle; }
void set_code_handle(nmethodLocker* l) { _code_handle = l; }
nmethod* code() const; // _code_handle->code()
void set_code(nmethod* nm); // _code_handle->set_code(nm)
Monitor* lock() const { return _lock; }
void mark_complete() { _is_complete = true; }
void mark_success() { _is_success = true; }
int comp_level() { return _comp_level;}
void set_comp_level(int comp_level) { _comp_level = comp_level;}
int num_inlined_bytecodes() const { return _num_inlined_bytecodes; }
void set_num_inlined_bytecodes(int n) { _num_inlined_bytecodes = n; }
CompileTask* next() const { return _next; }
void set_next(CompileTask* next) { _next = next; }
CompileTask* prev() const { return _prev; }
void set_prev(CompileTask* prev) { _prev = prev; }
bool is_free() const { return _is_free; }
void set_is_free(bool val) { _is_free = val; }
// RedefineClasses support
void metadata_do(void f(Metadata*));
private:
static void print_compilation_impl(outputStream* st, Method* method, int compile_id, int comp_level,
bool is_osr_method = false, int osr_bci = -1, bool is_blocking = false,
const char* msg = NULL, bool short_form = false, bool cr = true);
public:
void print_compilation(outputStream* st = tty, const char* msg = NULL, bool short_form = false, bool cr = true);
static void print_compilation(outputStream* st, const nmethod* nm, const char* msg = NULL, bool short_form = false, bool cr = true) {
print_compilation_impl(st, nm->method(), nm->compile_id(), nm->comp_level(),
nm->is_osr_method(), nm->is_osr_method() ? nm->osr_entry_bci() : -1, /*is_blocking*/ false,
msg, short_form, cr);
}
static void print_inlining(outputStream* st, ciMethod* method, int inline_level, int bci, const char* msg = NULL);
static void print_inlining(ciMethod* method, int inline_level, int bci, const char* msg = NULL) {
print_inlining(tty, method, inline_level, bci, msg);
}
// Redefine Classes support
void mark_on_stack();
static void print_inline_indent(int inline_level, outputStream* st = tty);
void print_tty();
void print_line_on_error(outputStream* st, char* buf, int buflen);
void log_task(xmlStream* log);
void log_task_queued();
void log_task_start(CompileLog* log);
void log_task_done(CompileLog* log);
void set_failure_reason(const char* reason) {
_failure_reason = reason;
}
};
// CompilerCounters
//
// Per Compiler Performance Counters.

View File

@ -0,0 +1,391 @@
/*
* Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "compiler/compileTask.hpp"
#include "compiler/compileLog.hpp"
#include "compiler/compileBroker.hpp"
CompileTask* CompileTask::_task_free_list = NULL;
#ifdef ASSERT
int CompileTask::_num_allocated_tasks = 0;
#endif
/**
* Allocate a CompileTask, from the free list if possible.
*/
CompileTask* CompileTask::allocate() {
MutexLocker locker(CompileTaskAlloc_lock);
CompileTask* task = NULL;
if (_task_free_list != NULL) {
task = _task_free_list;
_task_free_list = task->next();
task->set_next(NULL);
} else {
task = new CompileTask();
DEBUG_ONLY(_num_allocated_tasks++;)
assert (WhiteBoxAPI || _num_allocated_tasks < 10000, "Leaking compilation tasks?");
task->set_next(NULL);
task->set_is_free(true);
}
assert(task->is_free(), "Task must be free.");
task->set_is_free(false);
return task;
}
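// For illustration only: allocate()/free() here form a classic
// lock-protected intrusive free list. A minimal standalone sketch of the
// same pattern follows; Node, g_free_list, g_lock and both function names
// are hypothetical stand-ins, not part of this change.

#include <cassert>
#include <mutex>

struct Node {
  Node* next    = nullptr;
  bool  is_free = true;
};

static Node*      g_free_list = nullptr;
static std::mutex g_lock;

Node* allocate_node() {
  std::lock_guard<std::mutex> guard(g_lock);
  Node* n = g_free_list;
  if (n != nullptr) {
    g_free_list = n->next;   // pop a recycled node off the list
    n->next = nullptr;
  } else {
    n = new Node();          // grow only when the free list is empty
  }
  assert(n->is_free);
  n->is_free = false;
  return n;
}

void free_node(Node* n) {
  std::lock_guard<std::mutex> guard(g_lock);
  n->is_free = true;
  n->next = g_free_list;     // push back for later reuse
  g_free_list = n;
}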
/**
* Add a task to the free list.
*/
void CompileTask::free(CompileTask* task) {
MutexLocker locker(CompileTaskAlloc_lock);
if (!task->is_free()) {
task->set_code(NULL);
assert(!task->lock()->is_locked(), "Should not be locked when freed");
JNIHandles::destroy_global(task->_method_holder);
JNIHandles::destroy_global(task->_hot_method_holder);
task->set_is_free(true);
task->set_next(_task_free_list);
_task_free_list = task;
}
}
void CompileTask::initialize(int compile_id,
methodHandle method,
int osr_bci,
int comp_level,
methodHandle hot_method,
int hot_count,
const char* comment,
bool is_blocking) {
assert(!_lock->is_locked(), "bad locking");
_compile_id = compile_id;
_method = method();
_method_holder = JNIHandles::make_global(method->method_holder()->klass_holder());
_osr_bci = osr_bci;
_is_blocking = is_blocking;
_comp_level = comp_level;
_num_inlined_bytecodes = 0;
_is_complete = false;
_is_success = false;
_code_handle = NULL;
_hot_method = NULL;
_hot_method_holder = NULL;
_hot_count = hot_count;
_time_queued = 0; // tidy
_comment = comment;
_failure_reason = NULL;
if (LogCompilation) {
_time_queued = os::elapsed_counter();
if (hot_method.not_null()) {
if (hot_method == method) {
_hot_method = _method;
} else {
_hot_method = hot_method();
// only add loader or mirror if different from _method_holder
_hot_method_holder = JNIHandles::make_global(hot_method->method_holder()->klass_holder());
}
}
}
_next = NULL;
}
// ------------------------------------------------------------------
// CompileTask::code/set_code
//
nmethod* CompileTask::code() const {
if (_code_handle == NULL) return NULL;
return _code_handle->code();
}
void CompileTask::set_code(nmethod* nm) {
if (_code_handle == NULL && nm == NULL) return;
guarantee(_code_handle != NULL, "");
_code_handle->set_code(nm);
if (nm == NULL) _code_handle = NULL; // drop the handle also
}
void CompileTask::mark_on_stack() {
// Mark these methods so that RedefineClasses cannot remove them.
_method->set_on_stack(true);
if (_hot_method != NULL) {
_hot_method->set_on_stack(true);
}
}
// RedefineClasses support
void CompileTask::metadata_do(void f(Metadata*)) {
f(method());
if (hot_method() != NULL && hot_method() != method()) {
f(hot_method());
}
}
// ------------------------------------------------------------------
// CompileTask::print_line_on_error
//
// This function is called by the fatal error handler when the thread
// causing trouble is a compiler thread.
//
// Do not grab any lock, do not allocate memory.
//
// Otherwise it's the same as CompileTask::print()
//
void CompileTask::print_line_on_error(outputStream* st, char* buf, int buflen) {
// print compiler name
st->print("%s:", CompileBroker::compiler_name(comp_level()));
print(st);
}
// ------------------------------------------------------------------
// CompileTask::print_tty
void CompileTask::print_tty() {
ttyLocker ttyl; // keep the following output all in one block
// print compiler name if requested
if (CIPrintCompilerName) tty->print("%s:", CompileBroker::compiler_name(comp_level()));
print(tty);
}
// ------------------------------------------------------------------
// CompileTask::print_impl
void CompileTask::print_impl(outputStream* st, Method* method, int compile_id, int comp_level,
bool is_osr_method, int osr_bci, bool is_blocking,
const char* msg, bool short_form, bool cr) {
if (!short_form) {
st->print("%7d ", (int) st->time_stamp().milliseconds()); // print timestamp
}
st->print("%4d ", compile_id); // print compilation number
// For unloaded methods the transition to zombie occurs after the
// method is cleared so it's impossible to report accurate
// information for that case.
bool is_synchronized = false;
bool has_exception_handler = false;
bool is_native = false;
if (method != NULL) {
is_synchronized = method->is_synchronized();
has_exception_handler = method->has_exception_handler();
is_native = method->is_native();
}
// method attributes
const char compile_type = is_osr_method ? '%' : ' ';
const char sync_char = is_synchronized ? 's' : ' ';
const char exception_char = has_exception_handler ? '!' : ' ';
const char blocking_char = is_blocking ? 'b' : ' ';
const char native_char = is_native ? 'n' : ' ';
// print method attributes
st->print("%c%c%c%c%c ", compile_type, sync_char, exception_char, blocking_char, native_char);
if (TieredCompilation) {
if (comp_level != -1) st->print("%d ", comp_level);
else st->print("- ");
}
st->print(" "); // more indent
if (method == NULL) {
st->print("(method)");
} else {
method->print_short_name(st);
if (is_osr_method) {
st->print(" @ %d", osr_bci);
}
if (method->is_native())
st->print(" (native)");
else
st->print(" (%d bytes)", method->code_size());
}
if (msg != NULL) {
st->print(" %s", msg);
}
if (cr) {
st->cr();
}
}
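// For orientation, a line printed by print_impl() under
// -XX:+PrintCompilation has the shape below: timestamp, compile id, the
// five attribute characters (%/s/!/b/n), the tier level when
// TieredCompilation is on, then the method. The method name, bci and all
// numbers are illustrative, and the column spacing is approximate:
//
//    1234  731 %s!      4     java.lang.String::indexOf @ 8 (70 bytes)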
void CompileTask::print_inline_indent(int inline_level, outputStream* st) {
// 1234567
st->print(" "); // print timestamp
// 1234
st->print(" "); // print compilation number
// %s!bn
st->print(" "); // print method attributes
if (TieredCompilation) {
st->print(" ");
}
st->print(" "); // more indent
st->print(" "); // initial inlining indent
for (int i = 0; i < inline_level; i++) st->print(" ");
}
// ------------------------------------------------------------------
// CompileTask::print
void CompileTask::print(outputStream* st, const char* msg, bool short_form, bool cr) {
bool is_osr_method = osr_bci() != InvocationEntryBci;
print_impl(st, method(), compile_id(), comp_level(), is_osr_method, osr_bci(), is_blocking(), msg, short_form, cr);
}
// ------------------------------------------------------------------
// CompileTask::log_task
void CompileTask::log_task(xmlStream* log) {
Thread* thread = Thread::current();
methodHandle method(thread, this->method());
ResourceMark rm(thread);
// <task id='9' method='M' osr_bci='X' level='1' blocking='1' stamp='1.234'>
log->print(" compile_id='%d'", _compile_id);
if (_osr_bci != CompileBroker::standard_entry_bci) {
log->print(" compile_kind='osr'"); // same as nmethod::compile_kind
} // else compile_kind='c2c'
if (!method.is_null()) log->method(method);
if (_osr_bci != CompileBroker::standard_entry_bci) {
log->print(" osr_bci='%d'", _osr_bci);
}
if (_comp_level != CompLevel_highest_tier) {
log->print(" level='%d'", _comp_level);
}
if (_is_blocking) {
log->print(" blocking='1'");
}
log->stamp();
}
// ------------------------------------------------------------------
// CompileTask::log_task_queued
void CompileTask::log_task_queued() {
Thread* thread = Thread::current();
ttyLocker ttyl;
ResourceMark rm(thread);
xtty->begin_elem("task_queued");
log_task(xtty);
if (_comment != NULL) {
xtty->print(" comment='%s'", _comment);
}
if (_hot_method != NULL) {
methodHandle hot(thread, _hot_method);
methodHandle method(thread, _method);
if (hot() != method()) {
xtty->method(hot);
}
}
if (_hot_count != 0) {
xtty->print(" hot_count='%d'", _hot_count);
}
xtty->end_elem();
}
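// Read together with log_task() above, this emits one self-closing XML
// element per enqueued compilation in the -XX:+LogCompilation output.
// A representative element is sketched below; every attribute value and
// the method name are illustrative, not taken from a real log:
//
//   <task_queued compile_id='42' compile_kind='osr'
//                method='java.lang.String indexOf (I)I' osr_bci='8'
//                level='3' blocking='1' stamp='1.234' comment='count'
//                hot_count='5120'/>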
// ------------------------------------------------------------------
// CompileTask::log_task_start
void CompileTask::log_task_start(CompileLog* log) {
log->begin_head("task");
log_task(log);
log->end_head();
}
// ------------------------------------------------------------------
// CompileTask::log_task_done
void CompileTask::log_task_done(CompileLog* log) {
Thread* thread = Thread::current();
methodHandle method(thread, this->method());
ResourceMark rm(thread);
if (!_is_success) {
const char* reason = _failure_reason != NULL ? _failure_reason : "unknown";
log->elem("failure reason='%s'", reason);
}
// <task_done ... stamp='1.234'> </task>
nmethod* nm = code();
log->begin_elem("task_done success='%d' nmsize='%d' count='%d'",
_is_success, nm == NULL ? 0 : nm->content_size(),
method->invocation_count());
int bec = method->backedge_count();
if (bec != 0) log->print(" backedge_count='%d'", bec);
// Note: "_is_complete" is about to be set, but is not yet set at this point.
if (_num_inlined_bytecodes != 0) {
log->print(" inlined_bytes='%d'", _num_inlined_bytecodes);
}
log->stamp();
log->end_elem();
log->clear_identities(); // next task will have different CI
log->tail("task");
if (log->unflushed_count() > 2000) {
log->flush();
}
log->mark_file_end();
}
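// Correspondingly, log_task_done() closes the surrounding <task> element.
// A representative tail of one task's log output (all values illustrative)
// looks like:
//
//   <task_done success='1' nmsize='1376' count='5120'
//              backedge_count='1024' inlined_bytes='52' stamp='1.456'/>
//   </task>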
// ------------------------------------------------------------------
// CompileTask::print_inlining_inner
void CompileTask::print_inlining_inner(outputStream* st, ciMethod* method, int inline_level, int bci, const char* msg) {
// 1234567
st->print(" "); // print timestamp
// 1234
st->print(" "); // print compilation number
// method attributes
if (method->is_loaded()) {
const char sync_char = method->is_synchronized() ? 's' : ' ';
const char exception_char = method->has_exception_handlers() ? '!' : ' ';
const char monitors_char = method->has_monitor_bytecodes() ? 'm' : ' ';
// print method attributes
st->print(" %c%c%c ", sync_char, exception_char, monitors_char);
} else {
// %s!bn
st->print(" "); // print method attributes
}
if (TieredCompilation) {
st->print(" ");
}
st->print(" "); // more indent
st->print(" "); // initial inlining indent
for (int i = 0; i < inline_level; i++) st->print(" ");
st->print("@ %d ", bci); // print bci
method->print_short_name(st);
if (method->is_loaded())
st->print(" (%d bytes)", method->code_size());
else
st->print(" (not loaded)");
if (msg != NULL) {
st->print(" %s", msg);
}
st->cr();
}
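// For orientation, a line emitted by this routine under -XX:+PrintInlining
// looks roughly as follows; indentation grows with inline_level, and the
// method, bci, byte count and trailing message below are illustrative:
//
//                        @ 12   java.lang.Math::min (11 bytes)   inline (hot)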

View File

@ -0,0 +1,151 @@
/*
* Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_COMPILER_COMPILETASK_HPP
#define SHARE_VM_COMPILER_COMPILETASK_HPP
#include "code/nmethod.hpp"
#include "ci/ciMethod.hpp"
#include "compiler/compileLog.hpp"
#include "memory/allocation.inline.hpp"
#include "utilities/xmlstream.hpp"
// CompileTask
//
// An entry in the compile queue. It represents a pending or current
// compilation.
class CompileTask : public CHeapObj<mtCompiler> {
friend class VMStructs;
private:
static CompileTask* _task_free_list;
#ifdef ASSERT
static int _num_allocated_tasks;
#endif
Monitor* _lock;
uint _compile_id;
Method* _method;
jobject _method_holder;
int _osr_bci;
bool _is_complete;
bool _is_success;
bool _is_blocking;
int _comp_level;
int _num_inlined_bytecodes;
nmethodLocker* _code_handle; // holder of eventual result
CompileTask* _next, *_prev;
bool _is_free;
// Fields used for logging why the compilation was initiated:
jlong _time_queued; // in units of os::elapsed_counter()
Method* _hot_method; // which method actually triggered this task
jobject _hot_method_holder;
int _hot_count; // information about its invocation counter
const char* _comment; // more info about the task
const char* _failure_reason;
public:
CompileTask() {
_lock = new Monitor(Mutex::nonleaf+2, "CompileTaskLock");
}
void initialize(int compile_id, methodHandle method, int osr_bci, int comp_level,
methodHandle hot_method, int hot_count, const char* comment,
bool is_blocking);
static CompileTask* allocate();
static void free(CompileTask* task);
int compile_id() const { return _compile_id; }
Method* method() const { return _method; }
Method* hot_method() const { return _hot_method; }
int osr_bci() const { return _osr_bci; }
bool is_complete() const { return _is_complete; }
bool is_blocking() const { return _is_blocking; }
bool is_success() const { return _is_success; }
nmethodLocker* code_handle() const { return _code_handle; }
void set_code_handle(nmethodLocker* l) { _code_handle = l; }
nmethod* code() const; // _code_handle->code()
void set_code(nmethod* nm); // _code_handle->set_code(nm)
Monitor* lock() const { return _lock; }
void mark_complete() { _is_complete = true; }
void mark_success() { _is_success = true; }
int comp_level() { return _comp_level;}
void set_comp_level(int comp_level) { _comp_level = comp_level;}
int num_inlined_bytecodes() const { return _num_inlined_bytecodes; }
void set_num_inlined_bytecodes(int n) { _num_inlined_bytecodes = n; }
CompileTask* next() const { return _next; }
void set_next(CompileTask* next) { _next = next; }
CompileTask* prev() const { return _prev; }
void set_prev(CompileTask* prev) { _prev = prev; }
bool is_free() const { return _is_free; }
void set_is_free(bool val) { _is_free = val; }
// RedefineClasses support
void metadata_do(void f(Metadata*));
void mark_on_stack();
private:
static void print_impl(outputStream* st, Method* method, int compile_id, int comp_level,
bool is_osr_method = false, int osr_bci = -1, bool is_blocking = false,
const char* msg = NULL, bool short_form = false, bool cr = true);
public:
void print(outputStream* st = tty, const char* msg = NULL, bool short_form = false, bool cr = true);
static void print(outputStream* st, const nmethod* nm, const char* msg = NULL, bool short_form = false, bool cr = true) {
print_impl(st, nm->method(), nm->compile_id(), nm->comp_level(),
nm->is_osr_method(), nm->is_osr_method() ? nm->osr_entry_bci() : -1, /*is_blocking*/ false,
msg, short_form, cr);
}
static void print_inline_indent(int inline_level, outputStream* st = tty);
void print_tty();
void print_line_on_error(outputStream* st, char* buf, int buflen);
void log_task(xmlStream* log);
void log_task_queued();
void log_task_start(CompileLog* log);
void log_task_done(CompileLog* log);
void set_failure_reason(const char* reason) {
_failure_reason = reason;
}
bool check_break_at_flags();
static void print_inlining_inner(outputStream* st, ciMethod* method, int inline_level, int bci, const char* msg = NULL);
static void print_inlining_tty(ciMethod* method, int inline_level, int bci, const char* msg = NULL) {
print_inlining_inner(tty, method, inline_level, bci, msg);
}
};
#endif // SHARE_VM_COMPILER_COMPILETASK_HPP

View File

@ -66,7 +66,8 @@ class MetadataAwareOopsInGenClosure: public OopsInGenClosure {
virtual void do_klass(Klass* k);
void do_klass_nv(Klass* k);
virtual void do_class_loader_data(ClassLoaderData* cld);
virtual void do_cld(ClassLoaderData* cld) { do_cld_nv(cld); }
void do_cld_nv(ClassLoaderData* cld);
};
class MarkRefsIntoClosure: public MetadataAwareOopsInGenClosure {

View File

@ -50,11 +50,11 @@ inline void Par_MarkRefsIntoAndScanClosure::trim_queue(uint max) {
inline void MetadataAwareOopsInGenClosure::do_klass_nv(Klass* k) {
ClassLoaderData* cld = k->class_loader_data();
do_class_loader_data(cld);
do_cld_nv(cld);
}
inline void MetadataAwareOopsInGenClosure::do_klass(Klass* k) { do_klass_nv(k); }
inline void MetadataAwareOopsInGenClosure::do_class_loader_data(ClassLoaderData* cld) {
inline void MetadataAwareOopsInGenClosure::do_cld_nv(ClassLoaderData* cld) {
assert(_klass_closure._oop_closure == this, "Must be");
bool claim = true; // Must claim the class loader data before processing.

View File

@ -702,7 +702,7 @@ void FreeListSpace_DCTOC::walk_mem_region_with_cl_par(MemRegion mr,
!_cfls->CompactibleFreeListSpace::obj_allocated_since_save_marks( \
oop(bottom)) && \
!_collector->CMSCollector::is_dead_obj(oop(bottom))) { \
size_t word_sz = oop(bottom)->oop_iterate(cl, mr); \
size_t word_sz = oop(bottom)->oop_iterate_size(cl, mr); \
bottom += _cfls->adjustObjectSize(word_sz); \
} else { \
bottom += _cfls->CompactibleFreeListSpace::block_size(bottom); \
@ -729,7 +729,7 @@ void FreeListSpace_DCTOC::walk_mem_region_with_cl_nopar(MemRegion mr,
!_cfls->CompactibleFreeListSpace::obj_allocated_since_save_marks( \
oop(bottom)) && \
!_collector->CMSCollector::is_dead_obj(oop(bottom))) { \
size_t word_sz = oop(bottom)->oop_iterate(cl, mr); \
size_t word_sz = oop(bottom)->oop_iterate_size(cl, mr); \
bottom += _cfls->adjustObjectSize(word_sz); \
} else { \
bottom += _cfls->CompactibleFreeListSpace::block_size_nopar(bottom); \
@ -2989,7 +2989,7 @@ initialize_sequential_subtasks_for_marking(int n_threads,
assert(task_size > CardTableModRefBS::card_size_in_words &&
(task_size % CardTableModRefBS::card_size_in_words == 0),
"Otherwise arithmetic below would be incorrect");
MemRegion span = _gen->reserved();
MemRegion span = _old_gen->reserved();
if (low != NULL) {
if (span.contains(low)) {
// Align low down to a card boundary so that

View File

@ -99,7 +99,7 @@ class CompactibleFreeListSpace: public CompactibleSpace {
BlockOffsetArrayNonContigSpace _bt;
CMSCollector* _collector;
ConcurrentMarkSweepGeneration* _gen;
ConcurrentMarkSweepGeneration* _old_gen;
// Data structures for free blocks (used during allocation/sweeping)

View File

@ -212,7 +212,7 @@ ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
use_adaptive_freelists,
dictionaryChoice);
NOT_PRODUCT(debug_cms_space = _cmsSpace;)
_cmsSpace->_gen = this;
_cmsSpace->_old_gen = this;
_gc_stats = new CMSGCStats();
@ -359,13 +359,13 @@ double CMSStats::time_until_cms_gen_full() const {
(size_t) _cms_gen->gc_stats()->avg_promoted()->padded_average());
if (cms_free > expected_promotion) {
// Start a cms collection if there isn't enough space to promote
// for the next minor collection. Use the padded average as
// for the next young collection. Use the padded average as
// a safety factor.
cms_free -= expected_promotion;
// Adjust by the safety factor.
double cms_free_dbl = (double)cms_free;
double cms_adjustment = (100.0 - CMSIncrementalSafetyFactor)/100.0;
double cms_adjustment = (100.0 - CMSIncrementalSafetyFactor) / 100.0;
// Apply a further correction factor which tries to adjust
// for recent occurrence of concurrent mode failures.
cms_adjustment = cms_adjustment * cms_free_adjustment_factor(cms_free);
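// Worked example of the adjustment above: assuming the default
// CMSIncrementalSafetyFactor of 10, cms_adjustment starts at
// (100.0 - 10) / 100.0 = 0.9, and is then scaled further by
// cms_free_adjustment_factor(cms_free). The time-until-full estimate is
// therefore based on at most 90% of the currently free space, shrinking
// further when concurrent mode failures have occurred recently.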
@ -531,7 +531,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
if (CMSConcurrentMTEnabled) {
if (FLAG_IS_DEFAULT(ConcGCThreads)) {
// just for now
FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3) / 4);
}
if (ConcGCThreads > 1) {
_conc_workers = new YieldingFlexibleWorkGang("CMS Thread",
@ -592,7 +592,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
_cmsGen ->init_initiating_occupancy(CMSInitiatingOccupancyFraction, CMSTriggerRatio);
// Clip CMSBootstrapOccupancy between 0 and 100.
_bootstrap_occupancy = ((double)CMSBootstrapOccupancy)/(double)100;
_bootstrap_occupancy = CMSBootstrapOccupancy / 100.0;
// Now tell CMS generations the identity of their collector
ConcurrentMarkSweepGeneration::set_collector(this);
@ -613,7 +613,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
_end_addr = gch->end_addr();
assert(_young_gen != NULL, "no _young_gen");
_eden_chunk_index = 0;
_eden_chunk_capacity = (_young_gen->max_capacity()+CMSSamplingGrain)/CMSSamplingGrain;
_eden_chunk_capacity = (_young_gen->max_capacity() + CMSSamplingGrain) / CMSSamplingGrain;
_eden_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, _eden_chunk_capacity, mtGC);
}
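// Note on the sizing arithmetic above: (max_capacity() + CMSSamplingGrain)
// / CMSSamplingGrain yields floor(n / g) + 1, i.e. one chunk slot per full
// sampling grain plus one spare. With illustrative values of
// max_capacity() = 1000 words and CMSSamplingGrain = 64, the capacity is
// 1064 / 64 = 16 slots covering 15.6 grains of eden.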
@ -795,29 +795,22 @@ void ConcurrentMarkSweepGeneration::compute_new_size_free_list() {
size_t desired_capacity = (size_t)(used() / ((double) 1 - desired_free_percentage));
gclog_or_tty->print_cr("\nFrom compute_new_size: ");
gclog_or_tty->print_cr(" Free fraction %f", free_percentage);
gclog_or_tty->print_cr(" Desired free fraction %f",
desired_free_percentage);
gclog_or_tty->print_cr(" Maximum free fraction %f",
maximum_free_percentage);
gclog_or_tty->print_cr(" Capacity " SIZE_FORMAT, capacity()/1000);
gclog_or_tty->print_cr(" Desired capacity " SIZE_FORMAT,
desired_capacity/1000);
gclog_or_tty->print_cr(" Desired free fraction %f", desired_free_percentage);
gclog_or_tty->print_cr(" Maximum free fraction %f", maximum_free_percentage);
gclog_or_tty->print_cr(" Capacity " SIZE_FORMAT, capacity() / 1000);
gclog_or_tty->print_cr(" Desired capacity " SIZE_FORMAT, desired_capacity / 1000);
GenCollectedHeap* gch = GenCollectedHeap::heap();
assert(gch->is_old_gen(this), "The CMS generation should always be the old generation");
size_t young_size = gch->young_gen()->capacity();
gclog_or_tty->print_cr(" Young gen size " SIZE_FORMAT, young_size / 1000);
gclog_or_tty->print_cr(" unsafe_max_alloc_nogc " SIZE_FORMAT,
unsafe_max_alloc_nogc()/1000);
gclog_or_tty->print_cr(" contiguous available " SIZE_FORMAT,
contiguous_available()/1000);
gclog_or_tty->print_cr(" Expand by " SIZE_FORMAT " (bytes)",
expand_bytes);
gclog_or_tty->print_cr(" unsafe_max_alloc_nogc " SIZE_FORMAT, unsafe_max_alloc_nogc() / 1000);
gclog_or_tty->print_cr(" contiguous available " SIZE_FORMAT, contiguous_available() / 1000);
gclog_or_tty->print_cr(" Expand by " SIZE_FORMAT " (bytes)", expand_bytes);
}
// safe if expansion fails
expand_for_gc_cause(expand_bytes, 0, CMSExpansionCause::_satisfy_free_ratio);
if (PrintGCDetails && Verbose) {
gclog_or_tty->print_cr(" Expanded free fraction %f",
((double) free()) / capacity());
gclog_or_tty->print_cr(" Expanded free fraction %f", ((double) free()) / capacity());
}
} else {
size_t desired_capacity = (size_t)(used() / ((double) 1 - desired_free_percentage));
@ -834,16 +827,14 @@ Mutex* ConcurrentMarkSweepGeneration::freelistLock() const {
return cmsSpace()->freelistLock();
}
HeapWord* ConcurrentMarkSweepGeneration::allocate(size_t size,
bool tlab) {
HeapWord* ConcurrentMarkSweepGeneration::allocate(size_t size, bool tlab) {
CMSSynchronousYieldRequest yr;
MutexLockerEx x(freelistLock(),
Mutex::_no_safepoint_check_flag);
MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
return have_lock_and_allocate(size, tlab);
}
HeapWord* ConcurrentMarkSweepGeneration::have_lock_and_allocate(size_t size,
bool tlab /* ignored */) {
bool tlab /* ignored */) {
assert_lock_strong(freelistLock());
size_t adjustedSize = CompactibleFreeListSpace::adjustObjectSize(size);
HeapWord* res = cmsSpace()->allocate(adjustedSize);
@ -2426,7 +2417,7 @@ void CMSCollector::verify_after_remark_work_1() {
gch->gen_process_roots(&srs,
GenCollectedHeap::OldGen,
true, // younger gens are roots
true, // young gen as roots
GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(),
&notOlder,
@ -2498,7 +2489,7 @@ void CMSCollector::verify_after_remark_work_2() {
gch->gen_process_roots(&srs,
GenCollectedHeap::OldGen,
true, // younger gens are roots
true, // young gen as roots
GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(),
&notOlder,
@ -2952,12 +2943,7 @@ void CMSCollector::checkpointRootsInitialWork() {
assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
assert(_collectorState == InitialMarking, "just checking");
// If there has not been a GC[n-1] since last GC[n] cycle completed,
// precede our marking with a collection of all
// younger generations to keep floating garbage to a minimum.
// XXX: we won't do this for now -- it's an optimization to be done later.
// already have locks
// Already have locks.
assert_lock_strong(bitMapLock());
assert(_markBitMap.isAllClear(), "was reset at end of previous cycle");
@ -3027,7 +3013,7 @@ void CMSCollector::checkpointRootsInitialWork() {
gch->gen_process_roots(&srs,
GenCollectedHeap::OldGen,
true, // younger gens are roots
true, // young gen as roots
GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(),
&notOlder,
@ -3037,7 +3023,7 @@ void CMSCollector::checkpointRootsInitialWork() {
}
// Clear mod-union table; it will be dirtied in the prologue of
// CMS generation per each younger generation collection.
// CMS generation per each young generation collection.
assert(_modUnionTable.isAllClear(),
"Was cleared in most recent final checkpoint phase"
@ -3057,7 +3043,7 @@ bool CMSCollector::markFromRoots() {
// assert(!SafepointSynchronize::is_at_safepoint(),
// "inconsistent argument?");
// However that wouldn't be right, because it's possible that
// a safepoint is indeed in progress as a younger generation
// a safepoint is indeed in progress as a young generation
// stop-the-world GC happens even as we mark in this generation.
assert(_collectorState == Marking, "inconsistent state?");
check_correct_thread_executing();
@ -3065,7 +3051,7 @@ bool CMSCollector::markFromRoots() {
// Weak ref discovery note: We may be discovering weak
// refs in this generation concurrent (but interleaved) with
// weak ref discovery by a younger generation collector.
// weak ref discovery by the young generation collector.
CMSTokenSyncWithLocks ts(true, bitMapLock());
TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
@ -3095,7 +3081,7 @@ bool CMSCollector::markFromRootsWork() {
// Note that when we do a marking step we need to hold the
// bit map lock -- recall that direct allocation (by mutators)
// and promotion (by younger generation collectors) is also
// and promotion (by the young generation collector) is also
// marking the bit map. [the so-called allocate live policy.]
// Because the implementation of bit map marking is not
// robust wrt simultaneous marking of bits in the same word,
@ -4049,7 +4035,7 @@ size_t CMSCollector::preclean_work(bool clean_refs, bool clean_survivor) {
// one of these methods, please check the other method too.
size_t CMSCollector::preclean_mod_union_table(
ConcurrentMarkSweepGeneration* gen,
ConcurrentMarkSweepGeneration* old_gen,
ScanMarkedObjectsAgainCarefullyClosure* cl) {
verify_work_stacks_empty();
verify_overflow_empty();
@ -4064,10 +4050,10 @@ size_t CMSCollector::preclean_mod_union_table(
// generation, but we might potentially miss cards when the
// generation is rapidly expanding while we are in the midst
// of precleaning.
HeapWord* startAddr = gen->reserved().start();
HeapWord* endAddr = gen->reserved().end();
HeapWord* startAddr = old_gen->reserved().start();
HeapWord* endAddr = old_gen->reserved().end();
cl->setFreelistLock(gen->freelistLock()); // needed for yielding
cl->setFreelistLock(old_gen->freelistLock()); // needed for yielding
size_t numDirtyCards, cumNumDirtyCards;
HeapWord *nextAddr, *lastAddr;
@ -4109,7 +4095,7 @@ size_t CMSCollector::preclean_mod_union_table(
HeapWord* stop_point = NULL;
stopTimer();
// Potential yield point
CMSTokenSyncWithLocks ts(true, gen->freelistLock(),
CMSTokenSyncWithLocks ts(true, old_gen->freelistLock(),
bitMapLock());
startTimer();
{
@ -4117,7 +4103,7 @@ size_t CMSCollector::preclean_mod_union_table(
verify_overflow_empty();
sample_eden();
stop_point =
gen->cmsSpace()->object_iterate_careful_m(dirtyRegion, cl);
old_gen->cmsSpace()->object_iterate_careful_m(dirtyRegion, cl);
}
if (stop_point != NULL) {
// The careful iteration stopped early either because it found an
@ -4152,15 +4138,15 @@ size_t CMSCollector::preclean_mod_union_table(
// below are largely identical; if you need to modify
// one of these methods, please check the other method too.
size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* gen,
size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* old_gen,
ScanMarkedObjectsAgainCarefullyClosure* cl) {
// strategy: it's similar to preclean_mod_union_table above, in that
// we accumulate contiguous ranges of dirty cards, mark these cards
// precleaned, then scan the region covered by these cards.
HeapWord* endAddr = (HeapWord*)(gen->_virtual_space.high());
HeapWord* startAddr = (HeapWord*)(gen->_virtual_space.low());
HeapWord* endAddr = (HeapWord*)(old_gen->_virtual_space.high());
HeapWord* startAddr = (HeapWord*)(old_gen->_virtual_space.low());
cl->setFreelistLock(gen->freelistLock()); // needed for yielding
cl->setFreelistLock(old_gen->freelistLock()); // needed for yielding
size_t numDirtyCards, cumNumDirtyCards;
HeapWord *lastAddr, *nextAddr;
@ -4197,13 +4183,13 @@ size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* gen,
if (!dirtyRegion.is_empty()) {
stopTimer();
CMSTokenSyncWithLocks ts(true, gen->freelistLock(), bitMapLock());
CMSTokenSyncWithLocks ts(true, old_gen->freelistLock(), bitMapLock());
startTimer();
sample_eden();
verify_work_stacks_empty();
verify_overflow_empty();
HeapWord* stop_point =
gen->cmsSpace()->object_iterate_careful_m(dirtyRegion, cl);
old_gen->cmsSpace()->object_iterate_careful_m(dirtyRegion, cl);
if (stop_point != NULL) {
assert((_collectorState == AbortablePreclean && should_abort_preclean()),
"Should only be AbortablePreclean.");
@ -4623,7 +4609,7 @@ void CMSParRemarkTask::work(uint worker_id) {
ResourceMark rm;
GrowableArray<ClassLoaderData*>* array = ClassLoaderDataGraph::new_clds();
for (int i = 0; i < array->length(); i++) {
par_mrias_cl.do_class_loader_data(array->at(i));
par_mrias_cl.do_cld_nv(array->at(i));
}
// We don't need to keep track of new CLDs anymore.
@ -5086,7 +5072,7 @@ void CMSCollector::do_remark_parallel() {
// preclean phase did of eden, plus the [two] tasks of
// scanning the [two] survivor spaces. Further fine-grain
// parallelization of the scanning of the survivor spaces
// themselves, and of precleaning of the younger gen itself
// themselves, and of precleaning of the young gen itself
// is deferred to the future.
initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
@ -5177,7 +5163,7 @@ void CMSCollector::do_remark_non_parallel() {
gch->gen_process_roots(&srs,
GenCollectedHeap::OldGen,
true, // younger gens as roots
true, // young gen as roots
GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(),
&mrias_cl,
@ -5199,7 +5185,7 @@ void CMSCollector::do_remark_non_parallel() {
ResourceMark rm;
GrowableArray<ClassLoaderData*>* array = ClassLoaderDataGraph::new_clds();
for (int i = 0; i < array->length(); i++) {
mrias_cl.do_class_loader_data(array->at(i));
mrias_cl.do_cld_nv(array->at(i));
}
// We don't need to keep track of new CLDs anymore.
@ -5661,7 +5647,7 @@ void ConcurrentMarkSweepGeneration::update_gc_stats(Generation* current_generati
}
}
void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* gen) {
void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* old_gen) {
// We iterate over the space(s) underlying this generation,
// checking the mark bit map to see if the bits corresponding
// to specific blocks are marked or not. Blocks that are
@ -5690,26 +5676,26 @@ void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* gen) {
// check that we hold the requisite locks
assert(have_cms_token(), "Should hold cms token");
assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), "Should possess CMS token to sweep");
assert_lock_strong(gen->freelistLock());
assert_lock_strong(old_gen->freelistLock());
assert_lock_strong(bitMapLock());
assert(!_inter_sweep_timer.is_active(), "Was switched off in an outer context");
assert(_intra_sweep_timer.is_active(), "Was switched on in an outer context");
gen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
_inter_sweep_estimate.padded_average(),
_intra_sweep_estimate.padded_average());
gen->setNearLargestChunk();
old_gen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
_inter_sweep_estimate.padded_average(),
_intra_sweep_estimate.padded_average());
old_gen->setNearLargestChunk();
{
SweepClosure sweepClosure(this, gen, &_markBitMap, CMSYield);
gen->cmsSpace()->blk_iterate_careful(&sweepClosure);
SweepClosure sweepClosure(this, old_gen, &_markBitMap, CMSYield);
old_gen->cmsSpace()->blk_iterate_careful(&sweepClosure);
// We need to free-up/coalesce garbage/blocks from a
// co-terminal free run. This is done in the SweepClosure
// destructor; so, do not remove this scope, else the
// end-of-sweep-census below will be off by a little bit.
}
gen->cmsSpace()->sweep_completed();
gen->cmsSpace()->endSweepFLCensus(sweep_count());
old_gen->cmsSpace()->sweep_completed();
old_gen->cmsSpace()->endSweepFLCensus(sweep_count());
if (should_unload_classes()) { // unloaded classes this cycle,
_concurrent_cycles_since_last_unload = 0; // ... reset count
} else { // did not unload classes,
@ -6324,12 +6310,12 @@ size_t ScanMarkedObjectsAgainCarefullyClosure::do_object_careful_m(
// objArrays are precisely marked; restrict scanning
// to dirty cards only.
size = CompactibleFreeListSpace::adjustObjectSize(
p->oop_iterate(_scanningClosure, mr));
p->oop_iterate_size(_scanningClosure, mr));
} else {
// A non-array may have been imprecisely marked; we need
// to scan object in its entirety.
size = CompactibleFreeListSpace::adjustObjectSize(
p->oop_iterate(_scanningClosure));
p->oop_iterate_size(_scanningClosure));
}
#ifdef ASSERT
size_t direct_size =
@ -6417,7 +6403,7 @@ size_t SurvivorSpacePrecleanClosure::do_object_careful(oop p) {
// Note that we do not yield while we iterate over
// the interior oops of p, pushing the relevant ones
// on our marking stack.
size_t size = p->oop_iterate(_scanning_closure);
size_t size = p->oop_iterate_size(_scanning_closure);
do_yield_check();
// Observe that below, we do not abandon the preclean
// phase as soon as we should; rather we empty the

View File

@ -723,7 +723,7 @@ class CMSCollector: public CHeapObj<mtGC> {
private:
// Support for parallelizing young gen rescan in CMS remark phase
ParNewGeneration* _young_gen; // the younger gen
ParNewGeneration* _young_gen;
HeapWord** _top_addr; // ... Top of Eden
HeapWord** _end_addr; // ... End of Eden
@ -772,9 +772,9 @@ class CMSCollector: public CHeapObj<mtGC> {
private:
// Concurrent precleaning work
size_t preclean_mod_union_table(ConcurrentMarkSweepGeneration* gen,
size_t preclean_mod_union_table(ConcurrentMarkSweepGeneration* old_gen,
ScanMarkedObjectsAgainCarefullyClosure* cl);
size_t preclean_card_table(ConcurrentMarkSweepGeneration* gen,
size_t preclean_card_table(ConcurrentMarkSweepGeneration* old_gen,
ScanMarkedObjectsAgainCarefullyClosure* cl);
// Does precleaning work, returning a quantity indicative of
// the amount of "useful work" done.
@ -797,7 +797,7 @@ class CMSCollector: public CHeapObj<mtGC> {
void refProcessingWork();
// Concurrent sweeping work
void sweepWork(ConcurrentMarkSweepGeneration* gen);
void sweepWork(ConcurrentMarkSweepGeneration* old_gen);
// (Concurrent) resetting of support data structures
void reset(bool concurrent);
@ -1120,10 +1120,8 @@ class ConcurrentMarkSweepGeneration: public CardGeneration {
MemRegion used_region_at_save_marks() const;
// Does a "full" (forced) collection invoked on this generation collect
// all younger generations as well? Note that the second conjunct is a
// hack to allow the collection of the younger gen first if the flag is
// set.
virtual bool full_collects_younger_generations() const {
// the young generation as well?
virtual bool full_collects_young_generation() const {
return !ScavengeBeforeFullGC;
}
@ -1153,9 +1151,8 @@ class ConcurrentMarkSweepGeneration: public CardGeneration {
virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes) const;
// Inform this (non-young) generation that a promotion failure was
// encountered during a collection of a younger generation that
// promotes into this generation.
// Inform this (old) generation that a promotion failure was
// encountered during a collection of the young generation.
virtual void promotion_failure_occurred();
bool should_collect(bool full, size_t size, bool tlab);

View File

@ -295,7 +295,7 @@ inline void CMSStats::record_gc0_end(size_t cms_gen_bytes_used) {
promoted_bytes = _cms_used_at_gc0_end - _cms_used_at_gc0_begin;
}
// If the younger gen collections were skipped, then the
// If the young gen collection was skipped, then the
// number of promoted bytes will be 0 and adding it to the
// average will incorrectly lessen the average. It is, however,
// also possible that no promotion was needed.

View File

@ -39,23 +39,17 @@
// ======= Concurrent Mark Sweep Thread ========
// The CMS thread is created when Concurrent Mark Sweep is used in the
// older of two generations in a generational memory system.
ConcurrentMarkSweepThread* ConcurrentMarkSweepThread::_cmst = NULL;
CMSCollector* ConcurrentMarkSweepThread::_collector = NULL;
bool ConcurrentMarkSweepThread::_should_terminate = false;
int ConcurrentMarkSweepThread::_CMS_flag = CMS_nil;
ConcurrentMarkSweepThread*
ConcurrentMarkSweepThread::_cmst = NULL;
CMSCollector* ConcurrentMarkSweepThread::_collector = NULL;
bool ConcurrentMarkSweepThread::_should_terminate = false;
int ConcurrentMarkSweepThread::_CMS_flag = CMS_nil;
volatile jint ConcurrentMarkSweepThread::_pending_yields = 0;
volatile jint ConcurrentMarkSweepThread::_pending_yields = 0;
SurrogateLockerThread*
ConcurrentMarkSweepThread::_slt = NULL;
SurrogateLockerThread* ConcurrentMarkSweepThread::_slt = NULL;
SurrogateLockerThread::SLT_msg_type
ConcurrentMarkSweepThread::_sltBuffer = SurrogateLockerThread::empty;
Monitor*
ConcurrentMarkSweepThread::_sltMonitor = NULL;
Monitor* ConcurrentMarkSweepThread::_sltMonitor = NULL;
ConcurrentMarkSweepThread::ConcurrentMarkSweepThread(CMSCollector* collector)
: ConcurrentGCThread() {

View File

@ -69,20 +69,28 @@ ParScanThreadState::ParScanThreadState(Space* to_space_,
Stack<oop, mtGC>* overflow_stacks_,
size_t desired_plab_sz_,
ParallelTaskTerminator& term_) :
_to_space(to_space_), _old_gen(old_gen_), _young_gen(young_gen_), _thread_num(thread_num_),
_work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false),
_to_space(to_space_),
_old_gen(old_gen_),
_young_gen(young_gen_),
_thread_num(thread_num_),
_work_queue(work_queue_set_->queue(thread_num_)),
_to_space_full(false),
_overflow_stack(overflow_stacks_ ? overflow_stacks_ + thread_num_ : NULL),
_ageTable(false), // false ==> not the global age table, no perf data.
_to_space_alloc_buffer(desired_plab_sz_),
_to_space_closure(young_gen_, this), _old_gen_closure(young_gen_, this),
_to_space_root_closure(young_gen_, this), _old_gen_root_closure(young_gen_, this),
_to_space_closure(young_gen_, this),
_old_gen_closure(young_gen_, this),
_to_space_root_closure(young_gen_, this),
_old_gen_root_closure(young_gen_, this),
_older_gen_closure(young_gen_, this),
_evacuate_followers(this, &_to_space_closure, &_old_gen_closure,
&_to_space_root_closure, young_gen_, &_old_gen_root_closure,
work_queue_set_, &term_),
_is_alive_closure(young_gen_), _scan_weak_ref_closure(young_gen_, this),
_is_alive_closure(young_gen_),
_scan_weak_ref_closure(young_gen_, this),
_keep_alive_closure(&_scan_weak_ref_closure),
_strong_roots_time(0.0), _term_time(0.0)
_strong_roots_time(0.0),
_term_time(0.0)
{
#if TASKQUEUE_STATS
_term_attempts = 0;
@ -90,8 +98,7 @@ ParScanThreadState::ParScanThreadState(Space* to_space_,
_overflow_refill_objs = 0;
#endif // TASKQUEUE_STATS
_survivor_chunk_array =
(ChunkArray*) old_gen()->get_data_recorder(thread_num());
_survivor_chunk_array = (ChunkArray*) old_gen()->get_data_recorder(thread_num());
_hash_seed = 17; // Might want to take time-based random value.
_start = os::elapsedTime();
_old_gen_closure.set_generation(old_gen_);
@ -154,7 +161,6 @@ void ParScanThreadState::scan_partial_array_and_push_remainder(oop old) {
}
}
void ParScanThreadState::trim_queues(int max_size) {
ObjToScanQueue* queue = work_queue();
do {
@ -222,15 +228,12 @@ void ParScanThreadState::push_on_overflow_stack(oop p) {
}
HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {
// Otherwise, if the object is small enough, try to reallocate the
// buffer.
// If the object is small enough, try to reallocate the buffer.
HeapWord* obj = NULL;
if (!_to_space_full) {
PLAB* const plab = to_space_alloc_buffer();
Space* const sp = to_space();
if (word_sz * 100 <
ParallelGCBufferWastePct * plab->word_sz()) {
Space* const sp = to_space();
if (word_sz * 100 < ParallelGCBufferWastePct * plab->word_sz()) {
// Is small enough; abandon this buffer and start a new one.
plab->retire();
size_t buf_size = plab->word_sz();
@ -241,8 +244,7 @@ HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {
size_t free_bytes = sp->free();
while(buf_space == NULL && free_bytes >= min_bytes) {
buf_size = free_bytes >> LogHeapWordSize;
assert(buf_size == (size_t)align_object_size(buf_size),
"Invariant");
assert(buf_size == (size_t)align_object_size(buf_size), "Invariant");
buf_space = sp->par_allocate(buf_size);
free_bytes = sp->free();
}
@ -262,7 +264,6 @@ HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {
// We're used up.
_to_space_full = true;
}
} else {
// Too large; allocate the object individually.
obj = sp->par_allocate(word_sz);
@ -271,7 +272,6 @@ HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) {
return obj;
}
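// Worked example of the waste check above: the guard
//   word_sz * 100 < ParallelGCBufferWastePct * plab->word_sz()
// refills the PLAB only when the incoming object is smaller than
// ParallelGCBufferWastePct percent of a full buffer. With the default
// ParallelGCBufferWastePct of 10 and an illustrative 4096-word PLAB, a
// 409-word object retires and replaces the buffer (40900 < 40960), while
// a 410-word object (41000 > 40960) is instead allocated individually in
// to-space.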
void ParScanThreadState::undo_alloc_in_to_space(HeapWord* obj, size_t word_sz) {
to_space_alloc_buffer()->undo_allocation(obj, word_sz);
}
@ -288,7 +288,7 @@ public:
// Initializes states for the specified number of threads;
ParScanThreadStateSet(int num_threads,
Space& to_space,
ParNewGeneration& gen,
ParNewGeneration& young_gen,
Generation& old_gen,
ObjToScanQueueSet& queue_set,
Stack<oop, mtGC>* overflow_stacks_,
@ -315,21 +315,25 @@ public:
private:
ParallelTaskTerminator& _term;
ParNewGeneration& _gen;
ParNewGeneration& _young_gen;
Generation& _old_gen;
public:
bool is_valid(int id) const { return id < length(); }
ParallelTaskTerminator* terminator() { return &_term; }
};
ParScanThreadStateSet::ParScanThreadStateSet(
int num_threads, Space& to_space, ParNewGeneration& gen,
Generation& old_gen, ObjToScanQueueSet& queue_set,
Stack<oop, mtGC>* overflow_stacks,
size_t desired_plab_sz, ParallelTaskTerminator& term)
ParScanThreadStateSet::ParScanThreadStateSet(int num_threads,
Space& to_space,
ParNewGeneration& young_gen,
Generation& old_gen,
ObjToScanQueueSet& queue_set,
Stack<oop, mtGC>* overflow_stacks,
size_t desired_plab_sz,
ParallelTaskTerminator& term)
: ResourceArray(sizeof(ParScanThreadState), num_threads),
_gen(gen), _old_gen(old_gen), _term(term)
_young_gen(young_gen),
_old_gen(old_gen),
_term(term)
{
assert(num_threads > 0, "sanity check!");
assert(ParGCUseLocalOverflow == (overflow_stacks != NULL),
@ -337,13 +341,12 @@ ParScanThreadStateSet::ParScanThreadStateSet(
// Initialize states.
for (int i = 0; i < num_threads; ++i) {
new ((ParScanThreadState*)_data + i)
ParScanThreadState(&to_space, &gen, &old_gen, i, &queue_set,
ParScanThreadState(&to_space, &young_gen, &old_gen, i, &queue_set,
overflow_stacks, desired_plab_sz, term);
}
}
inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i)
{
inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i) {
assert(i >= 0 && i < length(), "sanity check!");
return ((ParScanThreadState*)_data)[i];
}
@ -357,8 +360,7 @@ void ParScanThreadStateSet::trace_promotion_failed(const YoungGCTracer* gc_trace
}
}
void ParScanThreadStateSet::reset(uint active_threads, bool promotion_failed)
{
void ParScanThreadStateSet::reset(uint active_threads, bool promotion_failed) {
_term.reset_for_reuse(active_threads);
if (promotion_failed) {
for (int i = 0; i < length(); ++i) {
@ -368,36 +370,27 @@ void ParScanThreadStateSet::reset(uint active_threads, bool promotion_failed)
}
#if TASKQUEUE_STATS
void
ParScanThreadState::reset_stats()
{
void ParScanThreadState::reset_stats() {
taskqueue_stats().reset();
_term_attempts = 0;
_overflow_refills = 0;
_overflow_refill_objs = 0;
}
void ParScanThreadStateSet::reset_stats()
{
void ParScanThreadStateSet::reset_stats() {
for (int i = 0; i < length(); ++i) {
thread_state(i).reset_stats();
}
}
void
ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st)
{
void ParScanThreadStateSet::print_termination_stats_hdr(outputStream* const st) {
st->print_raw_cr("GC Termination Stats");
st->print_raw_cr(" elapsed --strong roots-- "
"-------termination-------");
st->print_raw_cr("thr ms ms % "
" ms % attempts");
st->print_raw_cr("--- --------- --------- ------ "
"--------- ------ --------");
st->print_raw_cr(" elapsed --strong roots-- -------termination-------");
st->print_raw_cr("thr ms ms % ms % attempts");
st->print_raw_cr("--- --------- --------- ------ --------- ------ --------");
}
void ParScanThreadStateSet::print_termination_stats(outputStream* const st)
{
void ParScanThreadStateSet::print_termination_stats(outputStream* const st) {
print_termination_stats_hdr(st);
for (int i = 0; i < length(); ++i) {
@ -405,23 +398,20 @@ void ParScanThreadStateSet::print_termination_stats(outputStream* const st)
const double elapsed_ms = pss.elapsed_time() * 1000.0;
const double s_roots_ms = pss.strong_roots_time() * 1000.0;
const double term_ms = pss.term_time() * 1000.0;
st->print_cr("%3d %9.2f %9.2f %6.2f "
"%9.2f %6.2f " SIZE_FORMAT_W(8),
st->print_cr("%3d %9.2f %9.2f %6.2f %9.2f %6.2f " SIZE_FORMAT_W(8),
i, elapsed_ms, s_roots_ms, s_roots_ms * 100 / elapsed_ms,
term_ms, term_ms * 100 / elapsed_ms, pss.term_attempts());
}
}
// Print stats related to work queue activity.
void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st)
{
void ParScanThreadStateSet::print_taskqueue_stats_hdr(outputStream* const st) {
st->print_raw_cr("GC Task Stats");
st->print_raw("thr "); TaskQueueStats::print_header(1, st); st->cr();
st->print_raw("--- "); TaskQueueStats::print_header(2, st); st->cr();
}
void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st)
{
void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st) {
print_taskqueue_stats_hdr(st);
TaskQueueStats totals;
@ -443,8 +433,7 @@ void ParScanThreadStateSet::print_taskqueue_stats(outputStream* const st)
}
#endif // TASKQUEUE_STATS
void ParScanThreadStateSet::flush()
{
void ParScanThreadStateSet::flush() {
// Work in this loop should be kept as lightweight as
// possible since this might otherwise become a bottleneck
// to scaling. Should we add heavy-weight work into this
@ -454,12 +443,12 @@ void ParScanThreadStateSet::flush()
// Flush stats related to To-space PLAB activity and
// retire the last buffer.
par_scan_state.to_space_alloc_buffer()->flush_and_retire_stats(_gen.plab_stats());
par_scan_state.to_space_alloc_buffer()->flush_and_retire_stats(_young_gen.plab_stats());
// Every thread has its own age table. We need to merge
// them all into one.
ageTable *local_table = par_scan_state.age_table();
_gen.age_table()->merge(local_table);
_young_gen.age_table()->merge(local_table);
// Inform old gen that we're done.
_old_gen.par_promote_alloc_done(i);
@ -478,8 +467,7 @@ void ParScanThreadStateSet::flush()
ParScanClosure::ParScanClosure(ParNewGeneration* g,
ParScanThreadState* par_scan_state) :
OopsInKlassOrGenClosure(g), _par_scan_state(par_scan_state), _g(g)
{
OopsInKlassOrGenClosure(g), _par_scan_state(par_scan_state), _g(g) {
_boundary = _g->reserved().end();
}
@ -531,24 +519,23 @@ void ParEvacuateFollowersClosure::do_void() {
ObjToScanQueue* work_q = par_scan_state()->work_queue();
while (true) {
// Scan to-space and old-gen objs until we run out of both.
oop obj_to_scan;
par_scan_state()->trim_queues(0);
// We have no local work, attempt to steal from other threads.
// attempt to steal work from promoted.
// Attempt to steal work from promoted.
if (task_queues()->steal(par_scan_state()->thread_num(),
par_scan_state()->hash_seed(),
obj_to_scan)) {
bool res = work_q->push(obj_to_scan);
assert(res, "Empty queue should have room for a push.");
// if successful, goto Start.
// If successful, goto Start.
continue;
// try global overflow list.
// Try global overflow list.
} else if (par_gen()->take_from_overflow_list(par_scan_state())) {
continue;
}
@ -564,15 +551,17 @@ void ParEvacuateFollowersClosure::do_void() {
par_scan_state()->end_term_time();
}
ParNewGenTask::ParNewGenTask(ParNewGeneration* young_gen, Generation* old_gen,
HeapWord* young_old_boundary, ParScanThreadStateSet* state_set,
ParNewGenTask::ParNewGenTask(ParNewGeneration* young_gen,
Generation* old_gen,
HeapWord* young_old_boundary,
ParScanThreadStateSet* state_set,
StrongRootsScope* strong_roots_scope) :
AbstractGangTask("ParNewGeneration collection"),
_young_gen(young_gen), _old_gen(old_gen),
_young_old_boundary(young_old_boundary),
_state_set(state_set),
_strong_roots_scope(strong_roots_scope)
{}
{}
void ParNewGenTask::work(uint worker_id) {
GenCollectedHeap* gch = GenCollectedHeap::heap();
@@ -595,8 +584,7 @@ void ParNewGenTask::work(uint worker_id) {
par_scan_state.start_strong_roots();
gch->gen_process_roots(_strong_roots_scope,
GenCollectedHeap::YoungGen,
true, // Process younger gens, if any,
// as strong roots.
true, // Process younger gens, if any, as strong roots.
GenCollectedHeap::SO_ScavengeCodeCache,
GenCollectedHeap::StrongAndWeakRoots,
&par_scan_state.to_space_root_closure(),
@@ -613,8 +601,7 @@ void ParNewGenTask::work(uint worker_id) {
#pragma warning( push )
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif
ParNewGeneration::
ParNewGeneration(ReservedSpace rs, size_t initial_byte_size)
ParNewGeneration::ParNewGeneration(ReservedSpace rs, size_t initial_byte_size)
: DefNewGeneration(rs, initial_byte_size, "PCopy"),
_overflow_list(NULL),
_is_alive_closure(this),
@@ -625,20 +612,19 @@ ParNewGeneration(ReservedSpace rs, size_t initial_byte_size)
_task_queues = new ObjToScanQueueSet(ParallelGCThreads);
guarantee(_task_queues != NULL, "task_queues allocation failure.");
for (uint i1 = 0; i1 < ParallelGCThreads; i1++) {
for (uint i = 0; i < ParallelGCThreads; i++) {
ObjToScanQueue *q = new ObjToScanQueue();
guarantee(q != NULL, "work_queue Allocation failure.");
_task_queues->register_queue(i1, q);
_task_queues->register_queue(i, q);
}
for (uint i2 = 0; i2 < ParallelGCThreads; i2++)
_task_queues->queue(i2)->initialize();
for (uint i = 0; i < ParallelGCThreads; i++) {
_task_queues->queue(i)->initialize();
}
_overflow_stacks = NULL;
if (ParGCUseLocalOverflow) {
// typedef to workaround NEW_C_HEAP_ARRAY macro, which can not deal
// with ','
// typedef to work around the NEW_C_HEAP_ARRAY macro, which cannot deal with ','
typedef Stack<oop, mtGC> GCOopStack;
_overflow_stacks = NEW_C_HEAP_ARRAY(GCOopStack, ParallelGCThreads, mtGC);
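Aside: the typedef above works around a general preprocessor limitation, not anything GC-specific: the comma inside Stack<oop, mtGC> would split the NEW_C_HEAP_ARRAY macro's argument list. A standalone sketch, with MY_ARRAY standing in for NEW_C_HEAP_ARRAY:

#include <array>
#include <cstdlib>

// Stand-in for NEW_C_HEAP_ARRAY: a two-argument function-like macro.
#define MY_ARRAY(type, count) static_cast<type*>(std::malloc(sizeof(type) * (count)))

int main() {
  // MY_ARRAY(std::array<int, 4>, 2);   // error: macro passed 3 arguments, expects 2
  typedef std::array<int, 4> FourInts;  // the typedef hides the comma in the type
  FourInts* a = MY_ARRAY(FourInts, 2);  // now the macro sees exactly 2 arguments
  std::free(a);
}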
@@ -742,7 +728,7 @@ class ParNewRefProcTaskProxy: public AbstractGangTask {
typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
public:
ParNewRefProcTaskProxy(ProcessTask& task,
ParNewGeneration& gen,
ParNewGeneration& young_gen,
Generation& old_gen,
HeapWord* young_old_boundary,
ParScanThreadStateSet& state_set);
@@ -768,11 +754,9 @@ ParNewRefProcTaskProxy::ParNewRefProcTaskProxy(ProcessTask& task,
_old_gen(old_gen),
_young_old_boundary(young_old_boundary),
_state_set(state_set)
{
}
{ }
void ParNewRefProcTaskProxy::work(uint worker_id)
{
void ParNewRefProcTaskProxy::work(uint worker_id) {
ResourceMark rm;
HandleMark hm;
ParScanThreadState& par_scan_state = _state_set.thread_state(worker_id);
@@ -792,15 +776,12 @@ public:
_task(task)
{ }
virtual void work(uint worker_id)
{
virtual void work(uint worker_id) {
_task.work(worker_id);
}
};
void ParNewRefProcTaskExecutor::execute(ProcessTask& task)
{
void ParNewRefProcTaskExecutor::execute(ProcessTask& task) {
GenCollectedHeap* gch = GenCollectedHeap::heap();
WorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
@@ -812,8 +793,7 @@ void ParNewRefProcTaskExecutor::execute(ProcessTask& task)
_young_gen.promotion_failed());
}
void ParNewRefProcTaskExecutor::execute(EnqueueTask& task)
{
void ParNewRefProcTaskExecutor::execute(EnqueueTask& task) {
GenCollectedHeap* gch = GenCollectedHeap::heap();
WorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
@@ -821,8 +801,7 @@ void ParNewRefProcTaskExecutor::execute(EnqueueTask& task)
workers->run_task(&enq_task);
}
void ParNewRefProcTaskExecutor::set_single_threaded_mode()
{
void ParNewRefProcTaskExecutor::set_single_threaded_mode() {
_state_set.flush();
GenCollectedHeap* gch = GenCollectedHeap::heap();
gch->save_marks();
@@ -830,7 +809,8 @@ void ParNewRefProcTaskExecutor::set_single_threaded_mode()
ScanClosureWithParBarrier::
ScanClosureWithParBarrier(ParNewGeneration* g, bool gc_barrier) :
ScanClosure(g, gc_barrier) {}
ScanClosure(g, gc_barrier)
{ }
EvacuateFollowersClosureGeneral::
EvacuateFollowersClosureGeneral(GenCollectedHeap* gch,
@@ -838,7 +818,7 @@ EvacuateFollowersClosureGeneral(GenCollectedHeap* gch,
OopsInGenClosure* older) :
_gch(gch),
_scan_cur_or_nonheap(cur), _scan_older(older)
{}
{ }
void EvacuateFollowersClosureGeneral::do_void() {
do {
@@ -850,7 +830,6 @@ void EvacuateFollowersClosureGeneral::do_void() {
} while (!_gch->no_allocs_since_save_marks());
}
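Aside: the do/while above rescans until a pass copies no further objects, i.e. until nothing has been allocated since the last save_marks(). A toy fixpoint sketch of that shape; the counter and helpers are illustrative, not HotSpot code.

#include <cstdio>

static int copied_since_save_marks = 3;  // stand-in for freshly copied objects

static bool no_allocs_since_save_marks() { return copied_since_save_marks == 0; }

static void scan_one_pass() {
  // Scanning a copied object can itself copy more objects; this toy version
  // only drains the count by one per pass.
  std::printf("scan pass, %d object(s) outstanding\n", copied_since_save_marks);
  copied_since_save_marks--;
}

int main() {
  do {
    scan_one_pass();
  } while (!no_allocs_since_save_marks());
}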
// A Generation that does parallel young-gen collection.
void ParNewGeneration::handle_promotion_failed(GenCollectedHeap* gch, ParScanThreadStateSet& thread_state_set) {
@@ -996,9 +975,9 @@ void ParNewGeneration::collect(bool full,
if (ZapUnusedHeapArea) {
// This is now done here because of the piece-meal mangling which
// can check for valid mangling at intermediate points in the
// collection(s). When a minor collection fails to collect
// collection(s). When a young collection fails to collect
// sufficient space resizing of the young generation can occur
// an redistribute the spaces in the young generation. Mangle
// and redistribute the spaces in the young generation. Mangle
// here so that unzapped regions don't get distributed to
// other spaces.
to()->mangle_unused_area();
@@ -1113,8 +1092,10 @@ void ParNewGeneration::preserve_mark_if_necessary(oop obj, markOop m) {
// thus avoiding the need to undo the copy as in
// copy_to_survivor_space_avoiding_with_undo.
oop ParNewGeneration::copy_to_survivor_space(
ParScanThreadState* par_scan_state, oop old, size_t sz, markOop m) {
oop ParNewGeneration::copy_to_survivor_space(ParScanThreadState* par_scan_state,
oop old,
size_t sz,
markOop m) {
// In the sequential version, this assert also says that the object is
// not forwarded. That might not be the case here. It is the case that
// the caller observed it to be not forwarded at some time in the past.
@@ -1141,8 +1122,7 @@ oop ParNewGeneration::copy_to_survivor_space(
}
if (new_obj == NULL) {
// Either to-space is full or we decided to promote
// try allocating obj tenured
// Either to-space is full or we decided to promote; try allocating obj tenured
// Attempt to install a null forwarding pointer (atomically),
// to claim the right to install the real forwarding pointer.

View File

@@ -71,11 +71,7 @@ class ParScanThreadState {
ParScanWithoutBarrierClosure _to_space_closure; // scan_without_gc_barrier
ParScanWithBarrierClosure _old_gen_closure; // scan_with_gc_barrier
ParRootScanWithoutBarrierClosure _to_space_root_closure; // scan_root_without_gc_barrier
// One of these two will be passed to process_roots, which will
// set its generation. The first is for two-gen configs where the
// old gen collects the perm gen; the second is for arbitrary configs.
// The second isn't used right now (it used to be used for the train, an
// incremental collector) but the declaration has been left as a reminder.
// Will be passed to process_roots to set its generation.
ParRootScanWithBarrierTwoGensClosure _older_gen_closure;
// This closure will always be bound to the old gen; it will be used
// in evacuate_followers.
@@ -85,7 +81,6 @@ class ParScanThreadState {
ParScanWeakRefClosure _scan_weak_ref_closure;
ParKeepAliveClosure _keep_alive_closure;
Space* _to_space;
Space* to_space() { return _to_space; }

View File

@@ -1143,7 +1143,7 @@ void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
while (curr < end) {
Prefetch::read(curr, interval);
oop obj = oop(curr);
int size = obj->oop_iterate(&cl);
int size = obj->oop_iterate_size(&cl);
assert(size == obj->size(), "sanity");
curr += size;
}
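Aside: oop_iterate_size() returns the object's size, so the scan can step object by object through the region. A self-contained sketch of that linear walk, with made-up sizes:

#include <cstdio>

int main() {
  // Word sizes of three consecutive "objects" laid out from offset 0.
  const int sizes[] = {2, 5, 3};
  const int end = 10;  // region top, in words
  int curr = 0;
  int idx = 0;
  while (curr < end) {
    int size = sizes[idx++];  // what oop_iterate_size() reports per object
    std::printf("object at word %d, size %d\n", curr, size);
    curr += size;             // advance to the next object's header
  }
}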

View File

@@ -367,7 +367,7 @@ bool G1ArchiveAllocator::alloc_new_region() {
_max = _bottom + HeapRegion::min_region_size_in_words();
// Tell mark-sweep that objects in this region are not to be marked.
G1MarkSweep::mark_range_archive(MemRegion(_bottom, HeapRegion::GrainWords));
G1MarkSweep::set_range_archive(MemRegion(_bottom, HeapRegion::GrainWords), true);
// Since we've modified the old set, call update_sizes.
_g1h->g1mm()->update_sizes();

View File

@@ -27,6 +27,7 @@
#include "gc/g1/g1BlockOffsetTable.hpp"
#include "gc/g1/heapRegion.hpp"
#include "gc/shared/memset_with_concurrent_readers.hpp"
#include "gc/shared/space.hpp"
inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
@@ -68,15 +69,7 @@ void G1BlockOffsetSharedArray::set_offset_array(size_t left, size_t right, u_cha
check_index(right, "right index out of range");
assert(left <= right, "indexes out of order");
size_t num_cards = right - left + 1;
if (UseMemSetInBOT) {
memset(&_offset_array[left], offset, num_cards);
} else {
size_t i = left;
const size_t end = i + num_cards;
for (; i < end; i++) {
_offset_array[i] = offset;
}
}
memset_with_concurrent_readers(&_offset_array[left], offset, num_cards);
}
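Aside: memset_with_concurrent_readers() fills the offset array while other threads may be reading it. A hedged sketch of what such a fill might look like, using byte-wide atomic stores so a racing reader sees either the old or the new value; this is an illustration of the idea, not the HotSpot implementation.

#include <atomic>
#include <cstddef>

// Fill n bytes with v using individual atomic stores, so a thread reading a
// byte concurrently sees either the old value or v, never a torn mixture.
static void memset_concurrent(std::atomic<unsigned char>* dst,
                              unsigned char v, size_t n) {
  for (size_t i = 0; i < n; i++) {
    dst[i].store(v, std::memory_order_relaxed);
  }
}

int main() {
  std::atomic<unsigned char> offsets[8];
  memset_concurrent(offsets, 0x5a, 8);  // analogous to filling _offset_array
}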
// Variant of index_for that does not check the index for validity.

View File

@@ -0,0 +1,63 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "code/nmethod.hpp"
#include "gc/g1/g1CodeBlobClosure.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/heapRegion.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "oops/oop.inline.hpp"
template <typename T>
void G1CodeBlobClosure::HeapRegionGatheringOopClosure::do_oop_work(T* p) {
_work->do_oop(p);
T oop_or_narrowoop = oopDesc::load_heap_oop(p);
if (!oopDesc::is_null(oop_or_narrowoop)) {
oop o = oopDesc::decode_heap_oop_not_null(oop_or_narrowoop);
HeapRegion* hr = _g1h->heap_region_containing_raw(o);
assert(!_g1h->obj_in_cs(o) || hr->rem_set()->strong_code_roots_list_contains(_nm), "if o still in collection set then evacuation failed and nm must already be in the remset");
hr->add_strong_code_root(_nm);
}
}
void G1CodeBlobClosure::HeapRegionGatheringOopClosure::do_oop(oop* o) {
do_oop_work(o);
}
void G1CodeBlobClosure::HeapRegionGatheringOopClosure::do_oop(narrowOop* o) {
do_oop_work(o);
}
void G1CodeBlobClosure::do_code_blob(CodeBlob* cb) {
nmethod* nm = cb->as_nmethod_or_null();
if (nm != NULL) {
if (!nm->test_set_oops_do_mark()) {
_oc.set_nm(nm);
nm->oops_do(&_oc);
nm->fix_oop_relocations();
}
}
}
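Aside: test_set_oops_do_mark() lets exactly one worker claim each nmethod per GC, so its oops are processed once. A minimal sketch of that claim-once pattern with an atomic flag; Blob and try_claim are illustrative names.

#include <atomic>
#include <cstdio>

struct Blob {
  std::atomic<bool> claimed{false};
  // True only for the first caller; later callers see the flag already set.
  bool try_claim() { return !claimed.exchange(true); }
};

static void process(Blob& b, int worker) {
  if (b.try_claim()) {
    std::printf("worker %d processes the blob's oops\n", worker);
  }
}

int main() {
  Blob b;
  process(b, 0);  // claims the blob and does the work
  process(b, 1);  // exchange() returns true, so this worker skips it
}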

View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "gc/g1/g1CollectedHeap.hpp"
#include "memory/iterator.hpp"
class nmethod;
class G1CodeBlobClosure : public CodeBlobClosure {
class HeapRegionGatheringOopClosure : public OopClosure {
G1CollectedHeap* _g1h;
OopClosure* _work;
nmethod* _nm;
template <typename T>
void do_oop_work(T* p);
public:
HeapRegionGatheringOopClosure(OopClosure* oc) : _g1h(G1CollectedHeap::heap()), _work(oc), _nm(NULL) {}
void do_oop(oop* o);
void do_oop(narrowOop* o);
void set_nm(nmethod* nm) {
_nm = nm;
}
};
HeapRegionGatheringOopClosure _oc;
public:
G1CodeBlobClosure(OopClosure* oc) : _oc(oc) {}
void do_code_blob(CodeBlob* cb);
};

View File

@@ -65,6 +65,7 @@
#include "memory/iterator.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/init.hpp"
#include "runtime/orderAccess.inline.hpp"
#include "runtime/vmThread.hpp"
#include "utilities/globalDefinitions.hpp"
@@ -949,6 +950,7 @@ bool G1CollectedHeap::check_archive_addresses(MemRegion* ranges, size_t count) {
}
bool G1CollectedHeap::alloc_archive_regions(MemRegion* ranges, size_t count) {
assert(!is_init_completed(), "Expect to be called at JVM init time");
assert(ranges != NULL, "MemRegion array NULL");
assert(count != 0, "No MemRegions provided");
MutexLockerEx x(Heap_lock);
@@ -1037,12 +1039,13 @@ bool G1CollectedHeap::alloc_archive_regions(MemRegion* ranges, size_t count) {
}
// Notify mark-sweep of the archive range.
G1MarkSweep::mark_range_archive(curr_range);
G1MarkSweep::set_range_archive(curr_range, true);
}
return true;
}
void G1CollectedHeap::fill_archive_regions(MemRegion* ranges, size_t count) {
assert(!is_init_completed(), "Expect to be called at JVM init time");
assert(ranges != NULL, "MemRegion array NULL");
assert(count != 0, "No MemRegions provided");
MemRegion reserved = _hrm.reserved();
@@ -1125,6 +1128,81 @@ inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size,
return result;
}
void G1CollectedHeap::dealloc_archive_regions(MemRegion* ranges, size_t count) {
assert(!is_init_completed(), "Expect to be called at JVM init time");
assert(ranges != NULL, "MemRegion array NULL");
assert(count != 0, "No MemRegions provided");
MemRegion reserved = _hrm.reserved();
HeapWord* prev_last_addr = NULL;
HeapRegion* prev_last_region = NULL;
size_t size_used = 0;
size_t uncommitted_regions = 0;
// For each MemRegion, free the G1 regions that constitute it, and
// notify mark-sweep that the range is no longer to be considered 'archive.'
MutexLockerEx x(Heap_lock);
for (size_t i = 0; i < count; i++) {
HeapWord* start_address = ranges[i].start();
HeapWord* last_address = ranges[i].last();
assert(reserved.contains(start_address) && reserved.contains(last_address),
err_msg("MemRegion outside of heap [" PTR_FORMAT ", " PTR_FORMAT "]",
p2i(start_address), p2i(last_address)));
assert(start_address > prev_last_addr,
err_msg("Ranges not in ascending order: " PTR_FORMAT " <= " PTR_FORMAT ,
p2i(start_address), p2i(prev_last_addr)));
size_used += ranges[i].byte_size();
prev_last_addr = last_address;
HeapRegion* start_region = _hrm.addr_to_region(start_address);
HeapRegion* last_region = _hrm.addr_to_region(last_address);
// Check for ranges that start in the same G1 region in which the previous
// range ended, and adjust the start address so we don't try to free
// the same region again. If the current range is entirely within that
// region, skip it.
if (start_region == prev_last_region) {
start_address = start_region->end();
if (start_address > last_address) {
continue;
}
start_region = _hrm.addr_to_region(start_address);
}
prev_last_region = last_region;
// After verifying that each region was marked as an archive region by
// alloc_archive_regions, set it free and empty and uncommit it.
HeapRegion* curr_region = start_region;
while (curr_region != NULL) {
guarantee(curr_region->is_archive(),
err_msg("Expected archive region at index %u", curr_region->hrm_index()));
uint curr_index = curr_region->hrm_index();
_old_set.remove(curr_region);
curr_region->set_free();
curr_region->set_top(curr_region->bottom());
if (curr_region != last_region) {
curr_region = _hrm.next_region_in_heap(curr_region);
} else {
curr_region = NULL;
}
_hrm.shrink_at(curr_index, 1);
uncommitted_regions++;
}
// Notify mark-sweep that this is no longer an archive range.
G1MarkSweep::set_range_archive(ranges[i], false);
}
if (uncommitted_regions != 0) {
ergo_verbose1(ErgoHeapSizing,
"attempt heap shrinking",
ergo_format_reason("uncommitted archive regions")
ergo_format_byte("total size"),
HeapRegion::GrainWords * HeapWordSize * uncommitted_regions);
}
decrease_used(size_used);
}
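Aside: the start_region == prev_last_region check above keeps two adjacent ranges that share a G1 region from freeing that region twice. A small standalone sketch of the same bookkeeping, with made-up region size and addresses:

#include <cstdio>

static const unsigned kRegionSize = 0x1000;  // made-up 4K "G1 regions"

static unsigned region_index(unsigned addr) { return addr / kRegionSize; }

int main() {
  // Two ascending, non-overlapping ranges; the second starts inside the
  // region in which the first one ends (region 1).
  struct Range { unsigned start, last; };
  Range ranges[] = { {0x0800, 0x1800}, {0x1900, 0x2800} };

  int prev_last_region = -1;
  for (const Range& r : ranges) {
    unsigned start = r.start;
    int start_region = (int)region_index(start);
    if (start_region == prev_last_region) {
      // The previous range already freed this region; begin at the next one.
      start = (unsigned)(start_region + 1) * kRegionSize;
      if (start > r.last) continue;  // range lay entirely inside that region
      start_region = (int)region_index(start);
    }
    int last_region = (int)region_index(r.last);
    for (int i = start_region; i <= last_region; i++) {
      std::printf("free region %d\n", i);  // frees 0, 1, then only 2
    }
    prev_last_region = last_region;
  }
}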
HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size,
uint* gc_count_before_ret,
uint* gclocker_retry_count_ret) {
@@ -2845,9 +2923,9 @@ size_t G1CollectedHeap::tlab_used(Thread* ignored) const {
}
// For G1 TLABs should not contain humongous objects, so the maximum TLAB size
// must be smaller than the humongous object limit.
// must be equal to the humongous object limit.
size_t G1CollectedHeap::max_tlab_size() const {
return align_size_down(_humongous_object_threshold_in_words - 1, MinObjAlignment);
return align_size_down(_humongous_object_threshold_in_words, MinObjAlignment);
}
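Aside: align_size_down() rounds the humongous threshold down to the object-alignment boundary. A sketch of that helper under the usual power-of-two-alignment assumption; the threshold value below is illustrative, not G1's actual figure.

#include <cassert>
#include <cstddef>

// Round size down to a multiple of alignment; alignment must be a power of two.
static size_t align_size_down(size_t size, size_t alignment) {
  return size & ~(alignment - 1);
}

int main() {
  const size_t threshold_in_words = 65536;  // illustrative humongous threshold
  assert(align_size_down(threshold_in_words, 8) == threshold_in_words);
  assert(align_size_down(threshold_in_words - 3, 8) == threshold_in_words - 8);
}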
size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const {
@@ -4051,7 +4129,9 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty);
#endif // YOUNG_LIST_VERBOSE
g1_policy()->finalize_cset(target_pause_time_ms, evacuation_info);
g1_policy()->finalize_cset(target_pause_time_ms);
evacuation_info.set_collectionset_regions(g1_policy()->cset_region_length());
register_humongous_regions_with_cset();
@@ -4175,7 +4255,10 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
// investigate this in CR 7178365.
double sample_end_time_sec = os::elapsedTime();
double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
g1_policy()->record_collection_pause_end(pause_time_ms, evacuation_info);
g1_policy()->record_collection_pause_end(pause_time_ms);
evacuation_info.set_collectionset_used_before(g1_policy()->collection_set_bytes_used_before());
evacuation_info.set_bytes_copied(g1_policy()->bytes_copied_during_gc());
MemoryService::track_memory_usage();
@@ -4501,8 +4584,7 @@ public:
bool only_young, bool claim)
: _oop_closure(oop_closure),
_oop_in_klass_closure(oop_closure->g1(),
oop_closure->pss(),
oop_closure->rp()),
oop_closure->pss()),
_klass_in_cld_closure(&_oop_in_klass_closure, only_young),
_claim(claim) {
@@ -4531,18 +4613,18 @@ public:
bool only_young = _g1h->collector_state()->gcs_are_young();
// Non-IM young GC.
G1ParCopyClosure<G1BarrierNone, G1MarkNone> scan_only_root_cl(_g1h, pss, rp);
G1ParCopyClosure<G1BarrierNone, G1MarkNone> scan_only_root_cl(_g1h, pss);
G1CLDClosure<G1MarkNone> scan_only_cld_cl(&scan_only_root_cl,
only_young, // Only process dirty klasses.
false); // No need to claim CLDs.
// IM young GC.
// Strong roots closures.
G1ParCopyClosure<G1BarrierNone, G1MarkFromRoot> scan_mark_root_cl(_g1h, pss, rp);
G1ParCopyClosure<G1BarrierNone, G1MarkFromRoot> scan_mark_root_cl(_g1h, pss);
G1CLDClosure<G1MarkFromRoot> scan_mark_cld_cl(&scan_mark_root_cl,
false, // Process all klasses.
true); // Need to claim CLDs.
// Weak roots closures.
G1ParCopyClosure<G1BarrierNone, G1MarkPromotedFromRoot> scan_mark_weak_root_cl(_g1h, pss, rp);
G1ParCopyClosure<G1BarrierNone, G1MarkPromotedFromRoot> scan_mark_weak_root_cl(_g1h, pss);
G1CLDClosure<G1MarkPromotedFromRoot> scan_mark_weak_cld_cl(&scan_mark_weak_root_cl,
false, // Process all klasses.
true); // Need to claim CLDs.
@@ -4582,9 +4664,9 @@ public:
worker_id);
G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss);
_root_processor->scan_remembered_sets(&push_heap_rs_cl,
weak_root_cl,
worker_id);
_g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl,
weak_root_cl,
worker_id);
double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec;
double term_sec = 0.0;
@@ -5241,9 +5323,9 @@ public:
G1ParScanThreadState* pss = _pss[worker_id];
pss->set_ref_processor(NULL);
G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, pss, NULL);
G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, pss);
G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss, NULL);
G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss);
OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl;
@@ -5341,9 +5423,9 @@ public:
pss->set_ref_processor(NULL);
assert(pss->queue_is_empty(), "both queue and overflow should be empty");
G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, pss, NULL);
G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, pss);
G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss, NULL);
G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss);
OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl;
@@ -5451,9 +5533,9 @@ void G1CollectedHeap::process_discovered_references(G1ParScanThreadState** per_t
// closures while we're actually processing the discovered
// reference objects.
G1ParScanExtRootClosure only_copy_non_heap_cl(this, pss, NULL);
G1ParScanExtRootClosure only_copy_non_heap_cl(this, pss);
G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, pss, NULL);
G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, pss);
OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl;

View File

@@ -757,6 +757,12 @@ public:
// alloc_archive_regions, and after class loading has occurred.
void fill_archive_regions(MemRegion* range, size_t count);
// For each of the specified MemRegions, uncommit the containing G1 regions
// which had been allocated by alloc_archive_regions. This should be called
// rather than fill_archive_regions at JVM init time if the archive file
// mapping failed, with the same non-overlapping and sorted MemRegion array.
void dealloc_archive_regions(MemRegion* range, size_t count);
protected:
// Shrink the garbage-first heap by at most the given size (in bytes!).

View File

@@ -181,15 +181,6 @@ G1CollectorPolicy::G1CollectorPolicy() :
G1ErgoVerbose::set_enabled(false);
}
// Verify PLAB sizes
const size_t region_size = HeapRegion::GrainWords;
if (YoungPLABSize > region_size || OldPLABSize > region_size) {
char buffer[128];
jio_snprintf(buffer, sizeof(buffer), "%sPLABSize should be at most " SIZE_FORMAT,
OldPLABSize > region_size ? "Old" : "Young", region_size);
vm_exit_during_initialization(buffer);
}
_recent_prev_end_times_for_all_gcs_sec->add(os::elapsedTime());
_prev_collection_pause_end_ms = os::elapsedTime() * 1000.0;
@@ -932,7 +923,7 @@ bool G1CollectorPolicy::need_to_start_conc_mark(const char* source, size_t alloc
// Anything below that is considered to be zero
#define MIN_TIMER_GRANULARITY 0.0000001
void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, EvacuationInfo& evacuation_info) {
void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms) {
double end_time_sec = os::elapsedTime();
assert(_cur_collection_pause_used_regions_at_start >= cset_region_length(),
"otherwise, the subtraction below does not make sense");
@@ -964,9 +955,6 @@ void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, Evacua
_mmu_tracker->add_pause(end_time_sec - pause_time_ms/1000.0,
end_time_sec, _g1->gc_tracer_stw()->gc_id());
evacuation_info.set_collectionset_used_before(_collection_set_bytes_used_before);
evacuation_info.set_bytes_copied(_bytes_copied_during_gc);
if (update_stats) {
_trace_young_gen_time_data.record_end_collection(pause_time_ms, phase_times());
// this is where we update the allocation rate of the application
@@ -1883,7 +1871,7 @@ uint G1CollectorPolicy::calc_max_old_cset_length() {
}
void G1CollectorPolicy::finalize_cset(double target_pause_time_ms, EvacuationInfo& evacuation_info) {
void G1CollectorPolicy::finalize_cset(double target_pause_time_ms) {
double young_start_time_sec = os::elapsedTime();
YoungList* young_list = _g1->young_list();
@@ -2093,7 +2081,6 @@ void G1CollectorPolicy::finalize_cset(double target_pause_time_ms, EvacuationInf
double non_young_end_time_sec = os::elapsedTime();
phase_times()->record_non_young_cset_choice_time_ms((non_young_end_time_sec - non_young_start_time_sec) * 1000.0);
evacuation_info.set_collectionset_regions(cset_region_length());
}
void TraceYoungGenTimeData::record_start_collection(double time_to_stop_the_world_ms) {

Some files were not shown because too many files have changed in this diff.