Jesper Wilhelmsson 2016-02-18 18:10:35 +01:00
commit 6f4adc7c72
65 changed files with 1141 additions and 355 deletions

@ -54,6 +54,7 @@ define_pd_global(intx, INTPRESSURE, 25);
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
// InitialCodeCacheSize derived from specjbb2000 run.
define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize
define_pd_global(intx, CodeCacheExpansionSize, 64*K);

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -30,39 +30,151 @@
#include "vmreg_aarch64.inline.hpp"
jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
Unimplemented();
return 0;
if (inst->is_call() || inst->is_jump() || inst->is_blr()) {
return pc_offset + NativeCall::instruction_size;
} else if (inst->is_general_jump()) {
return pc_offset + NativeGeneralJump::instruction_size;
} else {
JVMCI_ERROR_0("unsupported type of instruction for call site");
}
}
void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
Unimplemented();
address pc = _instructions->start() + pc_offset;
Handle obj = HotSpotObjectConstantImpl::object(constant);
jobject value = JNIHandles::make_local(obj());
if (HotSpotObjectConstantImpl::compressed(constant)) {
int oop_index = _oop_recorder->find_index(value);
RelocationHolder rspec = oop_Relocation::spec(oop_index);
_instructions->relocate(pc, rspec, 1);
Unimplemented();
} else {
NativeMovConstReg* move = nativeMovConstReg_at(pc);
move->set_data((intptr_t) value);
int oop_index = _oop_recorder->find_index(value);
RelocationHolder rspec = oop_Relocation::spec(oop_index);
_instructions->relocate(pc, rspec);
}
}
void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
Unimplemented();
address pc = _instructions->start() + pc_offset;
if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
narrowKlass narrowOop = record_narrow_metadata_reference(constant, CHECK);
TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop);
Unimplemented();
} else {
NativeMovConstReg* move = nativeMovConstReg_at(pc);
Metadata* reference = record_metadata_reference(constant, CHECK);
move->set_data((intptr_t) reference);
TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference));
}
}
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
Unimplemented();
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) {
address pc = _instructions->start() + pc_offset;
NativeInstruction* inst = nativeInstruction_at(pc);
if (inst->is_adr_aligned()) {
address dest = _constants->start() + data_offset;
_instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS));
TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset);
} else {
JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc));
}
}
void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
Unimplemented();
address pc = (address) inst;
if (inst->is_call()) {
NativeCall* call = nativeCall_at(pc);
call->set_destination((address) foreign_call_destination);
_instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec());
} else if (inst->is_jump()) {
NativeJump* jump = nativeJump_at(pc);
jump->set_jump_destination((address) foreign_call_destination);
_instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec());
} else if (inst->is_general_jump()) {
NativeGeneralJump* jump = nativeGeneralJump_at(pc);
jump->set_jump_destination((address) foreign_call_destination);
_instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec());
} else {
JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc));
}
TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst));
}
void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
Unimplemented();
#ifdef ASSERT
Method* method = NULL;
// we need to check; this might also be an unresolved method
if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
method = getMethodFromHotSpotMethod(hotspot_method());
}
#endif
switch (_next_call_type) {
case INLINE_INVOKE:
break;
case INVOKEVIRTUAL:
case INVOKEINTERFACE: {
assert(method == NULL || !method->is_static(), "cannot call static method with invokeinterface");
NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
call->set_destination(SharedRuntime::get_resolve_virtual_call_stub());
_instructions->relocate(call->instruction_address(), virtual_call_Relocation::spec(_invoke_mark_pc));
break;
}
case INVOKESTATIC: {
assert(method == NULL || method->is_static(), "cannot call non-static method with invokestatic");
NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
call->set_destination(SharedRuntime::get_resolve_static_call_stub());
_instructions->relocate(call->instruction_address(), relocInfo::static_call_type);
break;
}
case INVOKESPECIAL: {
assert(method == NULL || !method->is_static(), "cannot call static method with invokespecial");
NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
call->set_destination(SharedRuntime::get_resolve_opt_virtual_call_stub());
_instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type);
break;
}
default:
JVMCI_ERROR("invalid _next_call_type value");
break;
}
}
void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
Unimplemented();
switch (mark) {
case POLL_NEAR:
JVMCI_ERROR("unimplemented");
break;
case POLL_FAR:
_instructions->relocate(pc, relocInfo::poll_type);
break;
case POLL_RETURN_NEAR:
JVMCI_ERROR("unimplemented");
break;
case POLL_RETURN_FAR:
_instructions->relocate(pc, relocInfo::poll_return_type);
break;
default:
JVMCI_ERROR("invalid mark value");
break;
}
}
// convert JVMCI register indices (as used in oop maps) to HotSpot registers
VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
return NULL;
if (jvmci_reg < RegisterImpl::number_of_registers) {
return as_Register(jvmci_reg)->as_VMReg();
} else {
jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers;
if (floatRegisterNumber < FloatRegisterImpl::number_of_registers) {
return as_FloatRegister(floatRegisterNumber)->as_VMReg();
}
JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);
}
}
bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
return false;
return !hotspotRegister->is_FloatRegister();
}
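
The get_hotspot_reg mapping above is easiest to check with concrete indices: JVMCI numbers 0..31 name general-purpose registers and 32..63 name float registers. A minimal standalone sketch of that split (plain C++; the constants are hypothetical stand-ins for RegisterImpl::number_of_registers and FloatRegisterImpl::number_of_registers, both 32 on AArch64):

#include <cstdio>

// Standalone model of the index split in get_hotspot_reg (not HotSpot code):
// indices 0..31 map to general-purpose registers, 32..63 to float registers,
// anything larger is rejected, mirroring JVMCI_ERROR_NULL above.
const int kGprCount = 32;  // stands in for RegisterImpl::number_of_registers
const int kFprCount = 32;  // stands in for FloatRegisterImpl::number_of_registers

void describe(int jvmci_reg) {
  if (jvmci_reg < kGprCount) {
    std::printf("jvmci %2d -> r%d\n", jvmci_reg, jvmci_reg);
  } else if (jvmci_reg - kGprCount < kFprCount) {
    std::printf("jvmci %2d -> v%d\n", jvmci_reg, jvmci_reg - kGprCount);
  } else {
    std::printf("jvmci %2d -> invalid register number\n", jvmci_reg);
  }
}

int main() {
  describe(3);   // r3
  describe(35);  // v3
  describe(70);  // invalid
  return 0;
}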

@ -136,7 +136,7 @@ void NativeMovConstReg::set_data(intptr_t x) {
MacroAssembler::pd_patch_instruction(instruction_address(), (address)x);
ICache::invalidate_range(instruction_address(), instruction_size);
}
};
}
void NativeMovConstReg::print() {
tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT,
@ -208,6 +208,32 @@ void NativeJump::set_jump_destination(address dest) {
//-------------------------------------------------------------------
address NativeGeneralJump::jump_destination() const {
NativeMovConstReg* move = nativeMovConstReg_at(instruction_address());
address dest = (address) move->data();
// We use jump to self as the unresolved address which the inline
// cache code (and relocs) know about
// return -1 if jump to self
dest = (dest == (address) this) ? (address) -1 : dest;
return dest;
}
void NativeGeneralJump::set_jump_destination(address dest) {
NativeMovConstReg* move = nativeMovConstReg_at(instruction_address());
// We use jump to self as the unresolved address which the inline
// cache code (and relocs) know about
if (dest == (address) -1) {
dest = instruction_address();
}
move->set_data((uintptr_t) dest);
};
//-------------------------------------------------------------------
bool NativeInstruction::is_safepoint_poll() {
// a safepoint_poll is implemented in two steps as either
//
@ -249,6 +275,22 @@ bool NativeInstruction::is_ldrw_to_zr(address instr) {
Instruction_aarch64::extract(insn, 4, 0) == 0b11111);
}
bool NativeInstruction::is_general_jump() {
if (is_movz()) {
NativeInstruction* inst1 = nativeInstruction_at(addr_at(instruction_size * 1));
if (inst1->is_movk()) {
NativeInstruction* inst2 = nativeInstruction_at(addr_at(instruction_size * 2));
if (inst2->is_movk()) {
NativeInstruction* inst3 = nativeInstruction_at(addr_at(instruction_size * 3));
if (inst3->is_blr()) {
return true;
}
}
}
}
return false;
}
bool NativeInstruction::is_movz() {
return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b10100101;
}
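
For reference, is_general_jump above walks four consecutive 4-byte instructions: movz, movk, movk, blr. A standalone sketch of that match follows; the movz and blr bit patterns are copied from is_movz and is_blr in this diff, while the movk field value 0b11100101 is an assumption based on the ARMv8 MOVK encoding:

#include <cstdint>

// Hypothetical standalone recognizer for the movz/movk/movk/blr sequence.
static uint32_t bits(uint32_t insn, int hi, int lo) {
  return (insn >> lo) & ((1u << (hi - lo + 1)) - 1u);
}

bool looks_like_general_jump(const uint32_t insn[4]) {
  return bits(insn[0], 30, 23) == 0b10100101        // movz: load bits 0..15
      && bits(insn[1], 30, 23) == 0b11100101        // movk: patch bits 16..31 (assumed encoding)
      && bits(insn[2], 30, 23) == 0b11100101        // movk: patch bits 32..47 (assumed encoding)
      && (insn[3] & 0xfffffc1f) == 0xd63f0000;      // blr: branch through the register
}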

@ -54,11 +54,22 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
friend class Relocation;
friend bool is_NativeCallTrampolineStub_at(address);
public:
enum { instruction_size = 4 };
enum {
instruction_size = 4
};
juint encoding() const {
return uint_at(0);
}
bool is_blr() const { return (encoding() & 0xfffffc1f) == 0xd63f0000; }
bool is_adr_aligned() const { return (encoding() & 0xff000000) == 0x10000000; } // adr Xn, <label>, where label is aligned to 4 bytes (address of instruction).
inline bool is_nop();
inline bool is_illegal();
inline bool is_return();
bool is_jump();
bool is_general_jump();
inline bool is_jump_or_nop();
inline bool is_cond_jump();
bool is_safepoint_poll();
@ -341,11 +352,15 @@ class NativeMovRegMemPatching: public NativeMovRegMem {
// An interface for accessing/manipulating native leal instruction of form:
// leal reg, [reg + offset]
class NativeLoadAddress: public NativeMovRegMem {
static const bool has_rex = true;
static const int rex_size = 1;
public:
class NativeLoadAddress: public NativeInstruction {
enum AArch64_specific_constants {
instruction_size = 4,
instruction_offset = 0,
data_offset = 0,
next_instruction_offset = 4
};
public:
void verify();
void print ();
@ -398,6 +413,10 @@ public:
data_offset = 0,
next_instruction_offset = 4 * 4
};
address jump_destination() const;
void set_jump_destination(address dest);
static void insert_unconditional(address code_pos, address entry);
static void replace_mt_safe(address instr_addr, address code_buffer);
static void verify();

@ -54,6 +54,7 @@ define_pd_global(intx, RegisterCostAreaRatio, 16000);
define_pd_global(bool, UseTLAB, true);
define_pd_global(bool, ResizeTLAB, true);
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
// Peephole and CISC spilling both break the graph, and so make the
// scheduler sick.

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -42,7 +42,7 @@ void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, T
Unimplemented();
}
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) {
Unimplemented();
}

@ -52,6 +52,7 @@ define_pd_global(intx, RegisterCostAreaRatio, 12000);
define_pd_global(bool, UseTLAB, true);
define_pd_global(bool, ResizeTLAB, true);
define_pd_global(intx, LoopUnrollLimit, 60); // Design center runs on 1.3.1
define_pd_global(intx, LoopPercentProfileLimit, 10);
define_pd_global(intx, MinJumpTableSize, 5);
// Peephole and CISC spilling both break the graph, and so make the

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -85,7 +85,7 @@ void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, T
}
}
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) {
address pc = _instructions->start() + pc_offset;
NativeInstruction* inst = nativeInstruction_at(pc);
NativeInstruction* inst1 = nativeInstruction_at(pc + 4);

@ -2015,23 +2015,33 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
int vep_offset = ((intptr_t)__ pc()) - start;
#ifdef COMPILER1
if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
// Object.hashCode can pull the hashCode from the header word
// instead of doing a full VM transition once it's been computed.
// Since hashCode is usually polymorphic at call sites we can't do
// this optimization at the call site without a lot of work.
if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
// Object.hashCode and System.identityHashCode can pull the hashCode from the
// header word instead of doing a full VM transition once it's been computed.
// Since hashCode is usually polymorphic at call sites we can't do this
// optimization at the call site without a lot of work.
Label slowCase;
Register receiver = O0;
Label done;
Register obj_reg = O0;
Register result = O0;
Register header = G3_scratch;
Register hash = G3_scratch; // overwrite header value with hash value
Register mask = G1; // to get hash field from header
// Unlike Object.hashCode, System.identityHashCode is a static method and
// gets the object as an argument instead of the receiver.
if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
assert(method->is_static(), "method should be static");
// return 0 for null reference input
__ br_null(obj_reg, false, Assembler::pn, done);
__ delayed()->mov(obj_reg, hash);
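// Note: the delay slot executes even when the branch is taken, so a null
// obj_reg (i.e. 0) becomes the hash value that the leaf return hands back.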
}
// Read the header and build a mask to get its hash field. Give up if the object is not unlocked.
// We depend on hash_mask being at most 32 bits and avoid the use of
// hash_mask_in_place because it could be larger than 32 bits in a 64-bit
// vm: see markOop.hpp.
__ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header);
__ ld_ptr(obj_reg, oopDesc::mark_offset_in_bytes(), header);
__ sethi(markOopDesc::hash_mask, mask);
__ btst(markOopDesc::unlocked_value, header);
__ br(Assembler::zero, false, Assembler::pn, slowCase);
@ -2054,6 +2064,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ delayed()->nop();
// leaf return.
__ bind(done);
__ retl();
__ delayed()->mov(hash, result);
__ bind(slowCase);

@ -2361,7 +2361,7 @@ void Assembler::movdqa(XMMRegister dst, Address src) {
void Assembler::movdqu(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
@ -2398,7 +2398,7 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
void Assembler::vmovdqu(XMMRegister dst, Address src) {
assert(UseAVX > 0, "");
InstructionMark im(this);
InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
@ -2486,7 +2486,7 @@ void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
@ -2515,7 +2515,7 @@ void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
@ -2640,7 +2640,7 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb
void Assembler::movsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x10);
emit_int8((unsigned char)(0xC0 | encode));
@ -2649,7 +2649,7 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) {
void Assembler::movsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x10);
@ -2668,7 +2668,7 @@ void Assembler::movsd(Address dst, XMMRegister src) {
void Assembler::movss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x10);
emit_int8((unsigned char)(0xC0 | encode));
@ -2677,7 +2677,7 @@ void Assembler::movss(XMMRegister dst, XMMRegister src) {
void Assembler::movss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x10);
@ -2782,7 +2782,7 @@ void Assembler::mull(Register src) {
void Assembler::mulsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
@ -2791,7 +2791,7 @@ void Assembler::mulsd(XMMRegister dst, Address src) {
void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -2800,7 +2800,7 @@ void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
void Assembler::mulss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
@ -2809,7 +2809,7 @@ void Assembler::mulss(XMMRegister dst, Address src) {
void Assembler::mulss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
emit_int8((unsigned char)(0xC0 | encode));
@ -3993,7 +3993,7 @@ void Assembler::smovl() {
void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
emit_int8((unsigned char)(0xC0 | encode));
@ -4002,7 +4002,7 @@ void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
void Assembler::sqrtsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
@ -4011,7 +4011,7 @@ void Assembler::sqrtsd(XMMRegister dst, Address src) {
void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
emit_int8((unsigned char)(0xC0 | encode));
@ -4024,7 +4024,7 @@ void Assembler::std() {
void Assembler::sqrtss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x51);
@ -4078,7 +4078,7 @@ void Assembler::subl(Register dst, Register src) {
void Assembler::subsd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4087,7 +4087,7 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) {
void Assembler::subsd(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
@ -4096,7 +4096,7 @@ void Assembler::subsd(XMMRegister dst, Address src) {
void Assembler::subss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
emit_int8((unsigned char)(0xC0 | encode));
@ -4105,7 +4105,7 @@ void Assembler::subss(XMMRegister dst, XMMRegister src) {
void Assembler::subss(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse(), ""));
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
@ -4293,7 +4293,7 @@ void Assembler::xorb(Register dst, Address src) {
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
@ -4303,7 +4303,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
@ -4313,7 +4313,7 @@ void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
@ -4323,7 +4323,7 @@ void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x58);
@ -4333,7 +4333,7 @@ void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
@ -4343,7 +4343,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
@ -4353,7 +4353,7 @@ void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
@ -4363,7 +4363,7 @@ void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x5E);
@ -4373,7 +4373,7 @@ void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
@ -4383,7 +4383,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
@ -4393,7 +4393,7 @@ void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
@ -4403,7 +4403,7 @@ void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x59);
@ -4413,7 +4413,7 @@ void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
@ -4423,7 +4423,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
@ -4433,7 +4433,7 @@ void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
@ -4443,7 +4443,7 @@ void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
assert(VM_Version::supports_avx(), "");
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x5C);
@ -5901,7 +5901,7 @@ void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
// duplicate 1-byte integer data from src into 16|32|64 locations in dest: requires AVX512BW and AVX512VL
void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x78);
emit_int8((unsigned char)(0xC0 | encode));
@ -5911,7 +5911,7 @@ void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
// swap src<->dst for encoding
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@ -5922,7 +5922,7 @@ void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
// duplicate 2-byte integer data from src into 8|16|32 locations in dest: requires AVX512BW and AVX512VL
void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x79);
emit_int8((unsigned char)(0xC0 | encode));
@ -5932,7 +5932,7 @@ void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
// swap src<->dst for encoding
vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@ -6027,7 +6027,7 @@ void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
// duplicate 1-byte integer data from src into 16|32|64 locations in dest: requires AVX512BW and AVX512VL
void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x7A);
emit_int8((unsigned char)(0xC0 | encode));
@ -6036,7 +6036,7 @@ void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
// duplicate 2-byte integer data from src into 8|16|32 locations in dest: requires AVX512BW and AVX512VL
void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x7B);
emit_int8((unsigned char)(0xC0 | encode));

@ -2044,11 +2044,11 @@ private:
class InstructionAttr {
public:
InstructionAttr(
int vector_len,
bool rex_vex_w,
bool legacy_mode,
bool no_reg_mask,
bool uses_vl)
int vector_len, // Vector length applied in the encoding, for both AVX and EVEX
bool rex_vex_w, // Data width: false for 32 bits or less, true for 64-bit or specially defined operands
bool legacy_mode, // If true, encode with VEX (AVX) or earlier encodings; if false, EVEX encoding may be chosen
bool no_reg_mask, // If true, mask register k0 is used when EVEX encoding is chosen; otherwise k1 is used
bool uses_vl) // The instruction may have legacy constraints based on vector length for EVEX
:
_avx_vector_len(vector_len),
_rex_vex_w(rex_vex_w),

@ -46,6 +46,7 @@ define_pd_global(intx, OnStackReplacePercentage, 140);
define_pd_global(intx, ConditionalMoveLimit, 3);
define_pd_global(intx, FreqInlineSize, 325);
define_pd_global(intx, MinJumpTableSize, 10);
define_pd_global(intx, LoopPercentProfileLimit, 30);
#ifdef AMD64
define_pd_global(intx, INTPRESSURE, 13);
define_pd_global(intx, FLOATPRESSURE, 14);

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -101,7 +101,7 @@ void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, T
}
}
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) {
address pc = _instructions->start() + pc_offset;
address operand = Assembler::locate_operand(pc, Assembler::disp32_operand);

@ -7840,7 +7840,9 @@ void MacroAssembler::string_compare(Register str1, Register str2,
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3
int stride, stride2, adr_stride, adr_stride1, adr_stride2;
int stride2x2 = 0x40;
Address::ScaleFactor scale, scale1, scale2;
Address::ScaleFactor scale = Address::no_scale;
Address::ScaleFactor scale1 = Address::no_scale;
Address::ScaleFactor scale2 = Address::no_scale;
if (ae != StrIntrinsicNode::LL) {
stride2x2 = 0x20;
@ -7894,9 +7896,9 @@ void MacroAssembler::string_compare(Register str1, Register str2,
stride = 8;
}
} else {
scale = Address::no_scale; // not used
scale1 = Address::times_1;
scale2 = Address::times_2;
// scale not used
stride = 8;
}
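
The explicit Address::no_scale initializers above replace declarations that left the three locals indeterminate on paths that never assigned them. A minimal illustration of the hazard being fixed (hypothetical code, not from HotSpot):

// Before the fix the locals were declared uninitialized; any path that read
// one without assigning it first was undefined behavior (and drew
// maybe-uninitialized compiler warnings).
enum ScaleFactor { no_scale = -1, times_1 = 0, times_2 = 1 };

int pick_stride(bool mixed_encoding) {
  ScaleFactor scale = no_scale;         // explicit default, as in the diff
  if (!mixed_encoding) scale = times_1;
  return scale == no_scale ? 8 : 4;
}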

@ -0,0 +1,91 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_x86.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif //COMPILER1
#define __ masm->
#ifdef COMPILER1
// ---------------------------------------------------------------------------
// Object.hashCode and System.identityHashCode can pull the hashCode from the
// header word instead of doing a full VM transition once it's been computed.
// Since hashCode is usually polymorphic at call sites we can't do this
// optimization at the call site without a lot of work.
void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* masm,
methodHandle method,
Register obj_reg,
Register result) {
Label slowCase;
// Unlike Object.hashCode, System.identityHashCode is a static method and
// gets the object as an argument instead of the receiver.
if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
Label Continue;
// return 0 for null reference input
__ cmpptr(obj_reg, (int32_t)NULL_WORD);
__ jcc(Assembler::notEqual, Continue);
__ xorptr(result, result);
__ ret(0);
__ bind(Continue);
}
__ movptr(result, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
// check if locked
__ testptr(result, markOopDesc::unlocked_value);
__ jcc(Assembler::zero, slowCase);
if (UseBiasedLocking) {
// Check if biased and fall through to runtime if so
__ testptr(result, markOopDesc::biased_lock_bit_in_place);
__ jcc(Assembler::notZero, slowCase);
}
// get hash
#ifdef _LP64
// Read the header and build a mask to get its hash field.
// Depend on hash_mask being at most 32 bits and avoid the use of hash_mask_in_place
// because it could be larger than 32 bits in a 64-bit vm. See markOop.hpp.
__ shrptr(result, markOopDesc::hash_shift);
__ andptr(result, markOopDesc::hash_mask);
#else
__ andptr(result, markOopDesc::hash_mask_in_place);
#endif //_LP64
// test if hashCode exists
__ jcc(Assembler::zero, slowCase);
#ifndef _LP64
__ shrptr(result, markOopDesc::hash_shift);
#endif
__ ret(0);
__ bind(slowCase);
}
#endif //COMPILER1
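
The fast path above reduces to a shift-and-mask on the mark word. A toy model of the 64-bit extraction follows; the shift amount and field layout are illustrative stand-ins for the real constants in markOop.hpp, while the unlocked-bit test and the 31-bit hash mask match the comments in this diff:

#include <cstdint>
#include <cstdio>

const uintptr_t kUnlockedValue = 1;           // low lock bits 01 == unlocked
const int       kHashShift     = 8;           // illustrative; see markOop.hpp
const uintptr_t kHashMask      = 0x7fffffff;  // hash fits in 31 bits

// Returns true and fills *hash when the mark word already carries a hash;
// otherwise the caller must fall through to the slow (VM-transition) path.
bool try_fast_hash(uintptr_t mark, uintptr_t* hash) {
  if ((mark & kUnlockedValue) == 0) return false;  // not unlocked: slow path
                                                   // (real code also checks the biased-lock bit)
  uintptr_t h = (mark >> kHashShift) & kHashMask;
  if (h == 0) return false;                        // hash not installed yet
  *hash = h;
  return true;
}

int main() {
  uintptr_t h = 0;
  uintptr_t mark = (static_cast<uintptr_t>(0x1234) << kHashShift) | kUnlockedValue;
  std::printf("fast=%d hash=0x%lx\n", try_fast_hash(mark, &h), (unsigned long)h);
  return 0;
}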

@ -1754,34 +1754,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
int vep_offset = ((intptr_t)__ pc()) - start;
#ifdef COMPILER1
if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
// Object.hashCode can pull the hashCode from the header word
// instead of doing a full VM transition once it's been computed.
// Since hashCode is usually polymorphic at call sites we can't do
// this optimization at the call site without a lot of work.
Label slowCase;
Register receiver = rcx;
Register result = rax;
__ movptr(result, Address(receiver, oopDesc::mark_offset_in_bytes()));
// check if locked
__ testptr(result, markOopDesc::unlocked_value);
__ jcc (Assembler::zero, slowCase);
if (UseBiasedLocking) {
// Check if biased and fall through to runtime if so
__ testptr(result, markOopDesc::biased_lock_bit_in_place);
__ jcc (Assembler::notZero, slowCase);
}
// get hash
__ andptr(result, markOopDesc::hash_mask_in_place);
// test if hashCode exists
__ jcc (Assembler::zero, slowCase);
__ shrptr(result, markOopDesc::hash_shift);
__ ret(0);
__ bind (slowCase);
}
// For Object.hashCode and System.identityHashCode, try to pull the hashCode from the object header if available.
if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
inline_check_hashcode_from_object_header(masm, method, rcx /*obj_reg*/, rax /*result*/);
}
#endif // COMPILER1
// The instruction at the verified entry point must be 5 bytes or longer

@ -2058,6 +2058,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
int vep_offset = ((intptr_t)__ pc()) - start;
#ifdef COMPILER1
// For Object.hashCode and System.identityHashCode, try to pull the hashCode from the object header if available.
if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/);
}
#endif // COMPILER1
// The instruction at the verified entry point must be 5 bytes or longer
// because it can be patched on the fly by make_non_entrant. The stack bang
// instruction fits that requirement.

@ -70,56 +70,64 @@ public class AArch64 extends Architecture {
public static final Register r28 = new Register(28, 28, "r28", CPU);
public static final Register r29 = new Register(29, 29, "r29", CPU);
public static final Register r30 = new Register(30, 30, "r30", CPU);
/*
* r31 is not a general-purpose register, but represents either the stack pointer or the
* zero/discard register depending on the instruction. So we represent those two uses as two
* different registers. The register numbers are kept in sync with register_aarch64.hpp and have
* to be sequential, hence we also need a general r31 register here, which is never used.
*/
public static final Register r31 = new Register(31, 31, "r31", CPU);
public static final Register zr = new Register(32, 31, "zr", CPU);
public static final Register sp = new Register(33, 31, "sp", CPU);
public static final Register lr = r30;
public static final Register zr = r31;
public static final Register sp = r31;
// @formatter:off
public static final Register[] cpuRegisters = {
r0, r1, r2, r3, r4, r5, r6, r7,
r8, r9, r10, r11, r12, r13, r14, r15,
r16, r17, r18, r19, r20, r21, r22, r23,
r24, r25, r26, r27, r28, r29, r30, r31
r24, r25, r26, r27, r28, r29, r30, r31,
zr, sp
};
// @formatter:on
public static final RegisterCategory SIMD = new RegisterCategory("SIMD");
// Simd registers
public static final Register v0 = new Register(32, 0, "v0", SIMD);
public static final Register v1 = new Register(33, 1, "v1", SIMD);
public static final Register v2 = new Register(34, 2, "v2", SIMD);
public static final Register v3 = new Register(35, 3, "v3", SIMD);
public static final Register v4 = new Register(36, 4, "v4", SIMD);
public static final Register v5 = new Register(37, 5, "v5", SIMD);
public static final Register v6 = new Register(38, 6, "v6", SIMD);
public static final Register v7 = new Register(39, 7, "v7", SIMD);
public static final Register v8 = new Register(40, 8, "v8", SIMD);
public static final Register v9 = new Register(41, 9, "v9", SIMD);
public static final Register v10 = new Register(42, 10, "v10", SIMD);
public static final Register v11 = new Register(43, 11, "v11", SIMD);
public static final Register v12 = new Register(44, 12, "v12", SIMD);
public static final Register v13 = new Register(45, 13, "v13", SIMD);
public static final Register v14 = new Register(46, 14, "v14", SIMD);
public static final Register v15 = new Register(47, 15, "v15", SIMD);
public static final Register v16 = new Register(48, 16, "v16", SIMD);
public static final Register v17 = new Register(49, 17, "v17", SIMD);
public static final Register v18 = new Register(50, 18, "v18", SIMD);
public static final Register v19 = new Register(51, 19, "v19", SIMD);
public static final Register v20 = new Register(52, 20, "v20", SIMD);
public static final Register v21 = new Register(53, 21, "v21", SIMD);
public static final Register v22 = new Register(54, 22, "v22", SIMD);
public static final Register v23 = new Register(55, 23, "v23", SIMD);
public static final Register v24 = new Register(56, 24, "v24", SIMD);
public static final Register v25 = new Register(57, 25, "v25", SIMD);
public static final Register v26 = new Register(58, 26, "v26", SIMD);
public static final Register v27 = new Register(59, 27, "v27", SIMD);
public static final Register v28 = new Register(60, 28, "v28", SIMD);
public static final Register v29 = new Register(61, 29, "v29", SIMD);
public static final Register v30 = new Register(62, 30, "v30", SIMD);
public static final Register v31 = new Register(63, 31, "v31", SIMD);
public static final Register v0 = new Register(34, 0, "v0", SIMD);
public static final Register v1 = new Register(35, 1, "v1", SIMD);
public static final Register v2 = new Register(36, 2, "v2", SIMD);
public static final Register v3 = new Register(37, 3, "v3", SIMD);
public static final Register v4 = new Register(38, 4, "v4", SIMD);
public static final Register v5 = new Register(39, 5, "v5", SIMD);
public static final Register v6 = new Register(40, 6, "v6", SIMD);
public static final Register v7 = new Register(41, 7, "v7", SIMD);
public static final Register v8 = new Register(42, 8, "v8", SIMD);
public static final Register v9 = new Register(43, 9, "v9", SIMD);
public static final Register v10 = new Register(44, 10, "v10", SIMD);
public static final Register v11 = new Register(45, 11, "v11", SIMD);
public static final Register v12 = new Register(46, 12, "v12", SIMD);
public static final Register v13 = new Register(47, 13, "v13", SIMD);
public static final Register v14 = new Register(48, 14, "v14", SIMD);
public static final Register v15 = new Register(49, 15, "v15", SIMD);
public static final Register v16 = new Register(50, 16, "v16", SIMD);
public static final Register v17 = new Register(51, 17, "v17", SIMD);
public static final Register v18 = new Register(52, 18, "v18", SIMD);
public static final Register v19 = new Register(53, 19, "v19", SIMD);
public static final Register v20 = new Register(54, 20, "v20", SIMD);
public static final Register v21 = new Register(55, 21, "v21", SIMD);
public static final Register v22 = new Register(56, 22, "v22", SIMD);
public static final Register v23 = new Register(57, 23, "v23", SIMD);
public static final Register v24 = new Register(58, 24, "v24", SIMD);
public static final Register v25 = new Register(59, 25, "v25", SIMD);
public static final Register v26 = new Register(60, 26, "v26", SIMD);
public static final Register v27 = new Register(61, 27, "v27", SIMD);
public static final Register v28 = new Register(62, 28, "v28", SIMD);
public static final Register v29 = new Register(63, 29, "v29", SIMD);
public static final Register v30 = new Register(64, 30, "v30", SIMD);
public static final Register v31 = new Register(65, 31, "v31", SIMD);
// @formatter:off
public static final Register[] simdRegisters = {
@ -136,6 +144,7 @@ public class AArch64 extends Architecture {
r8, r9, r10, r11, r12, r13, r14, r15,
r16, r17, r18, r19, r20, r21, r22, r23,
r24, r25, r26, r27, r28, r29, r30, r31,
zr, sp,
v0, v1, v2, v3, v4, v5, v6, v7,
v8, v9, v10, v11, v12, v13, v14, v15,

@ -31,6 +31,7 @@ import static jdk.vm.ci.aarch64.AArch64.r27;
import static jdk.vm.ci.aarch64.AArch64.r28;
import static jdk.vm.ci.aarch64.AArch64.r29;
import static jdk.vm.ci.aarch64.AArch64.r3;
import static jdk.vm.ci.aarch64.AArch64.r31;
import static jdk.vm.ci.aarch64.AArch64.r4;
import static jdk.vm.ci.aarch64.AArch64.r5;
import static jdk.vm.ci.aarch64.AArch64.r6;
@ -45,11 +46,13 @@ import static jdk.vm.ci.aarch64.AArch64.v4;
import static jdk.vm.ci.aarch64.AArch64.v5;
import static jdk.vm.ci.aarch64.AArch64.v6;
import static jdk.vm.ci.aarch64.AArch64.v7;
import static jdk.vm.ci.aarch64.AArch64.zr;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import jdk.vm.ci.aarch64.AArch64;
@ -130,16 +133,20 @@ public class AArch64HotSpotRegisterConfig implements RegisterConfig {
public static final Register threadRegister = r28;
public static final Register fp = r29;
private static final Register[] reservedRegisters = {threadRegister, fp, lr, r31, zr, sp};
private static Register[] initAllocatable(Architecture arch, boolean reserveForHeapBase) {
Register[] allRegisters = arch.getAvailableValueRegisters();
Register[] registers = new Register[allRegisters.length - (reserveForHeapBase ? 5 : 4)];
Register[] registers = new Register[allRegisters.length - reservedRegisters.length - (reserveForHeapBase ? 1 : 0)];
List<Register> reservedRegistersList = Arrays.asList(reservedRegisters);
int idx = 0;
for (Register reg : allRegisters) {
if (reg.equals(threadRegister) || reg.equals(fp) || reg.equals(lr) || reg.equals(sp)) {
// skip thread register, frame pointer, link register and stack pointer
if (reservedRegistersList.contains(reg)) {
// skip reserved registers
continue;
}
assert !(reg.equals(threadRegister) || reg.equals(fp) || reg.equals(lr) || reg.equals(r31) || reg.equals(zr) || reg.equals(sp));
if (reserveForHeapBase && reg.equals(heapBaseRegister)) {
// skip heap base register
continue;

@ -45,6 +45,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import jdk.vm.ci.code.Architecture;
@ -119,14 +120,17 @@ public class AMD64HotSpotRegisterConfig implements RegisterConfig {
*/
private final boolean needsNativeStackHomeSpace;
private static final Register[] reservedRegisters = {rsp, r15};
private static Register[] initAllocatable(Architecture arch, boolean reserveForHeapBase) {
Register[] allRegisters = arch.getAvailableValueRegisters();
Register[] registers = new Register[allRegisters.length - (reserveForHeapBase ? 3 : 2)];
Register[] registers = new Register[allRegisters.length - reservedRegisters.length - (reserveForHeapBase ? 1 : 0)];
List<Register> reservedRegistersList = Arrays.asList(reservedRegisters);
int idx = 0;
for (Register reg : allRegisters) {
if (reg.equals(rsp) || reg.equals(r15)) {
// skip stack pointer and thread register
if (reservedRegistersList.contains(reg)) {
// skip reserved registers
continue;
}
if (reserveForHeapBase && reg.equals(r12)) {

@ -68,6 +68,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import jdk.vm.ci.code.Architecture;
import jdk.vm.ci.code.CallingConvention;
@ -140,14 +141,17 @@ public class SPARCHotSpotRegisterConfig implements RegisterConfig {
i0, i1, i2, i3, i4, i5, i6, i7};
// @formatter:on
private static final Register[] reservedRegisters = {sp, g0, g2};
private static Register[] initAllocatable(Architecture arch, boolean reserveForHeapBase) {
Register[] allRegisters = arch.getAvailableValueRegisters();
Register[] registers = new Register[allRegisters.length - (reserveForHeapBase ? 4 : 3)];
Register[] registers = new Register[allRegisters.length - reservedRegisters.length - (reserveForHeapBase ? 1 : 0)];
List<Register> reservedRegistersList = Arrays.asList(reservedRegisters);
int idx = 0;
for (Register reg : allRegisters) {
if (reg.equals(sp) || reg.equals(g2) || reg.equals(g0)) {
// skip g0, stack pointer and thread register
if (reservedRegistersList.contains(reg)) {
// skip reserved registers
continue;
}
if (reserveForHeapBase && reg.equals(g6)) {

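The three hunks above make the same refactoring on AArch64, AMD64 and SPARC: the set of non-allocatable registers is declared once in a reservedRegisters array, and both the result-array sizing and the skip test are derived from it, so the two can no longer drift apart. A minimal self-contained sketch of the shared pattern (String stands in for jdk.vm.ci.code.Register; the register names below are illustrative):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class AllocatableSketch {
    // Same shape as the new initAllocatable bodies: filter out every reserved
    // register, plus the heap base register when compressed oops reserve it.
    static String[] initAllocatable(String[] allRegisters, String[] reservedRegisters,
                                    String heapBaseRegister, boolean reserveForHeapBase) {
        List<String> reserved = Arrays.asList(reservedRegisters);
        List<String> result = new ArrayList<>();
        for (String reg : allRegisters) {
            if (reserved.contains(reg)) {
                continue; // skip reserved registers
            }
            if (reserveForHeapBase && reg.equals(heapBaseRegister)) {
                continue; // skip heap base register
            }
            result.add(reg);
        }
        return result.toArray(new String[0]);
    }

    public static void main(String[] args) {
        // AMD64-flavored example: rsp and r15 reserved, r12 reserved for heap base.
        String[] all = {"rax", "rbx", "r12", "r15", "rsp"};
        String[] reserved = {"rsp", "r15"};
        System.out.println(Arrays.toString(initAllocatable(all, reserved, "r12", true)));
        // prints [rax, rbx]
    }
}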
@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -26,6 +26,7 @@ import static jdk.vm.ci.hotspot.HotSpotJVMCIRuntimeProvider.getArrayBaseOffset;
import static jdk.vm.ci.hotspot.HotSpotJVMCIRuntimeProvider.getArrayIndexScale;
import java.lang.reflect.Array;
import java.util.Objects;
import jdk.vm.ci.common.JVMCIError;
import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime.Option;
@ -70,13 +71,13 @@ public class HotSpotConstantReflectionProvider implements ConstantReflectionProv
} else if (x instanceof HotSpotObjectConstantImpl) {
return y instanceof HotSpotObjectConstantImpl && ((HotSpotObjectConstantImpl) x).object() == ((HotSpotObjectConstantImpl) y).object();
} else {
return x.equals(y);
return Objects.equals(x, y);
}
}
@Override
public Integer readArrayLength(JavaConstant array) {
if (array.getJavaKind() != JavaKind.Object || array.isNull()) {
if (array == null || array.getJavaKind() != JavaKind.Object || array.isNull()) {
return null;
}
@ -133,12 +134,12 @@ public class HotSpotConstantReflectionProvider implements ConstantReflectionProv
@Override
public JavaConstant readArrayElement(JavaConstant array, int index) {
if (array.getJavaKind() != JavaKind.Object || array.isNull()) {
if (array == null || array.getJavaKind() != JavaKind.Object || array.isNull()) {
return null;
}
Object a = ((HotSpotObjectConstantImpl) array).object();
if (index < 0 || index >= Array.getLength(a)) {
if (!a.getClass().isArray() || index < 0 || index >= Array.getLength(a)) {
return null;
}
@ -184,7 +185,7 @@ public class HotSpotConstantReflectionProvider implements ConstantReflectionProv
@Override
public JavaConstant boxPrimitive(JavaConstant source) {
if (!source.getJavaKind().isPrimitive() || !isBoxCached(source)) {
if (source == null || !source.getJavaKind().isPrimitive() || !isBoxCached(source)) {
return null;
}
return HotSpotObjectConstantImpl.forObject(source.asBoxedPrimitive());
@ -192,7 +193,7 @@ public class HotSpotConstantReflectionProvider implements ConstantReflectionProv
@Override
public JavaConstant unboxPrimitive(JavaConstant source) {
if (!source.getJavaKind().isObject()) {
if (source == null || !source.getJavaKind().isObject()) {
return null;
}
if (source.isNull()) {

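The provider changes above are all null-hardening: each entry point now returns null for a null JavaConstant argument, and constantEquals goes through Objects.equals so a null on either side compares safely instead of throwing. A tiny demonstration of the Objects.equals semantics being relied on:

import java.util.Objects;

public class NullSafeEquals {
    public static void main(String[] args) {
        System.out.println(Objects.equals(null, null)); // true: both null
        System.out.println(Objects.equals(null, "x"));  // false, and no NPE
        System.out.println(Objects.equals("x", "x"));   // true: delegates to equals
    }
}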
@ -56,6 +56,13 @@ public interface HotSpotResolvedJavaMethod extends ResolvedJavaMethod {
*/
boolean isDontInline();
/**
* Returns true if this method has a {@code ReservedStackAccess} annotation.
*
* @return true if the ReservedStackAccess annotation is present, false otherwise
*/
boolean hasReservedStackAccess();
/**
* Manually adds a DontInline annotation to this method.
*/

@ -308,6 +308,15 @@ final class HotSpotResolvedJavaMethodImpl extends HotSpotMethod implements HotSp
return (getFlags() & config().methodFlagsDontInline) != 0;
}
/**
* Returns true if this method has a {@code ReservedStackAccess} annotation.
*
* @return true if the ReservedStackAccess annotation is present, false otherwise
*/
public boolean hasReservedStackAccess() {
return (getFlags() & config().methodFlagsReservedStackAccess) != 0;
}
/**
* Manually adds a DontInline annotation to this method.
*/

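hasReservedStackAccess follows the same pattern as isDontInline: each Method::_flags bit is surfaced through a mask constant read from HotSpotVMConfig (methodFlagsReservedStackAccess, added further down). A hedged sketch of the bit-test pattern; the mask values here are made up for illustration, not the real HotSpot constants:

public class MethodFlagsSketch {
    static final int DONT_INLINE           = 1 << 3; // hypothetical mask value
    static final int RESERVED_STACK_ACCESS = 1 << 6; // hypothetical mask value

    // Same shape as (getFlags() & config().methodFlags...) != 0
    static boolean hasFlag(int flags, int mask) {
        return (flags & mask) != 0;
    }

    public static void main(String[] args) {
        int flags = RESERVED_STACK_ACCESS;
        System.out.println(hasFlag(flags, DONT_INLINE));           // false
        System.out.println(hasFlag(flags, RESERVED_STACK_ACCESS)); // true
    }
}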
@ -850,6 +850,7 @@ public class HotSpotVMConfig {
@HotSpotVMFlag(name = "DontCompileHugeMethods") @Stable public boolean dontCompileHugeMethods;
@HotSpotVMFlag(name = "HugeMethodLimit") @Stable public int hugeMethodLimit;
@HotSpotVMFlag(name = "PrintInlining") @Stable public boolean printInlining;
@HotSpotVMFlag(name = "Inline") @Stable public boolean inline;
@HotSpotVMFlag(name = "JVMCIUseFastLocking") @Stable public boolean useFastLocking;
@HotSpotVMFlag(name = "ForceUnreachable") @Stable public boolean forceUnreachable;
@HotSpotVMFlag(name = "CodeCacheSegmentSize") @Stable public int codeSegmentSize;
@ -974,6 +975,7 @@ public class HotSpotVMConfig {
@HotSpotVMFlag(name = "BlockZeroingLowLimit", archs = {"sparc"}) @Stable public int blockZeroingLowLimit;
@HotSpotVMFlag(name = "StackShadowPages") @Stable public int stackShadowPages;
@HotSpotVMFlag(name = "StackReservedPages") @Stable public int stackReservedPages;
@HotSpotVMFlag(name = "UseStackBanging") @Stable public boolean useStackBanging;
@HotSpotVMConstant(name = "STACK_BIAS") @Stable public int stackBias;
@HotSpotVMField(name = "CompilerToVM::Data::vm_page_size", type = "int", get = HotSpotVMField.Type.VALUE) @Stable public int vmPageSize;
@ -1092,6 +1094,7 @@ public class HotSpotVMConfig {
@HotSpotVMField(name = "JavaThread::_satb_mark_queue", type = "SATBMarkQueue", get = HotSpotVMField.Type.OFFSET) @Stable public int javaThreadSatbMarkQueueOffset;
@HotSpotVMField(name = "JavaThread::_vm_result", type = "oop", get = HotSpotVMField.Type.OFFSET) @Stable public int threadObjectResultOffset;
@HotSpotVMField(name = "JavaThread::_jvmci_counters", type = "jlong*", get = HotSpotVMField.Type.OFFSET) @Stable public int jvmciCountersThreadOffset;
@HotSpotVMField(name = "JavaThread::_reserved_stack_activation", type = "address", get = HotSpotVMField.Type.OFFSET) @Stable public int javaThreadReservedStackActivationOffset;
/**
* An invalid value for {@link #rtldDefault}.
@ -1235,6 +1238,7 @@ public class HotSpotVMConfig {
@HotSpotVMConstant(name = "Method::_force_inline") @Stable public int methodFlagsForceInline;
@HotSpotVMConstant(name = "Method::_dont_inline") @Stable public int methodFlagsDontInline;
@HotSpotVMConstant(name = "Method::_hidden") @Stable public int methodFlagsHidden;
@HotSpotVMConstant(name = "Method::_reserved_stack_access") @Stable public int methodFlagsReservedStackAccess;
@HotSpotVMConstant(name = "Method::nonvirtual_vtable_index") @Stable public int nonvirtualVtableIndex;
@HotSpotVMConstant(name = "Method::invalid_vtable_index") @Stable public int invalidVtableIndex;
@ -1491,6 +1495,8 @@ public class HotSpotVMConfig {
@HotSpotVMField(name = "StubRoutines::_updateBytesCRC32", type = "address", get = HotSpotVMField.Type.VALUE) @Stable public long updateBytesCRC32Stub;
@HotSpotVMField(name = "StubRoutines::_crc_table_adr", type = "address", get = HotSpotVMField.Type.VALUE) @Stable public long crcTableAddress;
@HotSpotVMField(name = "StubRoutines::_throw_delayed_StackOverflowError_entry", type = "address", get = HotSpotVMField.Type.VALUE) @Stable public long throwDelayedStackOverflowErrorEntry;
@HotSpotVMField(name = "StubRoutines::_jbyte_arraycopy", type = "address", get = HotSpotVMField.Type.VALUE) @Stable public long jbyteArraycopy;
@HotSpotVMField(name = "StubRoutines::_jshort_arraycopy", type = "address", get = HotSpotVMField.Type.VALUE) @Stable public long jshortArraycopy;
@HotSpotVMField(name = "StubRoutines::_jint_arraycopy", type = "address", get = HotSpotVMField.Type.VALUE) @Stable public long jintArraycopy;
@ -1548,6 +1554,7 @@ public class HotSpotVMConfig {
@HotSpotVMAddress(name = "SharedRuntime::register_finalizer") @Stable public long registerFinalizerAddress;
@HotSpotVMAddress(name = "SharedRuntime::exception_handler_for_return_address") @Stable public long exceptionHandlerForReturnAddressAddress;
@HotSpotVMAddress(name = "SharedRuntime::OSR_migration_end") @Stable public long osrMigrationEndAddress;
@HotSpotVMAddress(name = "SharedRuntime::enable_stack_reserved_zone") @Stable public long enableStackReservedZoneAddress;
@HotSpotVMAddress(name = "os::javaTimeMillis") @Stable public long javaTimeMillisAddress;
@HotSpotVMAddress(name = "os::javaTimeNanos") @Stable public long javaTimeNanosAddress;

@ -0,0 +1,213 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE module PUBLIC "-//Puppy Crawl//DTD Check Configuration 1.3//EN" "http://www.puppycrawl.com/dtds/configuration_1_3.dtd">
<!--
Checkstyle-Configuration: Checks
Description: none
-->
<module name="Checker">
<property name="severity" value="error"/>
<module name="TreeWalker">
<property name="tabWidth" value="4"/>
<module name="FileContentsHolder"/>
<module name="JavadocStyle">
<property name="checkHtml" value="false"/>
</module>
<module name="LocalFinalVariableName"/>
<module name="LocalVariableName"/>
<module name="MemberName">
<property name="format" value="^(([a-z][a-zA-Z0-9]*$)|(_[A-Z][a-zA-Z0-9]*_[a-z][a-zA-Z0-9]*$))"/>
</module>
<module name="MethodName"/>
<module name="PackageName"/>
<module name="ParameterName"/>
<module name="TypeName">
<property name="format" value="^[A-Z][_a-zA-Z0-9]*$"/>
</module>
<module name="RedundantImport"/>
<module name="LineLength">
<property name="max" value="250"/>
</module>
<module name="MethodParamPad"/>
<module name="NoWhitespaceAfter">
<property name="tokens" value="ARRAY_INIT,BNOT,DEC,DOT,INC,LNOT,UNARY_MINUS,UNARY_PLUS"/>
</module>
<module name="AvoidStarImport">
<property name="allowClassImports" value="false"/>
<property name="allowStaticMemberImports" value="false"/>
</module>
<module name="NoWhitespaceBefore">
<property name="tokens" value="SEMI,DOT,POST_DEC,POST_INC"/>
</module>
<module name="ParenPad"/>
<module name="TypecastParenPad">
<property name="tokens" value="RPAREN,TYPECAST"/>
</module>
<module name="WhitespaceAfter"/>
<module name="WhitespaceAround">
<property name="tokens" value="ASSIGN,BAND,BAND_ASSIGN,BOR,BOR_ASSIGN,BSR,BSR_ASSIGN,BXOR,BXOR_ASSIGN,COLON,DIV,DIV_ASSIGN,EQUAL,GE,GT,LAND,LE,LITERAL_ASSERT,LITERAL_CATCH,LITERAL_DO,LITERAL_ELSE,LITERAL_FINALLY,LITERAL_FOR,LITERAL_IF,LITERAL_RETURN,LITERAL_SYNCHRONIZED,LITERAL_TRY,LITERAL_WHILE,LOR,LT,MINUS,MINUS_ASSIGN,MOD,MOD_ASSIGN,NOT_EQUAL,PLUS,PLUS_ASSIGN,QUESTION,SL,SLIST,SL_ASSIGN,SR,SR_ASSIGN,STAR,STAR_ASSIGN,LITERAL_ASSERT,TYPE_EXTENSION_AND"/>
</module>
<module name="RedundantModifier"/>
<module name="AvoidNestedBlocks">
<property name="allowInSwitchCase" value="true"/>
</module>
<module name="EmptyBlock">
<property name="option" value="text"/>
<property name="tokens" value="LITERAL_DO,LITERAL_ELSE,LITERAL_FINALLY,LITERAL_IF,LITERAL_TRY,LITERAL_WHILE,STATIC_INIT"/>
</module>
<module name="LeftCurly"/>
<module name="NeedBraces"/>
<module name="RightCurly"/>
<module name="EmptyStatement"/>
<module name="HiddenField">
<property name="severity" value="ignore"/>
<property name="ignoreConstructorParameter" value="true"/>
<metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/>
</module>
<module name="FinalClass"/>
<module name="HideUtilityClassConstructor">
<property name="severity" value="ignore"/>
<metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/>
</module>
<module name="ArrayTypeStyle"/>
<module name="UpperEll"/>
<module name="FallThrough"/>
<module name="FinalLocalVariable">
<property name="severity" value="ignore"/>
<metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/>
</module>
<module name="MultipleVariableDeclarations"/>
<module name="StringLiteralEquality">
<property name="severity" value="error"/>
</module>
<module name="SuperFinalize"/>
<module name="UnnecessaryParentheses">
<property name="severity" value="ignore"/>
<metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/>
</module>
<module name="Indentation">
<property name="severity" value="ignore"/>
<metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/>
</module>
<module name="StaticVariableName">
<property name="format" value="^[A-Za-z][a-zA-Z0-9]*$"/>
</module>
<module name="EmptyForInitializerPad"/>
<module name="EmptyForIteratorPad"/>
<module name="ModifierOrder"/>
<module name="DefaultComesLast"/>
<module name="InnerAssignment">
<property name="severity" value="ignore"/>
<metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/>
</module>
<module name="ModifiedControlVariable"/>
<module name="MutableException">
<property name="severity" value="ignore"/>
<metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/>
</module>
<module name="ParameterAssignment">
<property name="severity" value="ignore"/>
<metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/>
</module>
<module name="RegexpSinglelineJava">
<metadata name="net.sf.eclipsecs.core.comment" value="Illegal trailing whitespace(s) at the end of the line."/>
<property name="format" value="\s$"/>
<property name="message" value="Illegal trailing whitespace(s) at the end of the line."/>
<property name="ignoreComments" value="true"/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.comment" value="Checks for trailing spaces at the end of a line"/>
</module>
<module name="RegexpSinglelineJava">
<metadata name="net.sf.eclipsecs.core.comment" value="illegal space before a comma"/>
<property name="format" value=" ,"/>
<property name="message" value="illegal space before a comma"/>
<property name="ignoreComments" value="true"/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.comment" value="Checks for whitespace before a comma."/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.customMessage" value="Illegal whitespace before a comma."/>
</module>
<module name="RegexpSinglelineJava">
<property name="format" value="[^\x00-\x7F]"/>
<property name="message" value="Only use ASCII characters."/>
</module>
<module name="RegexpSinglelineJava">
<property name="format" value="new (Hashtable|Vector|Stack|StringBuffer)[^\w]"/>
<property name="message" value="Don't use old synchronized collection classes"/>
</module>
</module>
<module name="RegexpHeader">
<property name="header" value="/\*\n \* Copyright \(c\) (20[0-9][0-9], )?20[0-9][0-9], Oracle and/or its affiliates. All rights reserved.\n \* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.\n \*\n \* This code is free software; you can redistribute it and/or modify it\n \* under the terms of the GNU General Public License version 2 only, as\n \* published by the Free Software Foundation.\n \*\n \* This code is distributed in the hope that it will be useful, but WITHOUT\n \* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or\n \* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License\n \* version 2 for more details \(a copy is included in the LICENSE file that\n \* accompanied this code\).\n \*\n \* You should have received a copy of the GNU General Public License version\n \* 2 along with this work; if not, write to the Free Software Foundation,\n \* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.\n \*\n \* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA\n \* or visit www.oracle.com if you need additional information or have any\n \* questions.\n \*/\n"/>
<property name="fileExtensions" value="java"/>
</module>
<module name="FileTabCharacter">
<property name="severity" value="error"/>
<property name="fileExtensions" value="java"/>
</module>
<module name="NewlineAtEndOfFile">
<property name="lineSeparator" value="lf"/>
</module>
<module name="Translation"/>
<module name="SuppressionCommentFilter">
<property name="offCommentFormat" value="Checkstyle: stop constant name check"/>
<property name="onCommentFormat" value="Checkstyle: resume constant name check"/>
<property name="checkFormat" value="ConstantNameCheck"/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.comment" value="Allow non-conforming constant names"/>
</module>
<module name="SuppressionCommentFilter">
<property name="offCommentFormat" value="Checkstyle: stop method name check"/>
<property name="onCommentFormat" value="Checkstyle: resume method name check"/>
<property name="checkFormat" value="MethodName"/>
<property name="checkC" value="false"/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.comment" value="Disable method name checks"/>
</module>
<module name="SuppressionCommentFilter">
<property name="offCommentFormat" value="CheckStyle: stop parameter assignment check"/>
<property name="onCommentFormat" value="CheckStyle: resume parameter assignment check"/>
<property name="checkFormat" value="ParameterAssignment"/>
<property name="checkC" value="false"/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.comment" value="Disable Parameter Assignment"/>
</module>
<module name="SuppressionCommentFilter">
<property name="offCommentFormat" value="Checkstyle: stop final variable check"/>
<property name="onCommentFormat" value="Checkstyle: resume final variable check"/>
<property name="checkFormat" value="FinalLocalVariable"/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.comment" value="Disable final variable checks"/>
</module>
<module name="SuppressionCommentFilter">
<property name="offCommentFormat" value="Checkstyle: stop"/>
<property name="onCommentFormat" value="Checkstyle: resume"/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.comment" value="Disable all checks"/>
</module>
<module name="SuppressionCommentFilter">
<property name="offCommentFormat" value="CheckStyle: stop inner assignment check"/>
<property name="onCommentFormat" value="CheckStyle: resume inner assignment check"/>
<property name="checkFormat" value="InnerAssignment"/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.comment" value="Disable inner assignment checks"/>
</module>
<module name="SuppressionCommentFilter">
<property name="offCommentFormat" value="Checkstyle: stop field name check"/>
<property name="onCommentFormat" value="Checkstyle: resume field name check"/>
<property name="checkFormat" value="MemberName"/>
<property name="checkC" value="false"/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.comment" value="Disable field name checks"/>
</module>
<module name="RegexpMultiline">
<metadata name="net.sf.eclipsecs.core.comment" value="illegal Windows line ending"/>
<property name="format" value="\r\n"/>
<property name="message" value="illegal Windows line ending"/>
</module>
<module name="SuppressionCommentFilter">
<property name="offCommentFormat" value="CheckStyle: stop header check"/>
<property name="onCommentFormat" value="CheckStyle: resume header check"/>
<property name="checkFormat" value=".*Header"/>
<metadata name="com.atlassw.tools.eclipse.checkstyle.comment" value="Disable header checks"/>
</module>
<module name="SuppressionCommentFilter">
<property name="offCommentFormat" value="CheckStyle: stop line length check"/>
<property name="onCommentFormat" value="CheckStyle: resume line length check"/>
<property name="checkFormat" value="LineLength"/>
</module>
<module name="SuppressionCommentFilter">
<property name="offCommentFormat" value="CheckStyle: start generated"/>
<property name="onCommentFormat" value="CheckStyle: stop generated"/>
<property name="checkFormat" value=".*Name|.*LineLength|.*Header"/>
</module>
</module>

@ -730,7 +730,7 @@ void Canonicalizer::do_If(If* x) {
}
} else if (rt == objectNull &&
(l->as_NewInstance() || l->as_NewArray() ||
(UseNewCode && l->as_Local() && l->as_Local()->is_receiver()))) {
(l->as_Local() && l->as_Local()->is_receiver()))) {
if (x->cond() == Instruction::eql) {
BlockBegin* sux = x->fsux();
set_canonical(new Goto(sux, x->state_before(), is_safepoint(x, sux)));

@ -211,6 +211,12 @@ static bool trust_final_non_static_fields(ciInstanceKlass* holder) {
// so there is no hacking of finals going on with them.
if (holder->is_anonymous())
return true;
// Trust final fields in all boxed classes
if (holder->is_box_klass())
return true;
// Trust final fields in String
if (holder->name() == ciSymbol::java_lang_String())
return true;
// Trust Atomic*FieldUpdaters: they are very important for performance, and make up one
// more reason not to use Unsafe, if their final fields are trusted. See more in JDK-8140483.
if (holder->name() == ciSymbol::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_Impl() ||

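Trusting finals in the box classes and String is safe because those fields are written exactly once, in the constructor, and the classes expose no mutators; the payoff is that a load from a constant receiver can be folded to a constant. A hedged source-level illustration of the kind of load that becomes foldable (the folding itself happens in the JIT, not in javac):

public class TrustedFinals {
    public static void main(String[] args) {
        Integer boxed = Integer.valueOf(42); // Integer.value is a final field
        String s = "constant";               // String.value is a final field
        // With the receivers constant, reads of those finals can constant-fold.
        System.out.println(boxed.intValue() + s.length()); // 50
    }
}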
@ -291,6 +291,9 @@ MethodHandlesAdapterBlob* MethodHandlesAdapterBlob::create(int buffer_size) {
{
MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
blob = new (size) MethodHandlesAdapterBlob(size);
if (blob == NULL) {
vm_exit_out_of_memory(size, OOM_MALLOC_ERROR, "CodeCache: no room for method handle adapter blob");
}
}
// Track memory usage statistic after releasing CodeCache_lock
MemoryService::track_code_cache_memory_usage();

@ -33,6 +33,7 @@
#include "compiler/compileBroker.hpp"
#include "compiler/compileLog.hpp"
#include "compiler/compilerDirectives.hpp"
#include "compiler/directivesParser.hpp"
#include "compiler/disassembler.hpp"
#include "interpreter/bytecode.hpp"
#include "oops/methodData.hpp"
@ -965,6 +966,12 @@ void nmethod::print_on(outputStream* st, const char* msg) const {
}
}
void nmethod::maybe_print_nmethod(DirectiveSet* directive) {
bool printnmethods = directive->PrintAssemblyOption || directive->PrintNMethodsOption;
if (printnmethods || PrintDebugInfo || PrintRelocations || PrintDependencies || PrintExceptionHandlers) {
print_nmethod(printnmethods);
}
}
void nmethod::print_nmethod(bool printmethod) {
ttyLocker ttyl; // keep the following output all in one block

@ -29,6 +29,8 @@
#include "code/pcDesc.hpp"
#include "oops/metadata.hpp"
class DirectiveSet;
// This class is used internally by nmethods, to cache
// exception/pc/handler information.
@ -714,6 +716,8 @@ public:
void print_nul_chk_table() PRODUCT_RETURN;
void print_recorded_oops() PRODUCT_RETURN;
void print_recorded_metadata() PRODUCT_RETURN;
void maybe_print_nmethod(DirectiveSet* directive);
void print_nmethod(bool print_code);
// need to re-define this from CodeBlob else the overload hides it

@ -1919,12 +1919,9 @@ void CompileBroker::invoke_compiler_on_method(CompileTask* task) {
collect_statistics(thread, time, task);
bool printnmethods = directive->PrintAssemblyOption || directive->PrintNMethodsOption;
if (printnmethods || PrintDebugInfo || PrintRelocations || PrintDependencies || PrintExceptionHandlers) {
nmethod* nm = task->code();
if (nm != NULL) {
nm->print_nmethod(printnmethods);
}
nmethod* nm = task->code();
if (nm != NULL) {
nm->maybe_print_nmethod(directive);
}
DirectivesStack::release(directive);

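The print decision that CompileBroker used to open-code moves onto nmethod as maybe_print_nmethod, so the JVMCI path below (jvmciEnv.cpp) can reuse it for compiles that never go through the broker. A hedged sketch of the shape of that refactoring:

public class PrintGuardSketch {
    static class DirectiveSet {
        boolean printAssemblyOption;
        boolean printNMethodsOption;
    }

    static class NMethod {
        // The guard lives with the nmethod now, so every caller gets the
        // same policy instead of re-deriving it.
        void maybePrintNMethod(DirectiveSet directive) {
            boolean print = directive.printAssemblyOption || directive.printNMethodsOption;
            if (print) {
                System.out.println("printing nmethod");
            }
        }
    }

    public static void main(String[] args) {
        DirectiveSet d = new DirectiveSet();
        d.printNMethodsOption = true;
        new NMethod().maybePrintNMethod(d); // prints once, via the shared helper
    }
}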
@ -34,7 +34,7 @@
#if INCLUDE_ALL_GCS
// Unit test
#ifdef ASSERT
#ifndef PRODUCT
static unsigned line_byte(const char* line, size_t i) {
return unsigned(line[i]) & 0xFF;

@ -1091,7 +1091,7 @@ void CodeInstaller::site_DataPatch(CodeBuffer& buffer, jint pc_offset, Handle si
} else if (reference->is_a(site_DataSectionReference::klass())) {
int data_offset = site_DataSectionReference::offset(reference);
if (0 <= data_offset && data_offset < _constants_size) {
pd_patch_DataSectionReference(pc_offset, data_offset);
pd_patch_DataSectionReference(pc_offset, data_offset, CHECK);
} else {
JVMCI_ERROR("data offset 0x%X points outside data section (size 0x%X)", data_offset, _constants_size);
}

@ -156,7 +156,7 @@ private:
jint pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS);
void pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS);
void pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS);
void pd_patch_DataSectionReference(int pc_offset, int data_offset);
void pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS);
void pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS);
void pd_relocate_JavaMethod(Handle method, jint pc_offset, TRAPS);
void pd_relocate_poll(address pc, jint mark, TRAPS);

@ -591,6 +591,13 @@ JVMCIEnv::CodeInstallResult JVMCIEnv::register_method(
// JVMTI -- compiled method notification (must be done outside lock)
if (nm != NULL) {
nm->post_compiled_method_load_event();
if (env == NULL) {
// This compile didn't come through the CompileBroker so perform the printing here
DirectiveSet* directive = DirectivesStack::getMatchingDirective(method, compiler);
nm->maybe_print_nmethod(directive);
DirectivesStack::release(directive);
}
}
return result;

@ -151,6 +151,7 @@
nonstatic_field(JavaThread, _pending_failed_speculation, oop) \
nonstatic_field(JavaThread, _pending_transfer_to_interpreter, bool) \
nonstatic_field(JavaThread, _jvmci_counters, jlong*) \
nonstatic_field(JavaThread, _reserved_stack_activation, address) \
\
static_field(java_lang_Class, _klass_offset, int) \
static_field(java_lang_Class, _array_klass_offset, int) \
@ -210,6 +211,8 @@
\
static_field(StubRoutines, _verify_oop_count, jint) \
\
static_field(StubRoutines, _throw_delayed_StackOverflowError_entry, address) \
\
static_field(StubRoutines, _jbyte_arraycopy, address) \
static_field(StubRoutines, _jshort_arraycopy, address) \
static_field(StubRoutines, _jint_arraycopy, address) \
@ -471,6 +474,7 @@
declare_constant(Method::_force_inline) \
declare_constant(Method::_dont_inline) \
declare_constant(Method::_hidden) \
declare_constant(Method::_reserved_stack_access) \
\
declare_constant(Method::nonvirtual_vtable_index) \
declare_constant(Method::invalid_vtable_index) \
@ -517,6 +521,7 @@
declare_function(SharedRuntime::register_finalizer) \
declare_function(SharedRuntime::exception_handler_for_return_address) \
declare_function(SharedRuntime::OSR_migration_end) \
declare_function(SharedRuntime::enable_stack_reserved_zone) \
declare_function(SharedRuntime::dsin) \
declare_function(SharedRuntime::dcos) \
declare_function(SharedRuntime::dtan) \

@ -182,6 +182,10 @@
"Unroll loop bodies with node count less than this") \
range(0, max_jint / 4) \
\
product_pd(intx, LoopPercentProfileLimit, \
"Unroll loop bodies with % node count of profile limit") \
range(10, 100) \
\
product(intx, LoopMaxUnroll, 16, \
"Maximum number of unrolls for main loop") \
range(0, max_jint) \

@ -825,10 +825,12 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod*
input_not_const = false;
const TypeOopPtr* oop_ptr = receiver->bottom_type()->is_oopptr();
ciMethod* target = oop_ptr->const_oop()->as_method_handle()->get_vmtarget();
guarantee(!target->is_method_handle_intrinsic(), "should not happen"); // XXX remove
const int vtable_index = Method::invalid_vtable_index;
CallGenerator* cg = C->call_generator(target, vtable_index, false, jvms, true, PROB_ALWAYS, NULL, true, true);
assert(cg == NULL || !cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here");
CallGenerator* cg = C->call_generator(target, vtable_index,
false /* call_does_dispatch */,
jvms,
true /* allow_inline */,
PROB_ALWAYS);
return cg;
} else {
const char* msg = "receiver not constant";
@ -899,13 +901,15 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod*
target = C->optimize_virtual_call(caller, jvms->bci(), klass, klass,
target, receiver_type, is_virtual,
call_does_dispatch, vtable_index, // out-parameters
/*check_access=*/false);
false /* check_access */);
// We lack profiling at this call but type speculation may
// provide us with a type
speculative_receiver_type = (receiver_type != NULL) ? receiver_type->speculative_type() : NULL;
}
CallGenerator* cg = C->call_generator(target, vtable_index, call_does_dispatch, jvms, /*allow_inline=*/true, PROB_ALWAYS, speculative_receiver_type, true, true);
assert(cg == NULL || !cg->is_late_inline() || cg->is_mh_late_inline(), "no late inline here");
CallGenerator* cg = C->call_generator(target, vtable_index, call_does_dispatch, jvms,
true /* allow_inline */,
PROB_ALWAYS,
speculative_receiver_type);
return cg;
} else {
const char* msg = "member_name not constant";

@ -37,7 +37,6 @@
Node* ConstraintCastNode::Identity(PhaseGVN* phase) {
Node* dom = dominating_cast(phase);
if (dom != NULL) {
assert(_carry_dependency, "only for casts that carry a dependency");
return dom;
}
if (_carry_dependency) {
@ -110,18 +109,22 @@ Node* ConstraintCastNode::make_cast(int opcode, Node* c, Node *n, const Type *t,
}
TypeNode* ConstraintCastNode::dominating_cast(PhaseTransform *phase) const {
if (!carry_dependency()) {
return NULL;
}
Node* val = in(1);
Node* ctl = in(0);
int opc = Opcode();
if (ctl == NULL) {
return NULL;
}
// Range check CastIIs may all end up under a single range check and
// in that case only the narrower CastII would be kept by the code
// below which would be incorrect.
if (is_CastII() && as_CastII()->has_range_check()) {
return NULL;
}
for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
Node* u = val->fast_out(i);
if (u != this &&
u->outcnt() > 0 &&
u->Opcode() == opc &&
u->in(0) != NULL &&
u->bottom_type()->higher_equal(type())) {
@ -300,7 +303,6 @@ void CastIINode::dump_spec(outputStream* st) const {
Node* CheckCastPPNode::Identity(PhaseGVN* phase) {
Node* dom = dominating_cast(phase);
if (dom != NULL) {
assert(_carry_dependency, "only for casts that carry a dependency");
return dom;
}
if (_carry_dependency) {

@ -666,7 +666,8 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
if (future_unroll_ct > LoopMaxUnroll) return false;
} else {
// obey user constraints on vector mapped loops with additional unrolling applied
if ((future_unroll_ct / cl->slp_max_unroll()) > LoopMaxUnroll) return false;
int unroll_constraint = (cl->slp_max_unroll()) ? cl->slp_max_unroll() : 1;
if ((future_unroll_ct / unroll_constraint) > LoopMaxUnroll) return false;
}
// Check for initial stride being a small enough constant
@ -689,7 +690,7 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
// Progress defined as current size less than 20% larger than previous size.
if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
future_unroll_ct > LoopUnrollMin &&
(future_unroll_ct - 1) * 10.0 > cl->profile_trip_cnt() &&
(future_unroll_ct - 1) * (100 / LoopPercentProfileLimit) > cl->profile_trip_cnt() &&
1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
return false;
}
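With LoopPercentProfileLimit at its minimum of 10 (see the range in the globals hunk above), the integer factor 100 / 10 == 10 matches the old hard-coded 10.0, so behavior at that setting is unchanged; raising the flag toward its upper bound of 100 shrinks the factor toward 1 and lets unrolling continue further relative to the profiled trip count. A small worked example of the guard (names paraphrased from the hunk above):

public class ProfileLimitExample {
    // Mirrors the sub-condition above: true means "stop unrolling".
    static boolean blocksUnroll(int futureUnrollCt, float profileTripCnt,
                                int loopPercentProfileLimit) {
        return (futureUnrollCt - 1) * (100 / loopPercentProfileLimit) > profileTripCnt;
    }

    public static void main(String[] args) {
        System.out.println(blocksUnroll(4, 25.0f, 10)); // true:  (4-1)*10 = 30 > 25
        System.out.println(blocksUnroll(4, 25.0f, 50)); // false: (4-1)*2  =  6 <= 25
    }
}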
@ -1260,6 +1261,146 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
loop->record_for_igvn();
}
//------------------------------insert_vector_post_loop------------------------
// Insert a copy of the atomic unrolled vectorized main loop as a post loop;
// unroll_policy has already informed us that more unrolling is about to happen to
// the main loop. The resulting post loop will serve as a vectorized drain loop.
void PhaseIdealLoop::insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new) {
if (!loop->_head->is_CountedLoop()) return;
CountedLoopNode *cl = loop->_head->as_CountedLoop();
// only process vectorized main loops
if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
int slp_max_unroll_factor = cl->slp_max_unroll();
int cur_unroll = cl->unrolled_count();
if (slp_max_unroll_factor == 0) return;
// only process atomic unroll vector loops (not super unrolled after vectorization)
if (cur_unroll != slp_max_unroll_factor) return;
// we only ever process this one time
if (cl->has_atomic_post_loop()) return;
#ifndef PRODUCT
if (TraceLoopOpts) {
tty->print("PostVector ");
loop->dump_head();
}
#endif
C->set_major_progress();
// Find common pieces of the loop being guarded with pre & post loops
CountedLoopNode *main_head = loop->_head->as_CountedLoop();
CountedLoopEndNode *main_end = main_head->loopexit();
guarantee(main_end != NULL, "no loop exit node");
// diagnostic to show loop end is not properly formed
assert(main_end->outcnt() == 2, "1 true, 1 false path only");
uint dd_main_head = dom_depth(main_head);
uint max = main_head->outcnt();
// mark this loop as processed
main_head->mark_has_atomic_post_loop();
Node *pre_header = main_head->in(LoopNode::EntryControl);
Node *init = main_head->init_trip();
Node *incr = main_end->incr();
Node *limit = main_end->limit();
Node *stride = main_end->stride();
Node *cmp = main_end->cmp_node();
BoolTest::mask b_test = main_end->test_trip();
//------------------------------
// Step A: Create a new post-Loop.
Node* main_exit = main_end->proj_out(false);
assert(main_exit->Opcode() == Op_IfFalse, "");
int dd_main_exit = dom_depth(main_exit);
// Step A1: Clone the loop body of main. The clone becomes the vector post-loop.
// The main loop pre-header illegally has 2 control users (old & new loops).
clone_loop(loop, old_new, dd_main_exit);
assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
post_head->set_normal_loop();
post_head->set_post_loop(main_head);
// Reduce the post-loop trip count.
CountedLoopEndNode* post_end = old_new[main_end->_idx]->as_CountedLoopEnd();
post_end->_prob = PROB_FAIR;
// Build the main-loop normal exit.
IfFalseNode *new_main_exit = new IfFalseNode(main_end);
_igvn.register_new_node_with_optimizer(new_main_exit);
set_idom(new_main_exit, main_end, dd_main_exit);
set_loop(new_main_exit, loop->_parent);
// Step A2: Build a zero-trip guard for the vector post-loop. After leaving the
// main-loop, the vector post-loop may not execute at all. We 'opaque' the incr
// (the vectorized main-loop trip-counter exit value) because we will be changing
// the exit value (via additional unrolling) so we cannot constant-fold away the zero
// trip guard until all unrolling is done.
Node *zer_opaq = new Opaque1Node(C, incr);
Node *zer_cmp = new CmpINode(zer_opaq, limit);
Node *zer_bol = new BoolNode(zer_cmp, b_test);
register_new_node(zer_opaq, new_main_exit);
register_new_node(zer_cmp, new_main_exit);
register_new_node(zer_bol, new_main_exit);
// Build the IfNode
IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
_igvn.register_new_node_with_optimizer(zer_iff);
set_idom(zer_iff, new_main_exit, dd_main_exit);
set_loop(zer_iff, loop->_parent);
// Plug in the false-path, taken if we need to skip vector post-loop
_igvn.replace_input_of(main_exit, 0, zer_iff);
set_idom(main_exit, zer_iff, dd_main_exit);
set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
// Make the true-path, must enter the vector post loop
Node *zer_taken = new IfTrueNode(zer_iff);
_igvn.register_new_node_with_optimizer(zer_taken);
set_idom(zer_taken, zer_iff, dd_main_exit);
set_loop(zer_taken, loop->_parent);
// Plug in the true path
_igvn.hash_delete(post_head);
post_head->set_req(LoopNode::EntryControl, zer_taken);
set_idom(post_head, zer_taken, dd_main_exit);
Arena *a = Thread::current()->resource_area();
VectorSet visited(a);
Node_Stack clones(a, main_head->back_control()->outcnt());
// Step A3: Make the fall-in values to the vector post-loop come from the
// fall-out values of the main-loop.
for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
Node* main_phi = main_head->fast_out(i);
if (main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0) {
Node *cur_phi = old_new[main_phi->_idx];
Node *fallnew = clone_up_backedge_goo(main_head->back_control(),
post_head->init_control(),
main_phi->in(LoopNode::LoopBackControl),
visited, clones);
_igvn.hash_delete(cur_phi);
cur_phi->set_req(LoopNode::EntryControl, fallnew);
}
}
// CastII for the new post loop:
bool inserted = cast_incr_before_loop(zer_opaq->in(1), zer_taken, post_head);
assert(inserted, "no castII inserted");
// It's difficult to be precise about the trip-counts
// for post loops. They are usually very short,
// so assume one vector's worth of trips is a reasonable value.
post_head->set_profile_trip_cnt((float)slp_max_unroll_factor);
// Now force out all loop-invariant dominating tests. The optimizer
// finds some, but we _know_ they are all useless.
peeled_dom_test_elim(loop, old_new);
loop->record_for_igvn();
}
//------------------------------is_invariant-----------------------------
// Return true if n is invariant
bool IdealLoopTree::is_invariant(Node* n) const {
@ -2608,6 +2749,9 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_
// and we'd rather unroll the post-RCE'd loop SO... do not unroll if
// peeling.
if (should_unroll && !should_peel) {
if (SuperWordLoopUnrollAnalysis) {
phase->insert_vector_post_loop(this, old_new);
}
phase->do_unroll(this, old_new, true);
}

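The new insert_vector_post_loop pass clones the vectorized main loop into a post loop that drains the leftover iterations, guarded by the zero-trip check built above. A hedged source-level intuition of the resulting shape (scalar Java standing in for the generated IR; the inner loop plays the role of one vector-width stride, and the drain is shown scalar for simplicity even though in C2 the cloned drain keeps its vector form):

public class DrainLoopSketch {
    static int sumWithDrain(int[] a, int vectorWidth) {
        int sum = 0;
        int i = 0;
        int limit = a.length - (a.length % vectorWidth);
        for (; i < limit; i += vectorWidth) {      // "vectorized" main loop
            for (int j = 0; j < vectorWidth; j++) {
                sum += a[i + j];
            }
        }
        if (i < a.length) {                        // zero-trip guard
            for (; i < a.length; i++) {            // drain (post) loop
                sum += a[i];
            }
        }
        return sum;
    }

    public static void main(String[] args) {
        System.out.println(sumWithDrain(new int[]{1, 2, 3, 4, 5, 6, 7}, 4)); // 28
    }
}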
@ -67,7 +67,9 @@ protected:
HasReductions=128,
WasSlpAnalyzed=256,
PassedSlpAnalysis=512,
DoUnrollOnly=1024 };
DoUnrollOnly=1024,
VectorizedLoop=2048,
HasAtomicPostLoop=4096 };
char _unswitch_count;
enum { _unswitch_max=3 };
@ -86,6 +88,8 @@ public:
void mark_was_slp() { _loop_flags |= WasSlpAnalyzed; }
void mark_passed_slp() { _loop_flags |= PassedSlpAnalysis; }
void mark_do_unroll_only() { _loop_flags |= DoUnrollOnly; }
void mark_loop_vectorized() { _loop_flags |= VectorizedLoop; }
void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
int unswitch_max() { return _unswitch_max; }
int unswitch_count() { return _unswitch_count; }
@ -221,6 +225,8 @@ public:
int has_passed_slp () const { return (_loop_flags&PassedSlpAnalysis) == PassedSlpAnalysis; }
int do_unroll_only () const { return (_loop_flags&DoUnrollOnly) == DoUnrollOnly; }
int is_main_no_pre_loop() const { return _loop_flags & MainHasNoPreLoop; }
int is_vectorized_loop () const { return (_loop_flags & VectorizedLoop) == VectorizedLoop; }
int has_atomic_post_loop () const { return (_loop_flags & HasAtomicPostLoop) == HasAtomicPostLoop; }
void set_main_no_pre_loop() { _loop_flags |= MainHasNoPreLoop; }
int main_idx() const { return _main_idx; }
@ -893,6 +899,8 @@ public:
// Add pre and post loops around the given loop. These loops are used
// during RCE, unrolling and aligning loops.
void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
// Add a vector post loop between a vector main loop and the current post loop
void insert_vector_post_loop(IdealLoopTree *loop, Node_List &old_new);
// If Node n lives in the back_ctrl block, we clone a private version of n
// in preheader_ctrl block and return that, otherwise return n.
Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n, VectorSet &visited, Node_Stack &clones );
@ -1105,6 +1113,8 @@ private:
Node *place_near_use( Node *useblock ) const;
Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
void try_move_store_after_loop(Node* n);
bool identical_backtoback_ifs(Node *n);
bool can_split_if(Node *n_ctrl);
bool _created_loop_node;
public:

@ -47,6 +47,14 @@ Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
return NULL;
}
// Splitting range check CastIIs through a loop induction Phi can
// cause new Phis to be created that are left unrelated to the loop
// induction Phi and prevent optimizations (vectorization)
if (n->Opcode() == Op_CastII && n->as_CastII()->has_range_check() &&
region->is_CountedLoop() && n->in(1) == region->as_CountedLoop()->phi()) {
return NULL;
}
int wins = 0;
assert(!n->is_CFG(), "");
assert(region->is_Region(), "");
@ -1020,108 +1028,193 @@ Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
}
bool PhaseIdealLoop::identical_backtoback_ifs(Node *n) {
if (!n->is_If()) {
return false;
}
if (!n->in(0)->is_Region()) {
return false;
}
Node* region = n->in(0);
Node* dom = idom(region);
if (!dom->is_If() || dom->in(1) != n->in(1)) {
return false;
}
IfNode* dom_if = dom->as_If();
Node* proj_true = dom_if->proj_out(1);
Node* proj_false = dom_if->proj_out(0);
for (uint i = 1; i < region->req(); i++) {
if (is_dominator(proj_true, region->in(i))) {
continue;
}
if (is_dominator(proj_false, region->in(i))) {
continue;
}
return false;
}
return true;
}
bool PhaseIdealLoop::can_split_if(Node *n_ctrl) {
if (C->live_nodes() > 35000) {
return false; // Method too big
}
// Do not do 'split-if' if irreducible loops are present.
if (_has_irreducible_loops) {
return false;
}
if (merge_point_too_heavy(C, n_ctrl)) {
return false;
}
// Do not do 'split-if' if some paths are dead. First do dead code
// elimination and then see if it's still profitable.
for (uint i = 1; i < n_ctrl->req(); i++) {
if (n_ctrl->in(i) == C->top()) {
return false;
}
}
// If trying to do a 'Split-If' at the loop head, it is only
// profitable if the cmp folds up on BOTH paths. Otherwise we
// risk peeling a loop forever.
// CNC - Disabled for now. Requires careful handling of loop
// body selection for the cloned code. Also, make sure we check
// for any input path not being in the same loop as n_ctrl. For
// irreducible loops we cannot check for 'n_ctrl->is_Loop()'
// because the alternative loop entry points won't be converted
// into LoopNodes.
IdealLoopTree *n_loop = get_loop(n_ctrl);
for (uint j = 1; j < n_ctrl->req(); j++) {
if (get_loop(n_ctrl->in(j)) != n_loop) {
return false;
}
}
// Check for safety of the merge point.
if (!merge_point_safe(n_ctrl)) {
return false;
}
return true;
}
//------------------------------split_if_with_blocks_post----------------------
// Do the real work in a non-recursive function. CFG hackery wants to be
// in the post-order, so it can dirty the I-DOM info and not use the dirtied
// info.
void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
// Cloning Cmp through Phi's involves the split-if transform.
// FastLock is not used by an If
if( n->is_Cmp() && !n->is_FastLock() ) {
if( C->live_nodes() > 35000 ) return; // Method too big
// Do not do 'split-if' if irreducible loops are present.
if( _has_irreducible_loops )
return;
if (n->is_Cmp() && !n->is_FastLock()) {
Node *n_ctrl = get_ctrl(n);
// Determine if the Node has inputs from some local Phi.
// Returns the block to clone thru.
Node *n_blk = has_local_phi_input( n );
if( n_blk != n_ctrl ) return;
if( merge_point_too_heavy(C, n_ctrl) )
Node *n_blk = has_local_phi_input(n);
if (n_blk != n_ctrl) {
return;
}
if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
if (!can_split_if(n_ctrl)) {
return;
}
if (n->outcnt() != 1) {
return; // Multiple bool's from 1 compare?
}
Node *bol = n->unique_out();
assert( bol->is_Bool(), "expect a bool here" );
if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
assert(bol->is_Bool(), "expect a bool here");
if (bol->outcnt() != 1) {
return; // Multiple branches from 1 compare?
}
Node *iff = bol->unique_out();
// Check some safety conditions
if( iff->is_If() ) { // Classic split-if?
if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
if (iff->is_If()) { // Classic split-if?
if (iff->in(0) != n_ctrl) {
return; // Compare must be in same blk as if
}
} else if (iff->is_CMove()) { // Trying to split-up a CMOVE
// Can't split CMove with different control edge.
if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
if( get_ctrl(iff->in(2)) == n_ctrl ||
get_ctrl(iff->in(3)) == n_ctrl )
if (iff->in(0) != NULL && iff->in(0) != n_ctrl) {
return;
}
if (get_ctrl(iff->in(2)) == n_ctrl ||
get_ctrl(iff->in(3)) == n_ctrl) {
return; // Inputs not yet split-up
if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
}
if (get_loop(n_ctrl) != get_loop(get_ctrl(iff))) {
return; // Loop-invar test gates loop-varying CMOVE
}
} else {
return; // some other kind of node, such as an Allocate
}
// Do not do 'split-if' if some paths are dead. First do dead code
// elimination and then see if it's still profitable.
for( uint i = 1; i < n_ctrl->req(); i++ )
if( n_ctrl->in(i) == C->top() )
return;
// When is split-if profitable? Every 'win' on means some control flow
// goes dead, so it's almost always a win.
int policy = 0;
// If trying to do a 'Split-If' at the loop head, it is only
// profitable if the cmp folds up on BOTH paths. Otherwise we
// risk peeling a loop forever.
// CNC - Disabled for now. Requires careful handling of loop
// body selection for the cloned code. Also, make sure we check
// for any input path not being in the same loop as n_ctrl. For
// irreducible loops we cannot check for 'n_ctrl->is_Loop()'
// because the alternative loop entry points won't be converted
// into LoopNodes.
IdealLoopTree *n_loop = get_loop(n_ctrl);
for( uint j = 1; j < n_ctrl->req(); j++ )
if( get_loop(n_ctrl->in(j)) != n_loop )
return;
// Check for safety of the merge point.
if( !merge_point_safe(n_ctrl) ) {
// Split compare 'n' through the merge point if it is profitable
Node *phi = split_thru_phi( n, n_ctrl, policy);
if (!phi) {
return;
}
// Split compare 'n' through the merge point if it is profitable
Node *phi = split_thru_phi( n, n_ctrl, policy );
if( !phi ) return;
// Found a Phi to split thru!
// Replace 'n' with the new phi
_igvn.replace_node( n, phi );
_igvn.replace_node(n, phi);
// Now split the bool up thru the phi
Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
Node *bolphi = split_thru_phi(bol, n_ctrl, -1);
guarantee(bolphi != NULL, "null boolean phi node");
_igvn.replace_node( bol, bolphi );
assert( iff->in(1) == bolphi, "" );
_igvn.replace_node(bol, bolphi);
assert(iff->in(1) == bolphi, "");
if( bolphi->Value(&_igvn)->singleton() )
if (bolphi->Value(&_igvn)->singleton()) {
return;
}
// Conditional-move? Must split up now
if( !iff->is_If() ) {
Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
_igvn.replace_node( iff, cmovphi );
if (!iff->is_If()) {
Node *cmovphi = split_thru_phi(iff, n_ctrl, -1);
_igvn.replace_node(iff, cmovphi);
return;
}
// Now split the IF
do_split_if( iff );
do_split_if(iff);
return;
}
// Two identical ifs back to back can be merged
if (identical_backtoback_ifs(n) && can_split_if(n->in(0))) {
Node *n_ctrl = n->in(0);
PhiNode* bolphi = PhiNode::make_blank(n_ctrl, n->in(1));
IfNode* dom_if = idom(n_ctrl)->as_If();
Node* proj_true = dom_if->proj_out(1);
Node* proj_false = dom_if->proj_out(0);
Node* con_true = _igvn.makecon(TypeInt::ONE);
Node* con_false = _igvn.makecon(TypeInt::ZERO);
for (uint i = 1; i < n_ctrl->req(); i++) {
if (is_dominator(proj_true, n_ctrl->in(i))) {
bolphi->init_req(i, con_true);
} else {
assert(is_dominator(proj_false, n_ctrl->in(i)), "bad if");
bolphi->init_req(i, con_false);
}
}
register_new_node(bolphi, n_ctrl);
_igvn.replace_input_of(n, 1, bolphi);
// Now split the IF
do_split_if(n);
return;
}

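The loopopts.cpp changes above do three things: refuse to split range-check CastIIs through a counted-loop induction Phi, factor the split-if legality tests into can_split_if, and merge identical back-to-back ifs by feeding the second if a Phi of constants selected by the dominating if. A hedged source-level intuition for that last transformation:

public class BackToBackIfsSketch {
    static int before(boolean cond) {
        int a = 0;
        if (cond) { a = 1; }  // dominating if decides cond
        if (cond) { a += 2; } // identical test, evaluated again
        return a;
    }

    // After the merge, the second test collapses: each path through the
    // dominating if already knows the outcome (the Phi of 1/0 constants).
    static int after(boolean cond) {
        return cond ? 3 : 0;
    }

    public static void main(String[] args) {
        System.out.println(before(true) == after(true));   // true
        System.out.println(before(false) == after(false)); // true
    }
}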
@ -1709,38 +1709,10 @@ const Type* LoadNode::Value(PhaseGVN* phase) const {
// unsafe field access may not have a constant offset
C->has_unsafe_access(),
"Field accesses must be precise" );
// For oop loads, we expect the _type to be precise
if (klass == env->String_klass() &&
adr->is_AddP() && off != Type::OffsetBot) {
// For constant Strings treat the final fields as compile time constants.
// While we can list what field types java.lang.String has, it is more
// future-proof to handle all possible field types, anticipating future
// changes and experiments in String code.
Node* base = adr->in(AddPNode::Base);
const TypeOopPtr* t = phase->type(base)->isa_oopptr();
if (t != NULL && t->singleton()) {
ciField* field = env->String_klass()->get_field_by_offset(off, false);
if (field != NULL && field->is_final()) {
ciObject* string = t->const_oop();
ciConstant constant = string->as_instance()->field_value(field);
// Type::make_from_constant does not handle narrow oops, so handle it here.
// Everything else can use the factory method.
if ((constant.basic_type() == T_ARRAY || constant.basic_type() == T_OBJECT)
&& adr->bottom_type()->is_ptr_to_narrowoop()) {
return TypeNarrowOop::make_from_constant(constant.as_object(), true);
} else {
return Type::make_from_constant(constant, true);
}
}
}
}
// For oop loads, we expect the _type to be precise.
// Optimizations for constant objects
ciObject* const_oop = tinst->const_oop();
if (const_oop != NULL) {
// For constant Boxed value treat the target field as a compile time constant.
if (tinst->is_ptr_to_boxed_value()) {
return tinst->get_const_boxed_value();
} else
// For constant CallSites treat the target field as a compile time constant.
if (const_oop->is_call_site()) {
ciCallSite* call_site = const_oop->as_call_site();

@ -1552,8 +1552,7 @@ Node* PhaseStringOpts::copy_string(GraphKit& kit, Node* str, Node* dst_array, No
if (str->is_Con()) {
// Constant source string
const TypeOopPtr* t = kit.gvn().type(src_array)->isa_oopptr();
ciTypeArray* src_array_type = t->const_oop()->as_type_array();
ciTypeArray* src_array_type = get_constant_value(kit, str);
// Check encoding of constant string
bool src_is_byte = (get_constant_coder(kit, str) == java_lang_String::CODER_LATIN1);
@ -1673,9 +1672,15 @@ jbyte PhaseStringOpts::get_constant_coder(GraphKit& kit, Node* str) {
int PhaseStringOpts::get_constant_length(GraphKit& kit, Node* str) {
assert(str->is_Con(), "String must be constant");
Node* src_array = kit.load_String_value(kit.control(), str);
const TypeOopPtr* t = kit.gvn().type(src_array)->isa_oopptr();
return t->const_oop()->as_type_array()->length();
return get_constant_value(kit, str)->length();
}
ciTypeArray* PhaseStringOpts::get_constant_value(GraphKit& kit, Node* str) {
assert(str->is_Con(), "String must be constant");
const TypeOopPtr* str_type = kit.gvn().type(str)->isa_oopptr();
ciInstance* str_instance = str_type->const_oop()->as_instance();
ciObject* src_array = str_instance->field_value_by_offset(java_lang_String::value_offset_in_bytes()).as_object();
return src_array->as_type_array();
}
void PhaseStringOpts::replace_string_concat(StringConcat* sc) {

@ -97,6 +97,9 @@ class PhaseStringOpts : public Phase {
// Returns the length of a constant string
int get_constant_length(GraphKit& kit, Node* str);
// Returns the value array of a constant string
ciTypeArray* get_constant_value(GraphKit& kit, Node* str);
// Clean up any leftover nodes
void record_dead_node(Node* node);
void remove_dead_nodes();

@ -2253,6 +2253,9 @@ void SuperWord::output() {
C->set_major_progress();
}
cl->mark_do_unroll_only();
if (do_reserve_copy()) {
cl->mark_loop_vectorized();
}
}
}
}

@ -63,30 +63,21 @@
bool MethodHandles::_enabled = false; // set true after successful native linkage
MethodHandlesAdapterBlob* MethodHandles::_adapter_code = NULL;
/**
* Generates method handle adapters. Returns 'false' if memory allocation
* failed and true otherwise.
*/
bool MethodHandles::generate_adapters() {
if (SystemDictionary::MethodHandle_klass() == NULL) {
return true;
}
void MethodHandles::generate_adapters() {
assert(SystemDictionary::MethodHandle_klass() != NULL, "should be present");
assert(_adapter_code == NULL, "generate only once");
ResourceMark rm;
TraceTime timer("MethodHandles adapters generation", TraceStartupTime);
_adapter_code = MethodHandlesAdapterBlob::create(adapter_code_size);
if (_adapter_code == NULL) {
return false;
}
CodeBuffer code(_adapter_code);
MethodHandlesAdapterGenerator g(&code);
g.generate();
code.log_section_sizes("MethodHandlesAdapterBlob");
return true;
}
//------------------------------------------------------------------------------
@ -1435,54 +1426,32 @@ static JNINativeMethod MH_methods[] = {
{CC "invokeExact", CC "([" OBJ ")" OBJ, FN_PTR(MH_invokeExact_UOE)}
};
/**
* Helper method to register native methods.
*/
static bool register_natives(JNIEnv* env, jclass clazz, const JNINativeMethod* methods, jint nMethods) {
int status = env->RegisterNatives(clazz, methods, nMethods);
if (status != JNI_OK || env->ExceptionOccurred()) {
warning("JSR 292 method handle code is mismatched to this JVM. Disabling support.");
env->ExceptionClear();
return false;
}
return true;
}
/**
* This one function is exported, used by NativeLookup.
*/
JVM_ENTRY(void, JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass MHN_class)) {
assert(!MethodHandles::enabled(), "must not be enabled");
bool enable_MH = true;
assert(SystemDictionary::MethodHandle_klass() != NULL, "should be present");
jclass MH_class = NULL;
if (SystemDictionary::MethodHandle_klass() == NULL) {
enable_MH = false;
} else {
oop mirror = SystemDictionary::MethodHandle_klass()->java_mirror();
MH_class = (jclass) JNIHandles::make_local(env, mirror);
}
oop mirror = SystemDictionary::MethodHandle_klass()->java_mirror();
jclass MH_class = (jclass) JNIHandles::make_local(env, mirror);
if (enable_MH) {
{
ThreadToNativeFromVM ttnfv(thread);
if (enable_MH) {
enable_MH = register_natives(env, MHN_class, MHN_methods, sizeof(MHN_methods)/sizeof(JNINativeMethod));
}
if (enable_MH) {
enable_MH = register_natives(env, MH_class, MH_methods, sizeof(MH_methods)/sizeof(JNINativeMethod));
}
int status = env->RegisterNatives(MHN_class, MHN_methods, sizeof(MHN_methods)/sizeof(JNINativeMethod));
guarantee(status == JNI_OK && !env->ExceptionOccurred(),
"register java.lang.invoke.MethodHandleNative natives");
status = env->RegisterNatives(MH_class, MH_methods, sizeof(MH_methods)/sizeof(JNINativeMethod));
guarantee(status == JNI_OK && !env->ExceptionOccurred(),
"register java.lang.invoke.MethodHandle natives");
}
if (TraceInvokeDynamic) {
tty->print_cr("MethodHandle support loaded (using LambdaForms)");
}
if (enable_MH) {
if (MethodHandles::generate_adapters() == false) {
THROW_MSG(vmSymbols::java_lang_VirtualMachineError(), "Out of space in CodeCache for method handle adapters");
}
MethodHandles::set_enabled(true);
}
MethodHandles::set_enabled(true);
}
JVM_END
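The registration above now calls env->RegisterNatives() directly, with guarantee() turning any mismatch into a hard VM error instead of quietly disabling method handle support. For reference, the same JNI pattern in an ordinary native library looks roughly like the following; the class name and method are hypothetical:

#include <jni.h>

static jint native_answer(JNIEnv*, jclass) { return 42; }

static const JNINativeMethod kMethods[] = {
  { (char*)"answer", (char*)"()I", (void*)native_answer },
};

JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM* vm, void*) {
  JNIEnv* env = nullptr;
  if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_8) != JNI_OK) {
    return JNI_ERR;
  }
  jclass clazz = env->FindClass("com/example/Demo");   // hypothetical class
  if (clazz == nullptr) return JNI_ERR;
  jint status = env->RegisterNatives(clazz, kMethods,
                                     sizeof(kMethods) / sizeof(kMethods[0]));
  // Like the guarantee() above: treat a mismatch as a hard failure.
  return (status == JNI_OK && !env->ExceptionCheck()) ? JNI_VERSION_1_8
                                                      : JNI_ERR;
}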

@ -81,7 +81,7 @@ class MethodHandles: AllStatic {
static void flush_dependent_nmethods(Handle call_site, Handle target);
// Generate MethodHandles adapters.
static bool generate_adapters();
static void generate_adapters();
// Called from MethodHandlesAdapterGenerator.
static address generate_method_handle_interpreter_entry(MacroAssembler* _masm, vmIntrinsics::ID iid);

@ -145,6 +145,7 @@ jint init_globals() {
}
javaClasses_init(); // must happen after vtable initialization
stubRoutines_init2(); // note: StubRoutines need 2-phase init
MethodHandles::generate_adapters();
CodeCacheExtensions::complete_step(CodeCacheExtensionsSteps::StubRoutines2);
#if INCLUDE_NMT

@ -359,6 +359,11 @@ class SharedRuntime: AllStatic {
static address clean_opt_virtual_call_entry();
static address clean_static_call_entry();
#if defined(X86) && defined(COMPILER1)
// For Object.hashCode and System.identityHashCode, try to pull the hashCode from the object header if available.
static void inline_check_hashcode_from_object_header(MacroAssembler* masm, methodHandle method, Register obj_reg, Register result);
#endif // X86 && COMPILER1
public:
// Read the array of BasicTypes from a Java signature, and compute where

@ -37,7 +37,7 @@
StubCodeDesc* StubCodeDesc::_list = NULL;
int StubCodeDesc::_count = 0;
bool StubCodeDesc::_frozen = false;
StubCodeDesc* StubCodeDesc::desc_for(address pc) {
StubCodeDesc* p = _list;
@ -46,20 +46,23 @@ StubCodeDesc* StubCodeDesc::desc_for(address pc) {
return p;
}
StubCodeDesc* StubCodeDesc::desc_for_index(int index) {
StubCodeDesc* p = _list;
while (p != NULL && p->index() != index) p = p->_next;
return p;
}
const char* StubCodeDesc::name_for(address pc) {
StubCodeDesc* p = desc_for(pc);
return p == NULL ? NULL : p->name();
}
void StubCodeDesc::freeze() {
assert(!_frozen, "repeated freeze operation");
_frozen = true;
}
void StubCodeDesc::print_on(outputStream* st) const {
st->print("%s", group());
st->print("::");
@ -110,12 +113,10 @@ StubCodeGenerator::~StubCodeGenerator() {
}
}
void StubCodeGenerator::stub_prolog(StubCodeDesc* cdesc) {
// default implementation - do nothing
}
void StubCodeGenerator::stub_epilog(StubCodeDesc* cdesc) {
// default implementation - record the cdesc
if (_first_stub == NULL) _first_stub = cdesc;

@ -28,7 +28,7 @@
#include "asm/assembler.hpp"
#include "memory/allocation.hpp"
// All the basic framework for stubcode generation/debugging/printing.
// All the basic framework for stub code generation/debugging/printing.
// A StubCodeDesc describes a piece of generated code (usually stubs).
@ -37,9 +37,10 @@
// this may have to change if searching becomes too slow.
class StubCodeDesc: public CHeapObj<mtCode> {
protected:
private:
static StubCodeDesc* _list; // the list of all descriptors
static int _count; // length of list
static bool _frozen; // determines whether _list modifications are allowed
StubCodeDesc* _next; // the next element in the linked list
const char* _group; // the group to which the stub code belongs
@ -68,6 +69,7 @@ class StubCodeDesc: public CHeapObj<mtCode> {
static const char* name_for(address pc); // returns the name of the code containing pc or NULL
StubCodeDesc(const char* group, const char* name, address begin, address end = NULL) {
assert(!_frozen, "no modifications allowed");
assert(name != NULL, "no name specified");
_next = _list;
_group = group;
@ -78,6 +80,8 @@ class StubCodeDesc: public CHeapObj<mtCode> {
_list = this;
};
static void freeze();
const char* group() const { return _group; }
const char* name() const { return _name; }
int index() const { return _index; }
@ -117,7 +121,7 @@ class StubCodeGenerator: public StackObj {
// later via an address pointing into it.
class StubCodeMark: public StackObj {
protected:
private:
StubCodeGenerator* _cgen;
StubCodeDesc* _cdesc;
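Taken together, the StubCodeDesc hunks tighten a simple pattern: descriptors register themselves on an intrusive singly-linked list at construction, and the list can be frozen once stub generation is complete so that any later registration trips an assert. A self-contained sketch of that pattern with simplified, non-HotSpot names:

#include <cassert>
#include <cstdio>

class Desc {
 private:
  static Desc* _list;    // head of the registry
  static bool  _frozen;  // once set, no further registrations
  Desc*        _next;
  const char*  _name;
 public:
  explicit Desc(const char* name) : _next(_list), _name(name) {
    assert(!_frozen && "no modifications allowed");
    _list = this;        // newest entry becomes the head
  }
  static void freeze() {
    assert(!_frozen && "repeated freeze operation");
    _frozen = true;
  }
  static const char* name_at(int index) {
    int i = 0;
    for (Desc* p = _list; p != nullptr; p = p->_next, i++) {
      if (i == index) return p->_name;
    }
    return nullptr;
  }
};
Desc* Desc::_list   = nullptr;
bool  Desc::_frozen = false;

int main() {
  Desc a("stub_a"), b("stub_b");
  Desc::freeze();                          // mirrors StubCodeDesc::freeze()
  std::printf("%s\n", Desc::name_at(0));   // prints "stub_b" (LIFO order)
  return 0;
}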

@ -3600,6 +3600,9 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) {
vm_exit_during_initialization("Failed to initialize tracing backend");
}
// No more stub generation allowed after this point.
StubCodeDesc::freeze();
// Set flag that basic initialization has completed. Used by exceptions and various
// debug stuff, that does not work until all basic classes have been initialized.
set_init_completed();

@ -44,6 +44,31 @@ static int test_comparator(int a, int b) {
}
return 1;
}
static void print_array(const char* prefix, int* array, int length) {
tty->print("%s:", prefix);
for (int i = 0; i < length; i++) {
tty->print(" %d", array[i]);
}
tty->cr();
}
static bool compare_arrays(int* actual, int* expected, int length) {
for (int i = 0; i < length; i++) {
if (actual[i] != expected[i]) {
print_array("Sorted array ", actual, length);
print_array("Expected array", expected, length);
return false;
}
}
return true;
}
template <class C>
static bool sort_and_compare(int* arrayToSort, int* expectedResult, int length, C comparator, bool idempotent = false) {
QuickSort::sort<int, C>(arrayToSort, length, comparator, idempotent);
return compare_arrays(arrayToSort, expectedResult, length);
}
#endif // ASSERT
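With the harness now compiled only under ASSERT, a hypothetical debug-build smoke check against it would look like this (the comparator and test values are illustrative; sort_and_compare is the helper defined above):

#ifdef ASSERT
static int ascending(int a, int b) {
  return a < b ? -1 : (a > b ? 1 : 0);
}

static void quicksort_smoke_check() {
  int data[]     = { 3, 1, 2 };
  int expected[] = { 1, 2, 3 };
  assert(sort_and_compare(data, expected, 3, ascending));
}
#endif // ASSERT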
static int test_even_odd_comparator(int a, int b) {
@ -72,31 +97,6 @@ extern "C" {
}
}
static void print_array(const char* prefix, int* array, int length) {
tty->print("%s:", prefix);
for (int i = 0; i < length; i++) {
tty->print(" %d", array[i]);
}
tty->cr();
}
static bool compare_arrays(int* actual, int* expected, int length) {
for (int i = 0; i < length; i++) {
if (actual[i] != expected[i]) {
print_array("Sorted array ", actual, length);
print_array("Expected array", expected, length);
return false;
}
}
return true;
}
template <class C>
static bool sort_and_compare(int* arrayToSort, int* expectedResult, int length, C comparator, bool idempotent = false) {
QuickSort::sort<int, C>(arrayToSort, length, comparator, idempotent);
return compare_arrays(arrayToSort, expectedResult, length);
}
void QuickSort_test() {
{
int* test_array = NULL;

@ -33,9 +33,10 @@
*
* @run main/othervm
* -Xbootclasspath/a:.
* -Xmixed
* -XX:+UnlockDiagnosticVMOptions
* -XX:+WhiteBoxAPI
* -XX:MaxInlineSize=100
* -XX:MaxInlineSize=70
* -XX:MinInliningThreshold=0
* TestStringIntrinsics2
*/

@ -26,6 +26,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar product reduction optimizations : double test
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Double
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Double

@ -26,6 +26,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar product reduction optimizations : float test
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Float
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Float

@ -26,6 +26,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Int
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 ProdRed_Int

@ -26,6 +26,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar product reduction optimizations : int test
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 -XX:CompileCommand=exclude,ReductionPerf::main ReductionPerf
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:CompileThresholdScaling=0.1 -XX:CompileCommand=exclude,ReductionPerf::main ReductionPerf

@ -26,7 +26,7 @@
* @test
* @bug 8135028
* @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double sqrt test
* @requires os.arch=="x86" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedSqrt_Double
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRedSqrt_Double

@ -26,6 +26,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar sum reduction optimizations : double test
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Double
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Double

@ -26,6 +26,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar sum reduction optimizations : float test
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Float
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Float

@ -26,6 +26,7 @@
* @test
* @bug 8074981
* @summary Add C2 x86 Superword support for scalar sum reduction optimizations : int test
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Int
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=2 -XX:CompileThresholdScaling=0.1 SumRed_Int

@ -26,6 +26,7 @@
* @test
* @bug 8076276
* @summary Add C2 x86 Superword support for scalar sum reduction optimizations : long test
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64"
*
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRed_Long
* @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-SuperWordReductions -XX:LoopUnrollLimit=250 -XX:LoopMaxUnroll=4 -XX:CompileThresholdScaling=0.1 SumRed_Long