diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index bcf4d5ea13b..ee9e2064941 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -76,7 +76,8 @@ static const unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_
   2,  4,  8,   // EVEX_OVM(0)
   16, 16, 16,  // EVEX_M128(0)
   8,  32, 64,  // EVEX_DUP(0)
-  0,  0,  0    // EVEX_NTUP
+  1,   1,  1,  // EVEX_NOSCALE(0)
+  0,  0,  0    // EVEX_ETUP
 };
 
 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
@@ -294,11 +295,7 @@ void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format)
 }
 
 static int encode(Register r) {
-  int enc = r->encoding();
-  if (enc >= 8) {
-    enc -= 8;
-  }
-  return enc;
+  return r->encoding() & 7;
 }
 
 void Assembler::emit_arith_b(int op1, int op2, Register dst, int imm8) {
@@ -438,6 +435,9 @@ bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vect
     case EVEX_DUP:
       break;
 
+    case EVEX_NOSCALE:
+      break;
+
     default:
       assert(0, "no valid evex tuple_table entry");
       break;
@@ -525,6 +525,9 @@ bool Assembler::emit_compressed_disp_byte(int &disp) {
     case EVEX_DUP:
       break;
 
+    case EVEX_NOSCALE:
+      break;
+
     default:
       assert(0, "no valid evex tuple_table entry");
       break;
@@ -546,6 +549,24 @@ bool Assembler::emit_compressed_disp_byte(int &disp) {
   return is8bit(disp);
 }
 
+bool Assembler::needs_rex2(Register reg1, Register reg2, Register reg3) {
+  bool rex2 = (reg1->is_valid() && reg1->encoding() >= 16) ||
+              (reg2->is_valid() && reg2->encoding() >= 16) ||
+              (reg3->is_valid() && reg3->encoding() >= 16);
+  assert(!rex2 || UseAPX, "extended gpr use requires UseAPX");
+  return rex2;
+}
+
+bool Assembler::needs_eevex(Register reg1, Register reg2, Register reg3) {
+  return needs_rex2(reg1, reg2, reg3);
+}
+
+bool Assembler::needs_eevex(int enc1, int enc2, int enc3) {
+  bool eevex = enc1 >= 16 || enc2 >= 16 || enc3 >=16;
+  assert(!eevex || UseAPX, "extended gpr use requires UseAPX");
+  return eevex;
+}
+
 static bool is_valid_encoding(int reg_enc) {
   return reg_enc >= 0;
 }
@@ -616,9 +637,9 @@ void Assembler::emit_operand_helper(int reg_enc, int base_enc, int index_enc,
     if (is_valid_encoding(index_enc)) {
       assert(scale != Address::no_scale, "inconsistent address");
       // [base + index*scale + disp]
-      if (disp == 0 && no_relocation &&
-          base_enc != rbp->encoding() LP64_ONLY(&& base_enc != r13->encoding())) {
+      if (disp == 0 && no_relocation && ((base_enc & 0x7) != 5)) {
         // [base + index*scale]
+        // !(rbp | r13 | r21 | r29)
         // [00 reg 100][ss index base]
         emit_modrm_sib(0b00, reg_enc, 0b100,
                        scale, index_enc, base_enc);
@@ -635,7 +656,8 @@ void Assembler::emit_operand_helper(int reg_enc, int base_enc, int index_enc,
                        scale, index_enc, base_enc);
         emit_data(disp, rspec, disp32_operand);
       }
-    } else if (base_enc == rsp->encoding() LP64_ONLY(|| base_enc == r12->encoding())) {
+    } else if ((base_enc & 0x7) == 4) {
+      // rsp | r12 | r20 | r28
       // [rsp + disp]
       if (disp == 0 && no_relocation) {
         // [rsp]
@@ -657,10 +679,11 @@ void Assembler::emit_operand_helper(int reg_enc, int base_enc, int index_enc,
       }
     } else {
       // [base + disp]
-      assert(base_enc != rsp->encoding() LP64_ONLY(&& base_enc != r12->encoding()), "illegal addressing mode");
-      if (disp == 0 && no_relocation &&
-          base_enc != rbp->encoding() LP64_ONLY(&& base_enc != r13->encoding())) {
+      // !(rsp | r12 | r20 | r28) were handled above
+      assert(((base_enc & 0x7) != 4), "illegal addressing mode");
+      if (disp == 0 && no_relocation &&  ((base_enc & 0x7) != 5)) {
         // [base]
+        // !(rbp | r13 | r21 | r29)
         // [00 reg base]
         emit_modrm(0, reg_enc, base_enc);
       } else if (emit_compressed_disp_byte(disp) && no_relocation) {
@@ -817,6 +840,13 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
     NOT_LP64(assert(false, "64bit prefixes"));
     goto again_after_prefix;
 
+  case REX2:
+    NOT_LP64(assert(false, "64bit prefixes"));
+    if ((0xFF & *ip++) & REXBIT_W) {
+      is_64bit = true;
+    }
+    goto again_after_prefix;
+
   case REX_W:
   case REX_WB:
   case REX_WX:
@@ -866,6 +896,14 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
     case REX_WRXB:
       NOT_LP64(assert(false, "64bit prefix found"));
       goto again_after_size_prefix2;
+
+    case REX2:
+      NOT_LP64(assert(false, "64bit prefix found"));
+      if ((0xFF & *ip++) & REXBIT_W) {
+        is_64bit = true;
+      }
+      goto again_after_size_prefix2;
+
     case 0x8B: // movw r, a
     case 0x89: // movw a, r
       debug_only(has_disp32 = true);
@@ -1155,6 +1193,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
     case REX_WRB:
     case REX_WRX:
     case REX_WRXB:
+    case REX2:
       NOT_LP64(assert(false, "found 64bit prefix"));
       ip++;
     default:
@@ -1281,6 +1320,33 @@ void Assembler::emit_operand(XMMRegister reg, Address adr, int post_addr_length)
   }
 }
 
+void Assembler::emit_opcode_prefix_and_encoding(int byte1, int byte2, int ocp_and_encoding, int byte3) {
+  int opcode_prefix = (ocp_and_encoding & 0xFF00) >> 8;
+  if (opcode_prefix != 0) {
+    emit_int32(opcode_prefix, (unsigned char)byte1, byte2 | (ocp_and_encoding & 0xFF), byte3);
+  } else {
+    emit_int24((unsigned char)byte1, byte2 | (ocp_and_encoding & 0xFF), byte3);
+  }
+}
+
+void Assembler::emit_opcode_prefix_and_encoding(int byte1, int byte2, int ocp_and_encoding) {
+  int opcode_prefix = (ocp_and_encoding & 0xFF00) >> 8;
+  if (opcode_prefix != 0) {
+    emit_int24(opcode_prefix, (unsigned char)byte1, byte2 | (ocp_and_encoding & 0xFF));
+  } else {
+    emit_int16((unsigned char)byte1, byte2 | (ocp_and_encoding & 0xFF));
+  }
+}
+
+void Assembler::emit_opcode_prefix_and_encoding(int byte1, int ocp_and_encoding) {
+  int opcode_prefix = (ocp_and_encoding & 0xFF00) >> 8;
+  if (opcode_prefix != 0) {
+    emit_int16(opcode_prefix, (unsigned char)byte1 | (ocp_and_encoding & 0xFF));
+  } else {
+    emit_int8((unsigned char)byte1 | (ocp_and_encoding & 0xFF));
+  }
+}
+
 // Now the Assembler instructions (identical for 32/64 bits)
 
 void Assembler::adcl(Address dst, int32_t imm32) {
@@ -1603,50 +1669,48 @@ void Assembler::andl(Register dst, Register src) {
 
 void Assembler::andnl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF2, (0xC0 | encode));
 }
 
 void Assembler::andnl(Register dst, Register src1, Address src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2, 0);
 }
 
 void Assembler::bsfl(Register dst, Register src) {
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F,
-             (unsigned char)0xBC,
-             0xC0 | encode);
+  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBC, 0xC0, encode);
 }
 
 void Assembler::bsrl(Register dst, Register src) {
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F,
-             (unsigned char)0xBD,
-             0xC0 | encode);
+  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBD, 0xC0, encode);
 }
 
 void Assembler::bswapl(Register reg) { // bswap
-  int encode = prefix_and_encode(reg->encoding());
-  emit_int16(0x0F, (0xC8 | encode));
+  int encode = prefix_and_encode(reg->encoding(), false, true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xC8, encode);
 }
 
 void Assembler::blsil(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF3, (0xC0 | encode));
 }
 
 void Assembler::blsil(Register dst, Address src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src, 0);
@@ -1654,8 +1718,8 @@ void Assembler::blsil(Register dst, Address src) {
 
 void Assembler::blsmskl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF3,
              0xC0 | encode);
 }
@@ -1663,7 +1727,8 @@ void Assembler::blsmskl(Register dst, Register src) {
 void Assembler::blsmskl(Register dst, Address src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rdx, src, 0);
@@ -1671,15 +1736,16 @@ void Assembler::blsmskl(Register dst, Address src) {
 
 void Assembler::blsrl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF3, (0xC0 | encode));
 }
 
 void Assembler::blsrl(Register dst, Address src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src, 0);
@@ -1743,18 +1809,15 @@ void Assembler::cld() {
 
 void Assembler::cmovl(Condition cc, Register dst, Register src) {
   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F,
-             0x40 | cc,
-             0xC0 | encode);
+  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding(0x40 | cc, 0xC0, encode);
 }
 
-
 void Assembler::cmovl(Condition cc, Register dst, Address src) {
   InstructionMark im(this);
   NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction"));
-  prefix(src, dst);
-  emit_int16(0x0F, (0x40 | cc));
+  prefix(src, dst, false, true /* is_map1 */);
+  emit_int8((0x40 | cc));
   emit_operand(dst, src, 0);
 }
 
@@ -1840,16 +1903,16 @@ void Assembler::cmpw(Address dst, Register reg) {
 // The ZF is set if the compared values were equal, and cleared otherwise.
 void Assembler::cmpxchgl(Register reg, Address adr) { // cmpxchg
   InstructionMark im(this);
-  prefix(adr, reg);
-  emit_int16(0x0F, (unsigned char)0xB1);
+  prefix(adr, reg, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xB1);
   emit_operand(reg, adr, 0);
 }
 
 void Assembler::cmpxchgw(Register reg, Address adr) { // cmpxchg
   InstructionMark im(this);
   size_prefix();
-  prefix(adr, reg);
-  emit_int16(0x0F, (unsigned char)0xB1);
+  prefix(adr, reg, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xB1);
   emit_operand(reg, adr, 0);
 }
 
@@ -1858,8 +1921,8 @@ void Assembler::cmpxchgw(Register reg, Address adr) { // cmpxchg
 // The ZF is set if the compared values were equal, and cleared otherwise.
 void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg
   InstructionMark im(this);
-  prefix(adr, reg, true);
-  emit_int16(0x0F, (unsigned char)0xB0);
+  prefix(adr, reg, true, true /* is_map1 */);
+  emit_int8((unsigned char)0xB0);
   emit_operand(reg, adr, 0);
 }
 
@@ -1917,69 +1980,83 @@ void Assembler::cpuid() {
 // F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64  RM        Valid             N.E.              Accumulate CRC32 on r / m64. v
 void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
   assert(VM_Version::supports_sse4_2(), "");
-  int8_t w = 0x01;
-  Prefix p = Prefix_EMPTY;
+  if (needs_eevex(crc, v)) {
+    InstructionAttr attributes(AVX_128bit, /* rex_w */ sizeInBytes == 8, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+    int encode = vex_prefix_and_encode(crc->encoding(), 0, v->encoding(), sizeInBytes == 2 ? VEX_SIMD_66 : VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, true);
+    emit_int16(sizeInBytes == 1 ? (unsigned char)0xF0 : (unsigned char)0xF1, (0xC0 | encode));
+  } else {
+    int8_t w = 0x01;
+    Prefix p = Prefix_EMPTY;
 
-  emit_int8((unsigned char)0xF2);
-  switch (sizeInBytes) {
-  case 1:
-    w = 0;
-    break;
-  case 2:
-  case 4:
-    break;
-  LP64_ONLY(case 8:)
-    // This instruction is not valid in 32 bits
-    // Note:
-    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
-    //
-    // Page B - 72   Vol. 2C says
-    // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
-    // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
-    //                                                                            F0!!!
-    // while 3 - 208 Vol. 2A
-    // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.Accumulate CRC32 on r / m64.
-    //
-    // the 0 on a last bit is reserved for a different flavor of this instruction :
-    // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.Accumulate CRC32 on r / m8.
-    p = REX_W;
-    break;
-  default:
-    assert(0, "Unsupported value for a sizeInBytes argument");
-    break;
+    emit_int8((unsigned char)0xF2);
+    switch (sizeInBytes) {
+    case 1:
+      w = 0;
+      break;
+    case 2:
+    case 4:
+      break;
+    LP64_ONLY(case 8:)
+      // This instruction is not valid in 32 bits
+      // Note:
+      // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+      //
+      // Page B - 72   Vol. 2C says
+      // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
+      // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
+      //                                                                            F0!!!
+      // while 3 - 208 Vol. 2A
+      // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.Accumulate CRC32 on r / m64.
+      //
+      // the 0 on a last bit is reserved for a different flavor of this instruction :
+      // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.Accumulate CRC32 on r / m8.
+      p = REX_W;
+      break;
+    default:
+      assert(0, "Unsupported value for a sizeInBytes argument");
+      break;
+    }
+    LP64_ONLY(prefix(crc, v, p);)
+    emit_int32(0x0F,
+               0x38,
+               0xF0 | w,
+               0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
   }
-  LP64_ONLY(prefix(crc, v, p);)
-  emit_int32(0x0F,
-             0x38,
-             0xF0 | w,
-             0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
 }
 
 void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
   assert(VM_Version::supports_sse4_2(), "");
   InstructionMark im(this);
-  int8_t w = 0x01;
-  Prefix p = Prefix_EMPTY;
+  if (needs_eevex(crc, adr.base(), adr.index())) {
+    InstructionAttr attributes(AVX_128bit, /* vex_w */ sizeInBytes == 8, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+    attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
+    vex_prefix(adr, 0, crc->encoding(), sizeInBytes == 2 ? VEX_SIMD_66 : VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes);
+    emit_int8(sizeInBytes == 1 ? (unsigned char)0xF0 : (unsigned char)0xF1);
+    emit_operand(crc, adr, 0);
+  } else {
+    int8_t w = 0x01;
+    Prefix p = Prefix_EMPTY;
 
-  emit_int8((uint8_t)0xF2);
-  switch (sizeInBytes) {
-  case 1:
-    w = 0;
-    break;
-  case 2:
-  case 4:
-    break;
-  LP64_ONLY(case 8:)
-    // This instruction is not valid in 32 bits
-    p = REX_W;
-    break;
-  default:
-    assert(0, "Unsupported value for a sizeInBytes argument");
-    break;
+    emit_int8((uint8_t)0xF2);
+    switch (sizeInBytes) {
+    case 1:
+      w = 0;
+      break;
+    case 2:
+    case 4:
+      break;
+    LP64_ONLY(case 8:)
+      // This instruction is not valid in 32 bits
+      p = REX_W;
+      break;
+    default:
+      assert(0, "Unsupported value for a sizeInBytes argument");
+      break;
+    }
+    LP64_ONLY(prefix(crc, adr, p);)
+    emit_int24(0x0F, 0x38, (0xF0 | w));
+    emit_operand(crc, adr, 0);
   }
-  LP64_ONLY(prefix(crc, adr, p);)
-  emit_int24(0x0F, 0x38, (0xF0 | w));
-  emit_operand(crc, adr, 0);
 }
 
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
@@ -2081,7 +2158,7 @@ void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes, true);
   emit_int16(0x2A, (0xC0 | encode));
 }
 
@@ -2098,7 +2175,7 @@ void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes, true);
   emit_int16(0x2A, (0xC0 | encode));
 }
 
@@ -2115,7 +2192,7 @@ void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes, true);
   emit_int16(0x2A, (0xC0 | encode));
 }
 
@@ -2418,10 +2495,8 @@ void Assembler::imull(Register src) {
 }
 
 void Assembler::imull(Register dst, Register src) {
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F,
-             (unsigned char)0xAF,
-             (0xC0 | encode));
+  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xAF, 0xC0, encode);
 }
 
 void Assembler::imull(Register dst, Address src, int32_t value) {
@@ -2450,8 +2525,8 @@ void Assembler::imull(Register dst, Register src, int value) {
 
 void Assembler::imull(Register dst, Address src) {
   InstructionMark im(this);
-  prefix(src, dst);
-  emit_int16(0x0F, (unsigned char)0xAF);
+  prefix(src, dst, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xAF);
   emit_operand(dst, src, 0);
 }
 
@@ -2587,7 +2662,10 @@ void Assembler::jmpb_0(Label& L, const char* file, int line) {
 }
 
 void Assembler::ldmxcsr( Address src) {
-  if (UseAVX > 0 ) {
+  // This instruction should be SSE encoded with the REX2 prefix when an
+  // extended GPR is present. To be consistent when UseAPX is enabled, use
+  // this encoding even when an extended GPR is not used.
+  if (UseAVX > 0 && !UseAPX ) {
     InstructionMark im(this);
     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
     vex_prefix(src, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
@@ -2596,8 +2674,8 @@ void Assembler::ldmxcsr( Address src) {
   } else {
     NOT_LP64(assert(VM_Version::supports_sse(), ""));
     InstructionMark im(this);
-    prefix(src);
-    emit_int16(0x0F, (unsigned char)0xAE);
+    prefix(src, true /* is_map1 */);
+    emit_int8((unsigned char)0xAE);
     emit_operand(as_Register(2), src, 0);
   }
 }
@@ -2624,16 +2702,16 @@ void Assembler::size_prefix() {
 void Assembler::lzcntl(Register dst, Register src) {
   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
   emit_int8((unsigned char)0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
+  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBD, 0xC0, encode);
 }
 
 void Assembler::lzcntl(Register dst, Address src) {
   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
   InstructionMark im(this);
   emit_int8((unsigned char)0xF3);
-  prefix(src, dst);
-  emit_int16(0x0F, (unsigned char)0xBD);
+  prefix(src, dst, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xBD);
   emit_operand(dst, src, 0);
 }
 
@@ -2725,28 +2803,28 @@ void Assembler::kmovbl(KRegister dst, KRegister src) {
 
 void Assembler::kmovbl(KRegister dst, Register src) {
   assert(VM_Version::supports_avx512dq(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true);
   emit_int16((unsigned char)0x92, (0xC0 | encode));
 }
 
 void Assembler::kmovbl(Register dst, KRegister src) {
   assert(VM_Version::supports_avx512dq(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int16((unsigned char)0x93, (0xC0 | encode));
 }
 
 void Assembler::kmovwl(KRegister dst, Register src) {
   assert(VM_Version::supports_evex(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes, true);
   emit_int16((unsigned char)0x92, (0xC0 | encode));
 }
 
 void Assembler::kmovwl(Register dst, KRegister src) {
   assert(VM_Version::supports_evex(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int16((unsigned char)0x93, (0xC0 | encode));
 }
@@ -2754,7 +2832,8 @@ void Assembler::kmovwl(Register dst, KRegister src) {
 void Assembler::kmovwl(KRegister dst, Address src) {
   assert(VM_Version::supports_evex(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_operand(dst, src, 0);
@@ -2763,7 +2842,8 @@ void Assembler::kmovwl(KRegister dst, Address src) {
 void Assembler::kmovwl(Address dst, KRegister src) {
   assert(VM_Version::supports_evex(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x91);
   emit_operand(src, dst, 0);
@@ -2778,14 +2858,14 @@ void Assembler::kmovwl(KRegister dst, KRegister src) {
 
 void Assembler::kmovdl(KRegister dst, Register src) {
   assert(VM_Version::supports_avx512bw(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes, true);
   emit_int16((unsigned char)0x92, (0xC0 | encode));
 }
 
 void Assembler::kmovdl(Register dst, KRegister src) {
   assert(VM_Version::supports_avx512bw(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int16((unsigned char)0x93, (0xC0 | encode));
 }
@@ -2800,7 +2880,8 @@ void Assembler::kmovql(KRegister dst, KRegister src) {
 void Assembler::kmovql(KRegister dst, Address src) {
   assert(VM_Version::supports_avx512bw(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_operand(dst, src, 0);
@@ -2809,7 +2890,8 @@ void Assembler::kmovql(KRegister dst, Address src) {
 void Assembler::kmovql(Address dst, KRegister src) {
   assert(VM_Version::supports_avx512bw(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x91);
   emit_operand(src, dst, 0);
@@ -2817,14 +2899,14 @@ void Assembler::kmovql(Address dst, KRegister src) {
 
 void Assembler::kmovql(KRegister dst, Register src) {
   assert(VM_Version::supports_avx512bw(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes, true);
   emit_int16((unsigned char)0x92, (0xC0 | encode));
 }
 
 void Assembler::kmovql(Register dst, KRegister src) {
   assert(VM_Version::supports_avx512bw(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int16((unsigned char)0x93, (0xC0 | encode));
 }
@@ -3105,7 +3187,7 @@ void Assembler::movb(Address dst, Register src) {
 void Assembler::movdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true);
   emit_int16(0x6E, (0xC0 | encode));
 }
 
@@ -3113,7 +3195,7 @@ void Assembler::movdl(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true);
   emit_int16(0x7E, (0xC0 | encode));
 }
 
@@ -3582,28 +3664,28 @@ void Assembler::movq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true);
   emit_int16(0x7E, (0xC0 | encode));
 }
 
 void Assembler::movq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true);
   emit_int16(0x6E, (0xC0 | encode));
 }
 
 void Assembler::movsbl(Register dst, Address src) { // movsxb
   InstructionMark im(this);
-  prefix(src, dst);
-  emit_int16(0x0F, (unsigned char)0xBE);
+  prefix(src, dst, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xBE);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::movsbl(Register dst, Register src) { // movsxb
   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
-  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
-  emit_int24(0x0F, (unsigned char)0xBE, (0xC0 | encode));
+  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true, true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBE, 0xC0, encode);
 }
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
@@ -3675,14 +3757,14 @@ void Assembler::movss(Address dst, XMMRegister src) {
 
 void Assembler::movswl(Register dst, Address src) { // movsxw
   InstructionMark im(this);
-  prefix(src, dst);
-  emit_int16(0x0F, (unsigned char)0xBF);
+  prefix(src, dst, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xBF);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::movswl(Register dst, Register src) { // movsxw
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xBF, (0xC0 | encode));
+  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBF, 0xC0, encode);
 }
 
 void Assembler::movups(XMMRegister dst, Address src) {
@@ -3753,27 +3835,27 @@ void Assembler::movw(Address dst, Register src) {
 
 void Assembler::movzbl(Register dst, Address src) { // movzxb
   InstructionMark im(this);
-  prefix(src, dst);
-  emit_int16(0x0F, (unsigned char)0xB6);
+  prefix(src, dst, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xB6);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::movzbl(Register dst, Register src) { // movzxb
   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
-  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
-  emit_int24(0x0F, (unsigned char)0xB6, 0xC0 | encode);
+  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true, true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xB6, 0xC0, encode);
 }
 
 void Assembler::movzwl(Register dst, Address src) { // movzxw
   InstructionMark im(this);
-  prefix(src, dst);
-  emit_int16(0x0F, (unsigned char)0xB7);
+  prefix(src, dst, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xB7);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::movzwl(Register dst, Register src) { // movzxw
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xB7, 0xC0 | encode);
+  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xB7, 0xC0, encode);
 }
 
 void Assembler::mull(Address src) {
@@ -4416,6 +4498,7 @@ void Assembler::ud2() {
 
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
+  assert(!needs_eevex(src.base(), src.index()), "does not support extended gprs");
   InstructionMark im(this);
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -4743,7 +4826,7 @@ void Assembler::vmovmskpd(Register dst, XMMRegister src, int vec_enc) {
 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24(0x16, (0xC0 | encode), imm8);
 }
 
@@ -4761,7 +4844,7 @@ void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24(0x16, (0xC0 | encode), imm8);
 }
 
@@ -4797,7 +4880,7 @@ void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
 void Assembler::pextrb(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24(0x14, (0xC0 | encode), imm8);
 }
 
@@ -4815,7 +4898,7 @@ void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24(0x22, (0xC0 | encode), imm8);
 }
 
@@ -4833,14 +4916,14 @@ void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
 void Assembler::vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
   assert(VM_Version::supports_avx(), "");
   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24(0x22, (0xC0 | encode), imm8);
 }
 
 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24(0x22, (0xC0 | encode), imm8);
 }
 
@@ -4858,14 +4941,14 @@ void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
 void Assembler::vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
   assert(VM_Version::supports_avx(), "");
   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  int encode = vex_prefix_and_encode(dst->encoding(),  nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24(0x22, (0xC0 | encode), imm8);
 }
 
 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true);
   emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8);
 }
 
@@ -4883,7 +4966,7 @@ void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
 void Assembler::vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
   assert(VM_Version::supports_avx(), "");
   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true);
   emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8);
 }
 
@@ -4901,14 +4984,14 @@ void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
 void Assembler::pinsrb(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24(0x20, (0xC0 | encode), imm8);
 }
 
 void Assembler::vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
   assert(VM_Version::supports_avx(), "");
   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24(0x20, (0xC0 | encode), imm8);
 }
 
@@ -5311,16 +5394,16 @@ void Assembler::popcntl(Register dst, Address src) {
   assert(VM_Version::supports_popcnt(), "must support");
   InstructionMark im(this);
   emit_int8((unsigned char)0xF3);
-  prefix(src, dst);
-  emit_int16(0x0F, (unsigned char)0xB8);
+  prefix(src, dst, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xB8);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::popcntl(Register dst, Register src) {
   assert(VM_Version::supports_popcnt(), "must support");
   emit_int8((unsigned char)0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xB8, (0xC0 | encode));
+  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xB8, 0xC0, encode);
 }
 
 void Assembler::evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
@@ -5392,48 +5475,48 @@ void Assembler::popl(Address dst) {
 void Assembler::prefetchnta(Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
-  prefix(src);
-  emit_int16(0x0F, 0x18);
+  prefix(src, true /* is_map1 */);
+  emit_int8(0x18);
   emit_operand(rax, src, 0); // 0, src
 }
 
 void Assembler::prefetchr(Address src) {
   assert(VM_Version::supports_3dnow_prefetch(), "must support");
   InstructionMark im(this);
-  prefix(src);
-  emit_int16(0x0F, 0x0D);
+  prefix(src, true /* is_map1 */);
+  emit_int8(0x0D);
   emit_operand(rax, src, 0); // 0, src
 }
 
 void Assembler::prefetcht0(Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
-  prefix(src);
-  emit_int16(0x0F, 0x18);
+  prefix(src, true /* is_map1 */);
+  emit_int8(0x18);
   emit_operand(rcx, src, 0); // 1, src
 }
 
 void Assembler::prefetcht1(Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
-  prefix(src);
-  emit_int16(0x0F, 0x18);
+  prefix(src, true /* is_map1 */);
+  emit_int8(0x18);
   emit_operand(rdx, src, 0); // 2, src
 }
 
 void Assembler::prefetcht2(Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), "must support"));
   InstructionMark im(this);
-  prefix(src);
-  emit_int16(0x0F, 0x18);
+  prefix(src, true /* is_map1 */);
+  emit_int8(0x18);
   emit_operand(rbx, src, 0); // 3, src
 }
 
 void Assembler::prefetchw(Address src) {
   assert(VM_Version::supports_3dnow_prefetch(), "must support");
   InstructionMark im(this);
-  prefix(src);
-  emit_int16(0x0F, 0x0D);
+  prefix(src, true /* is_map1 */);
+  emit_int8(0x0D);
   emit_operand(rcx, src, 0); // 1, src
 }
 
@@ -5441,6 +5524,12 @@ void Assembler::prefix(Prefix p) {
   emit_int8(p);
 }
 
+void Assembler::prefix16(int prefix) {
+  assert(UseAPX, "APX features not enabled");
+  emit_int8((prefix & 0xff00) >> 8);
+  emit_int8(prefix & 0xff);
+}
+
 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_ssse3(), "");
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -5648,6 +5737,7 @@ void Assembler::vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_
 void Assembler::ptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  assert(!needs_eevex(src.base(), src.index()), "does not support extended gprs");
   InstructionMark im(this);
   InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -5665,6 +5755,7 @@ void Assembler::ptest(XMMRegister dst, XMMRegister src) {
 
 void Assembler::vptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
+  assert(!needs_eevex(src.base(), src.index()), "does not support extended gprs");
   InstructionMark im(this);
   InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
   assert(dst != xnoreg, "sanity");
@@ -6078,8 +6169,8 @@ void Assembler::sbbl(Register dst, Register src) {
 
 void Assembler::setb(Condition cc, Register dst) {
   assert(0 <= cc && cc < 16, "illegal cc");
-  int encode = prefix_and_encode(dst->encoding(), true);
-  emit_int24(0x0F, (unsigned char)0x90 | cc, (0xC0 | encode));
+  int encode = prefix_and_encode(dst->encoding(), true, true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0x90 | cc, 0xC0, encode);
 }
 
 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
@@ -6212,34 +6303,34 @@ void Assembler::shrl(Address dst, int imm8) {
 
 
 void Assembler::shldl(Register dst, Register src) {
-  int encode = prefix_and_encode(src->encoding(), dst->encoding());
-  emit_int24(0x0F, (unsigned char)0xA5, (0xC0 | encode));
+  int encode = prefix_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xA5, 0xC0, encode);
 }
 
 void Assembler::shldl(Register dst, Register src, int8_t imm8) {
-  int encode = prefix_and_encode(src->encoding(), dst->encoding());
-  emit_int32(0x0F, (unsigned char)0xA4, (0xC0 | encode), imm8);
+  int encode = prefix_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xA4, 0xC0, encode, imm8);
 }
 
 void Assembler::shrdl(Register dst, Register src) {
-  int encode = prefix_and_encode(src->encoding(), dst->encoding());
-  emit_int24(0x0F, (unsigned char)0xAD, (0xC0 | encode));
+  int encode = prefix_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xAD, 0xC0, encode);
 }
 
 void Assembler::shrdl(Register dst, Register src, int8_t imm8) {
-  int encode = prefix_and_encode(src->encoding(), dst->encoding());
-  emit_int32(0x0F, (unsigned char)0xAC, (0xC0 | encode), imm8);
+  int encode = prefix_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xAC, 0xC0, encode, imm8);
 }
 
 #ifdef _LP64
 void Assembler::shldq(Register dst, Register src, int8_t imm8) {
-  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
-  emit_int32(0x0F, (unsigned char)0xA4, (0xC0 | encode), imm8);
+  int encode = prefixq_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xA4, 0xC0, encode, imm8);
 }
 
 void Assembler::shrdq(Register dst, Register src, int8_t imm8) {
-  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
-  emit_int32(0x0F, (unsigned char)0xAC, (0xC0 | encode), imm8);
+  int encode = prefixq_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xAC, 0xC0, encode, imm8);
 }
 #endif
 
@@ -6305,8 +6396,11 @@ void Assembler::sqrtss(XMMRegister dst, Address src) {
   emit_operand(dst, src, 0);
 }
 
-void Assembler::stmxcsr( Address dst) {
-  if (UseAVX > 0 ) {
+void Assembler::stmxcsr(Address dst) {
+  // This instruction should be SSE encoded with the REX2 prefix when an
+  // extended GPR is present. To be consistent when UseAPX is enabled, use
+  // this encoding even when an extended GPR is not used.
+  if (UseAVX > 0 && !UseAPX  ) {
     assert(VM_Version::supports_avx(), "");
     InstructionMark im(this);
     InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -6316,8 +6410,8 @@ void Assembler::stmxcsr( Address dst) {
   } else {
     NOT_LP64(assert(VM_Version::supports_sse(), ""));
     InstructionMark im(this);
-    prefix(dst);
-    emit_int16(0x0F, (unsigned char)0xAE);
+    prefix(dst, true /* is_map1 */);
+    emit_int8((unsigned char)0xAE);
     emit_operand(as_Register(3), dst, 0);
   }
 }
@@ -6457,34 +6551,32 @@ void Assembler::testl(Register dst, Address src) {
 void Assembler::tzcntl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
   emit_int8((unsigned char)0xF3);
-  int encode = prefix_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F,
-             (unsigned char)0xBC,
-             0xC0 | encode);
+  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBC, 0xC0, encode);
 }
 
 void Assembler::tzcntl(Register dst, Address src) {
   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
   InstructionMark im(this);
   emit_int8((unsigned char)0xF3);
-  prefix(src, dst);
-  emit_int16(0x0F, (unsigned char)0xBC);
+  prefix(src, dst, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xBC);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::tzcntq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
   emit_int8((unsigned char)0xF3);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xBC, (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBC, 0xC0, encode);
 }
 
 void Assembler::tzcntq(Register dst, Address src) {
   assert(VM_Version::supports_bmi1(), "tzcnt instruction not supported");
   InstructionMark im(this);
   emit_int8((unsigned char)0xF3);
-  prefixq(src, dst);
-  emit_int16(0x0F, (unsigned char)0xBC);
+  prefixq(src, dst, true /* is_map1 */);
+  emit_int8((unsigned char)0xBC);
   emit_operand(dst, src, 0);
 }
 
@@ -6530,23 +6622,23 @@ void Assembler::xabort(int8_t imm8) {
 
 void Assembler::xaddb(Address dst, Register src) {
   InstructionMark im(this);
-  prefix(dst, src, true);
-  emit_int16(0x0F, (unsigned char)0xC0);
+  prefix(dst, src, true, true /* is_map1 */);
+  emit_int8((unsigned char)0xC0);
   emit_operand(src, dst, 0);
 }
 
 void Assembler::xaddw(Address dst, Register src) {
   InstructionMark im(this);
   emit_int8(0x66);
-  prefix(dst, src);
-  emit_int16(0x0F, (unsigned char)0xC1);
+  prefix(dst, src, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xC1);
   emit_operand(src, dst, 0);
 }
 
 void Assembler::xaddl(Address dst, Register src) {
   InstructionMark im(this);
-  prefix(dst, src);
-  emit_int16(0x0F, (unsigned char)0xC1);
+  prefix(dst, src, false, true /* is_map1 */);
+  emit_int8((unsigned char)0xC1);
   emit_operand(src, dst, 0);
 }
 
@@ -9116,7 +9208,7 @@ void Assembler::extractps(Register dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_sse4_1(), "");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes, true);
   // imm8:
   // 0x00 - extract from bits 31:0
   // 0x01 - extract from bits 63:32
@@ -10886,7 +10978,7 @@ void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
   assert(VM_Version::supports_avx512bw(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_is_evex_instruction();
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16(0x7A, (0xC0 | encode));
 }
 
@@ -10895,7 +10987,7 @@ void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
   assert(VM_Version::supports_avx512bw(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_is_evex_instruction();
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes , true);
   emit_int16(0x7B, (0xC0 | encode));
 }
 
@@ -10904,7 +10996,7 @@ void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_is_evex_instruction();
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16(0x7C, (0xC0 | encode));
 }
 
@@ -10913,7 +11005,7 @@ void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_is_evex_instruction();
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16(0x7C, (0xC0 | encode));
 }
 
@@ -11673,7 +11765,8 @@ void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexS
 }
 
 // This is a 4 byte encoding
-void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc){
+void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool eevex_b, bool evex_v,
+                       bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
   // EVEX 0x62 prefix
   // byte1 = EVEX_4bytes;
 
@@ -11682,18 +11775,18 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
   // EVEX.b is not currently used for broadcast of single element or data rounding modes
   _attributes->set_evex_encoding(evex_encoding);
 
-  // P0: byte 2, initialized to RXBR`00mm
+  // P0: byte 2, initialized to RXBR'0mmm
   // instead of not'd
   int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
   byte2 = (~byte2) & 0xF0;
+  byte2 |= eevex_b ? EEVEX_B : 0;
   // confine opc opcode extensions in mm bits to lower two bits
-  // of form {0F, 0F_38, 0F_3A}
+  // of form {0F, 0F_38, 0F_3A, 0F_3C}
   byte2 |= opc;
 
   // P1: byte 3 as Wvvvv1pp
   int byte3 = ((~nds_enc) & 0xf) << 3;
-  // p[10] is always 1
-  byte3 |= EVEX_F;
+  byte3 |= (eevex_x ? 0 : EEVEX_X);
   byte3 |= (vex_w & 1) << 7;
   // confine pre opcode extensions in pp bits to lower two bits
   // of form {66, F3, F2}
@@ -11720,6 +11813,10 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
 }
 
 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
+  if (adr.base_needs_rex2() || adr.index_needs_rex2()) {
+    assert(UseAPX, "APX features not enabled");
+  }
+  bool is_extended = adr.base_needs_rex2() || adr.index_needs_rex2() || nds_enc >= 16 || xreg_enc >= 16;
   bool vex_r = (xreg_enc & 8) == 8;
   bool vex_b = adr.base_needs_rex();
   bool vex_x;
@@ -11729,13 +11826,12 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix
     vex_x = adr.index_needs_rex();
   }
   set_attributes(attributes);
-
   // For EVEX instruction (which is not marked as pure EVEX instruction) check and see if this instruction
   // is allowed in legacy mode and has resources which will fit in it.
   // Pure EVEX instructions will have is_evex_instruction set in their definition.
   if (!attributes->is_legacy_mode()) {
     if (UseAVX > 2 && !attributes->is_evex_instruction() && !is_managed()) {
-      if ((attributes->get_vector_len() != AVX_512bit) && (nds_enc < 16) && (xreg_enc < 16)) {
+      if ((attributes->get_vector_len() != AVX_512bit) && !is_extended) {
           attributes->set_is_legacy_mode();
       }
     }
@@ -11746,7 +11842,7 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix
             (attributes->get_vector_len() == AVX_512bit) ||
             (!_legacy_mode_vl) ||
             (attributes->is_legacy_mode())),"XMM register should be 0-15");
-    assert(((nds_enc < 16 && xreg_enc < 16) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
+    assert((!is_extended || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
   }
 
   clear_managed();
@@ -11760,8 +11856,10 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix
     } else {
       evex_v = (nds_enc >= 16);
     }
+    bool eevex_x = adr.index_needs_rex2();
+    bool eevex_b = adr.base_needs_rex2();
     attributes->set_is_evex_instruction();
-    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
+    evex_prefix(vex_r, vex_b, vex_x, evex_r, eevex_b, evex_v, eevex_x, nds_enc, pre, opc);
   } else {
     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
       attributes->set_rex_vex_w(false);
@@ -11770,7 +11868,11 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix
   }
 }
 
-int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
+int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool src_is_gpr) {
+  if (src_is_gpr && src_enc >= 16) {
+    assert(UseAPX, "APX features not enabled");
+  }
+  bool is_extended = dst_enc >= 16 || nds_enc >= 16 || src_enc >=16;
   bool vex_r = (dst_enc & 8) == 8;
   bool vex_b = (src_enc & 8) == 8;
   bool vex_x = false;
@@ -11782,7 +11884,7 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
   if (!attributes->is_legacy_mode()) {
     if (UseAVX > 2 && !attributes->is_evex_instruction() && !is_managed()) {
       if ((!attributes->uses_vl() || (attributes->get_vector_len() != AVX_512bit)) &&
-          (dst_enc < 16) && (nds_enc < 16) && (src_enc < 16)) {
+           !is_extended) {
           attributes->set_is_legacy_mode();
       }
     }
@@ -11799,7 +11901,7 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
             (!_legacy_mode_vl) ||
             (attributes->is_legacy_mode())),"XMM register should be 0-15");
     // Instruction with legacy_mode true should have dst, nds and src < 15
-    assert(((dst_enc < 16 && nds_enc < 16 && src_enc < 16) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
+    assert(((!is_extended) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
   }
 
   clear_managed();
@@ -11807,10 +11909,11 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
   {
     bool evex_r = (dst_enc >= 16);
     bool evex_v = (nds_enc >= 16);
+    bool evex_b = (src_enc >= 16) && src_is_gpr;
     // can use vex_x as bank extender on rm encoding
-    vex_x = (src_enc >= 16);
+    vex_x = (src_enc >= 16) && !src_is_gpr;
     attributes->set_is_evex_instruction();
-    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
+    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_b, evex_v, false /*eevex_x*/, nds_enc, pre, opc);
   } else {
     if (UseAVX > 2 && attributes->is_rex_vex_w_reverted()) {
       attributes->set_rex_vex_w(false);
@@ -11822,7 +11925,6 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS
   return (((dst_enc & 7) << 3) | (src_enc & 7));
 }
 
-
 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
                             VexOpcode opc, InstructionAttr *attributes) {
   if (UseAVX > 0) {
@@ -11836,12 +11938,12 @@ void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexS
 }
 
 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
-                                      VexOpcode opc, InstructionAttr *attributes) {
+                                      VexOpcode opc, InstructionAttr *attributes, bool src_is_gpr) {
   int dst_enc = dst->encoding();
   int src_enc = src->encoding();
   if (UseAVX > 0) {
     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes, src_is_gpr);
   } else {
     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
@@ -12336,50 +12438,51 @@ void Assembler::evpblendmq (XMMRegister dst, KRegister mask, XMMRegister nds, XM
 
 void Assembler::bzhiq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF5, (0xC0 | encode));
 }
 
 void Assembler::bzhil(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF5, (0xC0 | encode));
 }
 
 void Assembler::pextl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF5, (0xC0 | encode));
 }
 
 void Assembler::pdepl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF5, (0xC0 | encode));
 }
 
 void Assembler::pextq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF5, (0xC0 | encode));
 }
 
 void Assembler::pdepq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF5, (0xC0 | encode));
 }
 
 void Assembler::pextl(Register dst, Register src1, Address src2) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF5);
   emit_operand(dst, src2, 0);
@@ -12388,7 +12491,8 @@ void Assembler::pextl(Register dst, Register src1, Address src2) {
 void Assembler::pdepl(Register dst, Register src1, Address src2) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF5);
   emit_operand(dst, src2, 0);
@@ -12397,7 +12501,8 @@ void Assembler::pdepl(Register dst, Register src1, Address src2) {
 void Assembler::pextq(Register dst, Register src1, Address src2) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF5);
   emit_operand(dst, src2, 0);
@@ -12406,7 +12511,8 @@ void Assembler::pextq(Register dst, Register src1, Address src2) {
 void Assembler::pdepq(Register dst, Register src1, Address src2) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF5);
   emit_operand(dst, src2, 0);
@@ -12414,15 +12520,16 @@ void Assembler::pdepq(Register dst, Register src1, Address src2) {
 
 void Assembler::sarxl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
-  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF7, (0xC0 | encode));
 }
 
 void Assembler::sarxl(Register dst, Address src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF7);
   emit_operand(dst, src1, 0);
@@ -12430,15 +12537,16 @@ void Assembler::sarxl(Register dst, Address src1, Register src2) {
 
 void Assembler::sarxq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
-  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF7, (0xC0 | encode));
 }
 
 void Assembler::sarxq(Register dst, Address src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
   vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF7);
   emit_operand(dst, src1, 0);
@@ -12446,15 +12554,16 @@ void Assembler::sarxq(Register dst, Address src1, Register src2) {
 
 void Assembler::shlxl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
-  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF7, (0xC0 | encode));
 }
 
 void Assembler::shlxl(Register dst, Address src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF7);
   emit_operand(dst, src1, 0);
@@ -12462,15 +12571,16 @@ void Assembler::shlxl(Register dst, Address src1, Register src2) {
 
 void Assembler::shlxq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
-  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF7, (0xC0 | encode));
 }
 
 void Assembler::shlxq(Register dst, Address src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
   vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF7);
   emit_operand(dst, src1, 0);
@@ -12478,15 +12588,16 @@ void Assembler::shlxq(Register dst, Address src1, Register src2) {
 
 void Assembler::shrxl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
-  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF7, (0xC0 | encode));
 }
 
 void Assembler::shrxl(Register dst, Address src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF7);
   emit_operand(dst, src1, 0);
@@ -12494,15 +12605,16 @@ void Assembler::shrxl(Register dst, Address src1, Register src2) {
 
 void Assembler::shrxq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
-  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF7, (0xC0 | encode));
 }
 
 void Assembler::shrxq(Register dst, Address src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
   vex_prefix(src1, src2->encoding(), dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF7);
   emit_operand(dst, src1, 0);
@@ -12806,13 +12918,48 @@ void Assembler::emit_data64(jlong data,
   emit_int64(data);
 }
 
+int Assembler::get_base_prefix_bits(int enc) {
+  int bits = 0;
+  if (enc & 16) bits |= REX2BIT_B4;
+  if (enc & 8) bits |= REXBIT_B;
+  return bits;
+}
+
+int Assembler::get_index_prefix_bits(int enc) {
+  int bits = 0;
+  if (enc & 16) bits |= REX2BIT_X4;
+  if (enc & 8) bits |= REXBIT_X;
+  return bits;
+}
+
+int Assembler::get_base_prefix_bits(Register base) {
+  return base->is_valid() ? get_base_prefix_bits(base->encoding()) : 0;
+}
+
+int Assembler::get_index_prefix_bits(Register index) {
+  return index->is_valid() ? get_index_prefix_bits(index->encoding()) : 0;
+}
+
+int Assembler::get_reg_prefix_bits(int enc) {
+  int bits = 0;
+  if (enc & 16) bits |= REX2BIT_R4;
+  if (enc & 8) bits |= REXBIT_R;
+  return bits;
+}
+
 void Assembler::prefix(Register reg) {
-  if (reg->encoding() >= 8) {
+  if (reg->encoding() >= 16) {
+    prefix16(WREX2 | get_base_prefix_bits(reg->encoding()));
+  } else if (reg->encoding() >= 8) {
     prefix(REX_B);
   }
 }
 
 void Assembler::prefix(Register dst, Register src, Prefix p) {
+  if ((p & WREX2) || src->encoding() >= 16 || dst->encoding() >= 16) {
+    prefix_rex2(dst, src);
+    return;
+  }
   if (src->encoding() >= 8) {
     p = (Prefix)(p | REX_B);
   }
@@ -12825,7 +12972,17 @@ void Assembler::prefix(Register dst, Register src, Prefix p) {
   }
 }
 
+void Assembler::prefix_rex2(Register dst, Register src) {
+  int bits = 0;
+  bits |= get_base_prefix_bits(src->encoding());
+  bits |= get_reg_prefix_bits(dst->encoding());
+  prefix16(WREX2 | bits);
+}
+
 void Assembler::prefix(Register dst, Address adr, Prefix p) {
+  if (adr.base_needs_rex2() || adr.index_needs_rex2() || dst->encoding() >= 16) {
+    prefix_rex2(dst, adr);
+  }
   if (adr.base_needs_rex()) {
     if (adr.index_needs_rex()) {
       assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
@@ -12846,7 +13003,19 @@ void Assembler::prefix(Register dst, Address adr, Prefix p) {
   }
 }
 
-void Assembler::prefix(Address adr) {
+void Assembler::prefix_rex2(Register dst, Address adr) {
+  assert(!adr.index_needs_rex2(), "prefix(Register dst, Address adr) does not support handling of an X");
+  int bits = 0;
+  bits |= get_base_prefix_bits(adr.base());
+  bits |= get_reg_prefix_bits(dst->encoding());
+  prefix16(WREX2 | bits);
+}
+
+void Assembler::prefix(Address adr, bool is_map1) {
+  if (adr.base_needs_rex2() || adr.index_needs_rex2()) {
+    prefix_rex2(adr, is_map1);
+    return;
+  }
   if (adr.base_needs_rex()) {
     if (adr.index_needs_rex()) {
       prefix(REX_XB);
@@ -12858,9 +13027,21 @@ void Assembler::prefix(Address adr) {
       prefix(REX_X);
     }
   }
+  if (is_map1) emit_int8(0x0F);
 }
 
-void Assembler::prefix(Address adr, Register reg, bool byteinst) {
+void Assembler::prefix_rex2(Address adr, bool is_map1) {
+  int bits = is_map1 ? REX2BIT_M0 : 0;
+  bits |= get_base_prefix_bits(adr.base());
+  bits |= get_index_prefix_bits(adr.index());
+  prefix16(WREX2 | bits);
+}
+
+void Assembler::prefix(Address adr, Register reg, bool byteinst, bool is_map1) {
+  if (reg->encoding() >= 16 || adr.base_needs_rex2() || adr.index_needs_rex2()) {
+    prefix_rex2(adr, reg, byteinst, is_map1);
+    return;
+  }
   if (reg->encoding() < 8) {
     if (adr.base_needs_rex()) {
       if (adr.index_needs_rex()) {
@@ -12890,9 +13071,22 @@ void Assembler::prefix(Address adr, Register reg, bool byteinst) {
       }
     }
   }
+  if (is_map1) emit_int8(0x0F);
+}
+
+void Assembler::prefix_rex2(Address adr, Register reg, bool byteinst, bool is_map1) {
+  int bits = is_map1 ? REX2BIT_M0 : 0;
+  bits |= get_base_prefix_bits(adr.base());
+  bits |= get_index_prefix_bits(adr.index());
+  bits |= get_reg_prefix_bits(reg->encoding());
+  prefix16(WREX2 | bits);
 }
 
 void Assembler::prefix(Address adr, XMMRegister reg) {
+  if (reg->encoding() >= 16 || adr.base_needs_rex2() || adr.index_needs_rex2()) {
+    prefixq_rex2(adr, reg);
+    return;
+  }
   if (reg->encoding() < 8) {
     if (adr.base_needs_rex()) {
       if (adr.index_needs_rex()) {
@@ -12922,17 +13116,37 @@ void Assembler::prefix(Address adr, XMMRegister reg) {
   }
 }
 
-int Assembler::prefix_and_encode(int reg_enc, bool byteinst) {
+void Assembler::prefix_rex2(Address adr, XMMRegister src) {
+  int bits = 0;
+  bits |= get_base_prefix_bits(adr.base());
+  bits |= get_index_prefix_bits(adr.index());
+  bits |= get_reg_prefix_bits(src->encoding());
+  prefix16(WREX2 | bits);
+}
+
+int Assembler::prefix_and_encode(int reg_enc, bool byteinst, bool is_map1) {
+  if (reg_enc >= 16) {
+    return prefix_and_encode_rex2(reg_enc, is_map1);
+  }
   if (reg_enc >= 8) {
     prefix(REX_B);
     reg_enc -= 8;
   } else if (byteinst && reg_enc >= 4) {
     prefix(REX);
   }
-  return reg_enc;
+  int opc_prefix = is_map1 ? 0x0F00 : 0;
+  return opc_prefix | reg_enc;
 }
 
-int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
+int Assembler::prefix_and_encode_rex2(int reg_enc, bool is_map1) {
+  prefix16(WREX2 | (is_map1 ? REX2BIT_M0 : 0) | get_base_prefix_bits(reg_enc));
+  return reg_enc & 0x7;
+}
+
+int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte, bool is_map1) {
+  if (src_enc >= 16 || dst_enc >= 16) {
+    return prefix_and_encode_rex2(dst_enc, src_enc, is_map1 ? REX2BIT_M0 : 0);
+  }
   if (dst_enc < 8) {
     if (src_enc >= 8) {
       prefix(REX_B);
@@ -12949,16 +13163,46 @@ int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, boo
     }
     dst_enc -= 8;
   }
+  int opcode_prefix = is_map1 ? 0x0F00 : 0;
+  return opcode_prefix | (dst_enc << 3 | src_enc);
+}
+
+int Assembler::prefix_and_encode_rex2(int dst_enc, int src_enc, int init_bits) {
+  int bits = init_bits;
+  bits |= get_reg_prefix_bits(dst_enc);
+  bits |= get_base_prefix_bits(src_enc);
+  dst_enc &= 0x7;
+  src_enc &= 0x7;
+  prefix16(WREX2 | bits);
   return dst_enc << 3 | src_enc;
 }
 
-int8_t Assembler::get_prefixq(Address adr) {
-  int8_t prfx = get_prefixq(adr, rax);
-  assert(REX_W <= prfx && prfx <= REX_WXB, "must be");
-  return prfx;
+bool Assembler::prefix_is_rex2(int prefix) {
+  return (prefix & 0xFF00) == WREX2;
 }
 
-int8_t Assembler::get_prefixq(Address adr, Register src) {
+int Assembler::get_prefixq(Address adr, bool is_map1) {
+  if (adr.base_needs_rex2() || adr.index_needs_rex2()) {
+    return get_prefixq_rex2(adr, is_map1);
+  }
+  int8_t prfx = get_prefixq(adr, rax);
+  assert(REX_W <= prfx && prfx <= REX_WXB, "must be");
+  return is_map1 ? (((int16_t)prfx) << 8) | 0x0F : (int16_t)prfx;
+}
+
+int Assembler::get_prefixq_rex2(Address adr, bool is_map1) {
+  assert(UseAPX, "APX features not enabled");
+  int bits = REXBIT_W;
+  if (is_map1) bits |= REX2BIT_M0;
+  bits |= get_base_prefix_bits(adr.base());
+  bits |= get_index_prefix_bits(adr.index());
+  return WREX2 | bits;
+}
+
+int Assembler::get_prefixq(Address adr, Register src, bool is_map1) {
+  if (adr.base_needs_rex2() || adr.index_needs_rex2() || src->encoding() >= 16) {
+    return get_prefixq_rex2(adr, src, is_map1);
+  }
   int8_t prfx = (int8_t)(REX_W +
                          ((int)adr.base_needs_rex()) +
                          ((int)adr.index_needs_rex() << 1) +
@@ -12994,18 +13238,42 @@ int8_t Assembler::get_prefixq(Address adr, Register src) {
     }
   }
 #endif
-  return prfx;
+  return is_map1 ? (((int16_t)prfx) << 8) | 0x0F : (int16_t)prfx;
+}
+
+int Assembler::get_prefixq_rex2(Address adr, Register src, bool is_map1) {
+  assert(UseAPX, "APX features not enabled");
+  int bits = REXBIT_W;
+  if (is_map1) bits |= REX2BIT_M0;
+  bits |= get_base_prefix_bits(adr.base());
+  bits |= get_index_prefix_bits(adr.index());
+  bits |= get_reg_prefix_bits(src->encoding());
+  return WREX2 | bits;
 }
 
 void Assembler::prefixq(Address adr) {
-  emit_int8(get_prefixq(adr));
+  if (adr.base_needs_rex2() || adr.index_needs_rex2()) {
+    prefix16(get_prefixq_rex2(adr));
+  } else {
+    emit_int8(get_prefixq(adr));
+  }
 }
 
-void Assembler::prefixq(Address adr, Register src) {
-  emit_int8(get_prefixq(adr, src));
+void Assembler::prefixq(Address adr, Register src, bool is_map1) {
+  if (adr.base_needs_rex2() || adr.index_needs_rex2() || src->encoding() >= 16) {
+    prefix16(get_prefixq_rex2(adr, src, is_map1));
+  } else {
+    emit_int8(get_prefixq(adr, src));
+    if (is_map1) emit_int8(0x0F);
+  }
 }
 
+
 void Assembler::prefixq(Address adr, XMMRegister src) {
+  if (src->encoding() >= 16 || adr.base_needs_rex2() || adr.index_needs_rex2()) {
+    prefixq_rex2(adr, src);
+    return;
+  }
   if (src->encoding() < 8) {
     if (adr.base_needs_rex()) {
       if (adr.index_needs_rex()) {
@@ -13037,17 +13305,38 @@ void Assembler::prefixq(Address adr, XMMRegister src) {
   }
 }
 
-int Assembler::prefixq_and_encode(int reg_enc) {
+void Assembler::prefixq_rex2(Address adr, XMMRegister src) {
+  int bits = REXBIT_W;
+  bits |= get_base_prefix_bits(adr.base());
+  bits |= get_index_prefix_bits(adr.index());
+  bits |= get_reg_prefix_bits(src->encoding());
+  prefix16(WREX2 | bits);
+}
+
+int Assembler::prefixq_and_encode(int reg_enc, bool is_map1) {
+  if (reg_enc >= 16) {
+    return prefixq_and_encode_rex2(reg_enc, is_map1);
+  }
   if (reg_enc < 8) {
     prefix(REX_W);
   } else {
     prefix(REX_WB);
     reg_enc -= 8;
   }
-  return reg_enc;
+  int opcode_prefix = is_map1 ? 0x0F00 : 0;
+  return opcode_prefix | reg_enc;
 }
 
-int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
+
+int Assembler::prefixq_and_encode_rex2(int reg_enc, bool is_map1) {
+  prefix16(WREX2 | REXBIT_W | (is_map1 ? REX2BIT_M0: 0) | get_base_prefix_bits(reg_enc));
+  return reg_enc & 0x7;
+}
+
+int Assembler::prefixq_and_encode(int dst_enc, int src_enc, bool is_map1) {
+  if (dst_enc >= 16 || src_enc >= 16) {
+    return prefixq_and_encode_rex2(dst_enc, src_enc, is_map1);
+  }
   if (dst_enc < 8) {
     if (src_enc < 8) {
       prefix(REX_W);
@@ -13064,7 +13353,22 @@ int Assembler::prefixq_and_encode(int dst_enc, int src_enc) {
     }
     dst_enc -= 8;
   }
-  return dst_enc << 3 | src_enc;
+  int opcode_prefix = is_map1 ? 0x0F00 : 0;
+  return opcode_prefix | (dst_enc << 3 | src_enc);
+}
+
+int Assembler::prefixq_and_encode_rex2(int dst_enc, int src_enc, bool is_map1) {
+  int init_bits = REXBIT_W | (is_map1 ? REX2BIT_M0 : 0);
+  return prefix_and_encode_rex2(dst_enc, src_enc, init_bits);
+}
+
+void Assembler::emit_prefix_and_int8(int prefix, int b1) {
+  if ((prefix & 0xFF00) == 0) {
+    emit_int16(prefix, b1);
+  } else {
+    assert((prefix & 0xFF00) != WREX2 || UseAPX, "APX features not enabled");
+    emit_int24((prefix & 0xFF00) >> 8, prefix & 0x00FF, b1);
+  }
 }
 
 void Assembler::adcq(Register dst, int32_t imm32) {
@@ -13074,7 +13378,7 @@ void Assembler::adcq(Register dst, int32_t imm32) {
 
 void Assembler::adcq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), 0x13);
+  emit_prefix_and_int8(get_prefixq(src, dst), 0x13);
   emit_operand(dst, src, 0);
 }
 
@@ -13091,7 +13395,7 @@ void Assembler::addq(Address dst, int32_t imm32) {
 
 void Assembler::addq(Address dst, Register src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst, src), 0x01);
+  emit_prefix_and_int8(get_prefixq(dst, src), 0x01);
   emit_operand(src, dst, 0);
 }
 
@@ -13102,7 +13406,7 @@ void Assembler::addq(Register dst, int32_t imm32) {
 
 void Assembler::addq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), 0x03);
+  emit_prefix_and_int8(get_prefixq(src, dst), 0x03);
   emit_operand(dst, src, 0);
 }
 
@@ -13113,24 +13417,35 @@ void Assembler::addq(Register dst, Register src) {
 
 void Assembler::adcxq(Register dst, Register src) {
   //assert(VM_Version::supports_adx(), "adx instructions not supported");
-  emit_int8(0x66);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int32(0x0F,
-             0x38,
-             (unsigned char)0xF6,
-             (0xC0 | encode));
+  if (needs_rex2(dst, src)) {
+    InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+    int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3C, &attributes, true);
+    emit_int16((unsigned char)0x66, (0xC0 | encode));
+  } else {
+    emit_int8(0x66);
+    int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+    emit_int32(0x0F,
+               0x38,
+               (unsigned char)0xF6,
+               (0xC0 | encode));
+  }
 }
 
 void Assembler::adoxq(Register dst, Register src) {
   //assert(VM_Version::supports_adx(), "adx instructions not supported");
-  emit_int8((unsigned char)0xF3);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int32(0x0F,
-             0x38,
-             (unsigned char)0xF6,
-             (0xC0 | encode));
+  if (needs_rex2(dst, src)) {
+    InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+    int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_3C, &attributes, true);
+    emit_int16((unsigned char)0x66, (0xC0 | encode));
+  } else {
+    emit_int8((unsigned char)0xF3);
+    int encode = prefixq_and_encode(dst->encoding(), src->encoding());
+    emit_int32(0x0F,
+               0x38,
+               (unsigned char)0xF6,
+               (0xC0 | encode));
+  }
 }
-
 void Assembler::andq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
@@ -13144,7 +13459,7 @@ void Assembler::andq(Register dst, int32_t imm32) {
 
 void Assembler::andq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), 0x23);
+  emit_prefix_and_int8(get_prefixq(src, dst), 0x23);
   emit_operand(dst, src, 0);
 }
 
@@ -13155,52 +13470,54 @@ void Assembler::andq(Register dst, Register src) {
 
 void Assembler::andq(Address dst, Register src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst, src), 0x21);
+  emit_prefix_and_int8(get_prefixq(dst, src), 0x21);
   emit_operand(src, dst, 0);
 }
 
 void Assembler::andnq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF2, (0xC0 | encode));
 }
 
 void Assembler::andnq(Register dst, Register src1, Address src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
   vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2, 0);
 }
 
 void Assembler::bsfq(Register dst, Register src) {
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xBC, (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBC, 0xC0, encode);
 }
 
 void Assembler::bsrq(Register dst, Register src) {
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBD, 0xC0, encode);
 }
 
 void Assembler::bswapq(Register reg) {
-  int encode = prefixq_and_encode(reg->encoding());
-  emit_int16(0x0F, (0xC8 | encode));
+  int encode = prefixq_and_encode(reg->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xC8, encode);
 }
 
 void Assembler::blsiq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF3, (0xC0 | encode));
 }
 
 void Assembler::blsiq(Register dst, Address src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
   vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src, 0);
@@ -13208,15 +13525,16 @@ void Assembler::blsiq(Register dst, Address src) {
 
 void Assembler::blsmskq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rdx->encoding(),  dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF3, (0xC0 | encode));
 }
 
 void Assembler::blsmskq(Register dst, Address src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
   vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rdx, src, 0);
@@ -13224,15 +13542,16 @@ void Assembler::blsmskq(Register dst, Address src) {
 
 void Assembler::blsrq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF3, (0xC0 | encode));
 }
 
 void Assembler::blsrq(Register dst, Address src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
   vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src, 0);
@@ -13248,8 +13567,8 @@ void Assembler::cdqe() {
 
 void Assembler::clflush(Address adr) {
   assert(VM_Version::supports_clflush(), "should do");
-  prefix(adr);
-  emit_int16(0x0F, (unsigned char)0xAE);
+  prefix(adr, true /* is_map1 */);
+  emit_int8((unsigned char)0xAE);
   emit_operand(rdi, adr, 0);
 }
 
@@ -13261,9 +13580,9 @@ void Assembler::clflushopt(Address adr) {
   assert(adr.disp() == 0, "displacement should be 0");
   // instruction prefix is 0x66
   emit_int8(0x66);
-  prefix(adr);
+  prefix(adr, true /* is_map1 */);
   // opcode family is 0x0F 0xAE
-  emit_int16(0x0F, (unsigned char)0xAE);
+  emit_int8((unsigned char)0xAE);
   // extended opcode byte is 7 == rdi
   emit_operand(rdi, adr, 0);
 }
@@ -13276,21 +13595,22 @@ void Assembler::clwb(Address adr) {
   assert(adr.disp() == 0, "displacement should be 0");
   // instruction prefix is 0x66
   emit_int8(0x66);
-  prefix(adr);
+  prefix(adr, true /* is_map1 */);
   // opcode family is 0x0f 0xAE
-  emit_int16(0x0F, (unsigned char)0xAE);
+  emit_int8((unsigned char)0xAE);
   // extended opcode byte is 6 == rsi
   emit_operand(rsi, adr, 0);
 }
 
 void Assembler::cmovq(Condition cc, Register dst, Register src) {
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (0x40 | cc), (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((0x40 | cc), 0xC0, encode);
 }
 
 void Assembler::cmovq(Condition cc, Register dst, Address src) {
   InstructionMark im(this);
-  emit_int24(get_prefixq(src, dst), 0x0F, (0x40 | cc));
+  int prefix = get_prefixq(src, dst, true /* is_map1 */);
+  emit_prefix_and_int8(prefix, (0x40 | cc));
   emit_operand(dst, src, 0);
 }
 
@@ -13307,7 +13627,7 @@ void Assembler::cmpq(Register dst, int32_t imm32) {
 
 void Assembler::cmpq(Address dst, Register src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst, src), 0x39);
+  emit_prefix_and_int8(get_prefixq(dst, src), 0x39);
   emit_operand(src, dst, 0);
 }
 
@@ -13318,20 +13638,21 @@ void Assembler::cmpq(Register dst, Register src) {
 
 void Assembler::cmpq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), 0x3B);
+  emit_prefix_and_int8(get_prefixq(src, dst), 0x3B);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::cmpxchgq(Register reg, Address adr) {
   InstructionMark im(this);
-  emit_int24(get_prefixq(adr, reg), 0x0F, (unsigned char)0xB1);
+  int prefix = get_prefixq(adr, reg, true /* is_map1 */);
+  emit_prefix_and_int8(prefix, (unsigned char)0xB1);
   emit_operand(reg, adr, 0);
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes, true);
   emit_int16(0x2A, (0xC0 | encode));
 }
 
@@ -13360,7 +13681,9 @@ void Assembler::cvttsd2siq(Register dst, Address src) {
   // F2 REX.W 0F 2C /r
   // CVTTSD2SI r64, xmm1/m64
   InstructionMark im(this);
-  emit_int32((unsigned char)0xF2, REX_W, 0x0F, 0x2C);
+  emit_int8((unsigned char)0xF2);
+  prefixq(src, dst, true /* is_map1 */);
+  emit_int8((unsigned char)0x2C);
   emit_operand(dst, src, 0);
 }
 
@@ -13402,28 +13725,32 @@ void Assembler::decq(Register dst) {
 void Assembler::decq(Address dst) {
   // Don't use it directly. Use MacroAssembler::decrementq() instead.
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst), (unsigned char)0xFF);
+  emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xFF);
   emit_operand(rcx, dst, 0);
 }
 
+// can't use REX2
 void Assembler::fxrstor(Address src) {
   InstructionMark im(this);
   emit_int24(get_prefixq(src), 0x0F, (unsigned char)0xAE);
   emit_operand(as_Register(1), src, 0);
 }
 
+// can't use REX2
 void Assembler::xrstor(Address src) {
   InstructionMark im(this);
   emit_int24(get_prefixq(src), 0x0F, (unsigned char)0xAE);
   emit_operand(as_Register(5), src, 0);
 }
 
+// can't use REX2
 void Assembler::fxsave(Address dst) {
   InstructionMark im(this);
   emit_int24(get_prefixq(dst), 0x0F, (unsigned char)0xAE);
   emit_operand(as_Register(0), dst, 0);
 }
 
+// cant use REX2
 void Assembler::xsave(Address dst) {
   InstructionMark im(this);
   emit_int24(get_prefixq(dst), 0x0F, (unsigned char)0xAE);
@@ -13441,8 +13768,8 @@ void Assembler::divq(Register src) {
 }
 
 void Assembler::imulq(Register dst, Register src) {
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xAF, (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xAF, 0xC0, encode);
 }
 
 void Assembler::imulq(Register src) {
@@ -13476,7 +13803,8 @@ void Assembler::imulq(Register dst, Register src, int value) {
 
 void Assembler::imulq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int24(get_prefixq(src, dst), 0x0F, (unsigned char)0xAF);
+  int prefix = get_prefixq(src, dst, true /* is_map1 */);
+  emit_prefix_and_int8(prefix, (unsigned char)0xAF);
   emit_operand(dst, src, 0);
 }
 
@@ -13497,7 +13825,7 @@ void Assembler::incq(Register dst) {
 void Assembler::incq(Address dst) {
   // Don't use it directly. Use MacroAssembler::incrementq() instead.
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst), (unsigned char)0xFF);
+  emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xFF);
   emit_operand(rax, dst, 0);
 }
 
@@ -13507,7 +13835,7 @@ void Assembler::lea(Register dst, Address src) {
 
 void Assembler::leaq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), (unsigned char)0x8D);
+  emit_prefix_and_int8(get_prefixq(src, dst), (unsigned char)0x8D);
   emit_operand(dst, src, 0);
 }
 
@@ -13565,16 +13893,16 @@ void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder con
 void Assembler::lzcntq(Register dst, Register src) {
   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
   emit_int8((unsigned char)0xF3);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xBD, (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBD, 0xC0, encode);
 }
 
 void Assembler::lzcntq(Register dst, Address src) {
   assert(VM_Version::supports_lzcnt(), "encoding is treated as BSR");
   InstructionMark im(this);
   emit_int8((unsigned char)0xF3);
-  prefixq(src, dst);
-  emit_int16(0x0F, (unsigned char)0xBD);
+  prefixq(src, dst, true /* is_map1 */);
+  emit_int8((unsigned char)0xBD);
   emit_operand(dst, src, 0);
 }
 
@@ -13582,7 +13910,7 @@ void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true);
   emit_int16(0x6E, (0xC0 | encode));
 }
 
@@ -13591,7 +13919,7 @@ void Assembler::movdq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true);
   emit_int16(0x7E,
              (0xC0 | encode));
 }
@@ -13604,19 +13932,19 @@ void Assembler::movq(Register dst, Register src) {
 
 void Assembler::movq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), (unsigned char)0x8B);
+  emit_prefix_and_int8(get_prefixq(src, dst), (unsigned char)0x8B);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::movq(Address dst, Register src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst, src), (unsigned char)0x89);
+  emit_prefix_and_int8(get_prefixq(dst, src), (unsigned char)0x89);
   emit_operand(src, dst, 0);
 }
 
 void Assembler::movq(Address dst, int32_t imm32) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst), (unsigned char)0xC7);
+  emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xC7);
   emit_operand(as_Register(0), dst, 4);
   emit_int32(imm32);
 }
@@ -13629,28 +13957,27 @@ void Assembler::movq(Register dst, int32_t imm32) {
 
 void Assembler::movsbq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int24(get_prefixq(src, dst),
-             0x0F,
-             (unsigned char)0xBE);
+  int prefix = get_prefixq(src, dst, true /* is_map1 */);
+  emit_prefix_and_int8(prefix, (unsigned char)0xBE);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::movsbq(Register dst, Register src) {
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xBE, (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBE, 0xC0, encode);
 }
 
 void Assembler::movslq(Address dst, int32_t imm32) {
   assert(is_simm32(imm32), "lost bits");
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst), (unsigned char)0xC7);
+  emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xC7);
   emit_operand(rax, dst, 4);
   emit_int32(imm32);
 }
 
 void Assembler::movslq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), 0x63);
+  emit_prefix_and_int8(get_prefixq(src, dst), 0x63);
   emit_operand(dst, src, 0);
 }
 
@@ -13661,46 +13988,43 @@ void Assembler::movslq(Register dst, Register src) {
 
 void Assembler::movswq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int24(get_prefixq(src, dst),
-             0x0F,
-             (unsigned char)0xBF);
+  int prefix = get_prefixq(src, dst, true /* is_map1 */);
+  emit_prefix_and_int8(prefix, (unsigned char)0xBF);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::movswq(Register dst, Register src) {
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xBF, (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBF, 0xC0, encode);
 }
 
 void Assembler::movzbq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int24(get_prefixq(src, dst),
-             0x0F,
-             (unsigned char)0xB6);
+  int prefix = get_prefixq(src, dst, true /* is_map1 */);
+  emit_prefix_and_int8(prefix, (unsigned char)0xB6);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::movzbq(Register dst, Register src) {
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xB6, (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xB6, 0xC0, encode);
 }
 
 void Assembler::movzwq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int24(get_prefixq(src, dst),
-             0x0F,
-             (unsigned char)0xB7);
+  int prefix = get_prefixq(src, dst, true /* is_map1 */);
+  emit_prefix_and_int8(prefix, (unsigned char)0xB7);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::movzwq(Register dst, Register src) {
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xB7, (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xB7, 0xC0, encode);
 }
 
 void Assembler::mulq(Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src), (unsigned char)0xF7);
+  emit_prefix_and_int8(get_prefixq(src), (unsigned char)0xF7);
   emit_operand(rsp, src, 0);
 }
 
@@ -13711,8 +14035,8 @@ void Assembler::mulq(Register src) {
 
 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes, true);
   emit_int16((unsigned char)0xF6, (0xC0 | encode));
 }
 
@@ -13723,7 +14047,7 @@ void Assembler::negq(Register dst) {
 
 void Assembler::negq(Address dst) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst), (unsigned char)0xF7);
+  emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xF7);
   emit_operand(as_Register(3), dst, 0);
 }
 
@@ -13733,24 +14057,22 @@ void Assembler::notq(Register dst) {
 }
 
 void Assembler::btq(Register dst, Register src) {
-  int encode = prefixq_and_encode(src->encoding(), dst->encoding());
-  emit_int24(0x0F, (unsigned char)0xA3, (encode | 0xC0));
+  int encode = prefixq_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xA3, 0xC0, encode);
 }
 
 void Assembler::btq(Register src, int imm8) {
   assert(isByte(imm8), "not a byte");
-  int encode = prefixq_and_encode(src->encoding());
-  emit_int16(0x0f, 0xba);
-  emit_int8(0xe0|encode);
+  int encode = prefixq_and_encode(src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xBA, 0xE0, encode);
   emit_int8(imm8);
 }
 
 void Assembler::btsq(Address dst, int imm8) {
   assert(isByte(imm8), "not a byte");
   InstructionMark im(this);
-  emit_int24(get_prefixq(dst),
-             0x0F,
-             (unsigned char)0xBA);
+  int prefix = get_prefixq(dst, true /* is_map1 */);
+  emit_prefix_and_int8(prefix, (unsigned char)0xBA);
   emit_operand(rbp /* 5 */, dst, 1);
   emit_int8(imm8);
 }
@@ -13758,9 +14080,8 @@ void Assembler::btsq(Address dst, int imm8) {
 void Assembler::btrq(Address dst, int imm8) {
   assert(isByte(imm8), "not a byte");
   InstructionMark im(this);
-  emit_int24(get_prefixq(dst),
-             0x0F,
-             (unsigned char)0xBA);
+  int prefix = get_prefixq(dst, true /* is_map1 */);
+  emit_prefix_and_int8(prefix, (unsigned char)0xBA);
   emit_operand(rsi /* 6 */, dst, 1);
   emit_int8(imm8);
 }
@@ -13773,7 +14094,7 @@ void Assembler::orq(Address dst, int32_t imm32) {
 
 void Assembler::orq(Address dst, Register src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst, src), (unsigned char)0x09);
+  emit_prefix_and_int8(get_prefixq(dst, src), (unsigned char)0x09);
   emit_operand(src, dst, 0);
 }
 
@@ -13789,7 +14110,7 @@ void Assembler::orq_imm32(Register dst, int32_t imm32) {
 
 void Assembler::orq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), 0x0B);
+  emit_prefix_and_int8(get_prefixq(src, dst), 0x0B);
   emit_operand(dst, src, 0);
 }
 
@@ -13801,28 +14122,27 @@ void Assembler::orq(Register dst, Register src) {
 void Assembler::popcntq(Register dst, Address src) {
   assert(VM_Version::supports_popcnt(), "must support");
   InstructionMark im(this);
-  emit_int32((unsigned char)0xF3,
-             get_prefixq(src, dst),
-             0x0F,
-             (unsigned char)0xB8);
+  emit_int8((unsigned char)0xF3);
+  emit_prefix_and_int8(get_prefixq(src, dst, true /* is_map1 */), (unsigned char) 0xB8);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::popcntq(Register dst, Register src) {
   assert(VM_Version::supports_popcnt(), "must support");
   emit_int8((unsigned char)0xF3);
-  int encode = prefixq_and_encode(dst->encoding(), src->encoding());
-  emit_int24(0x0F, (unsigned char)0xB8, (0xC0 | encode));
+  int encode = prefixq_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */);
+  emit_opcode_prefix_and_encoding((unsigned char)0xB8, 0xC0, encode);
 }
 
 void Assembler::popq(Address dst) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst), (unsigned char)0x8F);
+  emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0x8F);
   emit_operand(rax, dst, 0);
 }
 
 void Assembler::popq(Register dst) {
-  emit_int8((unsigned char)0x58 | dst->encoding());
+  int encode = prefix_and_encode(dst->encoding());
+  emit_int8((unsigned char)0x58 | encode);
 }
 
 // Precomputable: popa, pusha, vzeroupper
@@ -13962,7 +14282,7 @@ void Assembler::vzeroall() {
 
 void Assembler::pushq(Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src), (unsigned char)0xFF);
+  emit_prefix_and_int8(get_prefixq(src), (unsigned char)0xFF);
   emit_operand(rsi, src, 0);
 }
 
@@ -13988,15 +14308,16 @@ void Assembler::rcrq(Register dst, int imm8) {
 
 void Assembler::rorxl(Register dst, Register src, int imm8) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24((unsigned char)0xF0, (0xC0 | encode), imm8);
 }
 
 void Assembler::rorxl(Register dst, Address src, int imm8) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0xF0);
   emit_operand(dst, src, 1);
@@ -14005,15 +14326,16 @@ void Assembler::rorxl(Register dst, Address src, int imm8) {
 
 void Assembler::rorxq(Register dst, Register src, int imm8) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0,  src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes, true);
   emit_int24((unsigned char)0xF0, (0xC0 | encode), imm8);
 }
 
 void Assembler::rorxq(Register dst, Address src, int imm8) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0xF0);
   emit_operand(dst, src, 1);
@@ -14025,11 +14347,11 @@ void Assembler::salq(Address dst, int imm8) {
   InstructionMark im(this);
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   if (imm8 == 1) {
-    emit_int16(get_prefixq(dst), (unsigned char)0xD1);
+    emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xD1);
     emit_operand(as_Register(4), dst, 0);
   }
   else {
-    emit_int16(get_prefixq(dst), (unsigned char)0xC1);
+    emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xC1);
     emit_operand(as_Register(4), dst, 1);
     emit_int8(imm8);
   }
@@ -14037,7 +14359,7 @@ void Assembler::salq(Address dst, int imm8) {
 
 void Assembler::salq(Address dst) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst), (unsigned char)0xD3);
+  emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xD3);
   emit_operand(as_Register(4), dst, 0);
 }
 
@@ -14060,11 +14382,11 @@ void Assembler::sarq(Address dst, int imm8) {
   InstructionMark im(this);
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   if (imm8 == 1) {
-    emit_int16(get_prefixq(dst), (unsigned char)0xD1);
+    emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xD1);
     emit_operand(as_Register(7), dst, 0);
   }
   else {
-    emit_int16(get_prefixq(dst), (unsigned char)0xC1);
+    emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xC1);
     emit_operand(as_Register(7), dst, 1);
     emit_int8(imm8);
   }
@@ -14072,7 +14394,7 @@ void Assembler::sarq(Address dst, int imm8) {
 
 void Assembler::sarq(Address dst) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst), (unsigned char)0xD3);
+  emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xD3);
   emit_operand(as_Register(7), dst, 0);
 }
 
@@ -14105,7 +14427,7 @@ void Assembler::sbbq(Register dst, int32_t imm32) {
 
 void Assembler::sbbq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), 0x1B);
+  emit_prefix_and_int8(get_prefixq(src, dst), 0x1B);
   emit_operand(dst, src, 0);
 }
 
@@ -14147,7 +14469,7 @@ void Assembler::shrq(Register dst) {
 
 void Assembler::shrq(Address dst) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst), (unsigned char)0xD3);
+  emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xD3);
   emit_operand(as_Register(5), dst, 0);
 }
 
@@ -14155,11 +14477,11 @@ void Assembler::shrq(Address dst, int imm8) {
   InstructionMark im(this);
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   if (imm8 == 1) {
-    emit_int16(get_prefixq(dst), (unsigned char)0xD1);
+    emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xD1);
     emit_operand(as_Register(5), dst, 0);
   }
   else {
-    emit_int16(get_prefixq(dst), (unsigned char)0xC1);
+    emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xC1);
     emit_operand(as_Register(5), dst, 1);
     emit_int8(imm8);
   }
@@ -14173,7 +14495,7 @@ void Assembler::subq(Address dst, int32_t imm32) {
 
 void Assembler::subq(Address dst, Register src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst, src), 0x29);
+  emit_prefix_and_int8(get_prefixq(dst, src), 0x29);
   emit_operand(src, dst, 0);
 }
 
@@ -14190,7 +14512,7 @@ void Assembler::subq_imm32(Register dst, int32_t imm32) {
 
 void Assembler::subq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), 0x2B);
+  emit_prefix_and_int8(get_prefixq(src, dst), 0x2B);
   emit_operand(dst, src, 0);
 }
 
@@ -14201,7 +14523,7 @@ void Assembler::subq(Register dst, Register src) {
 
 void Assembler::testq(Address dst, int32_t imm32) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst), (unsigned char)0xF7);
+  emit_prefix_and_int8(get_prefixq(dst), (unsigned char)0xF7);
   emit_operand(as_Register(0), dst, 4);
   emit_int32(imm32);
 }
@@ -14229,19 +14551,20 @@ void Assembler::testq(Register dst, Register src) {
 
 void Assembler::testq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), (unsigned char)0x85);
+  emit_prefix_and_int8(get_prefixq(src, dst), (unsigned char)0x85);
   emit_operand(dst, src, 0);
 }
 
 void Assembler::xaddq(Address dst, Register src) {
   InstructionMark im(this);
-  emit_int24(get_prefixq(dst, src), 0x0F, (unsigned char)0xC1);
+  int prefix = get_prefixq(dst, src, true /* is_map1 */);
+  emit_prefix_and_int8(prefix, (unsigned char)0xC1);
   emit_operand(src, dst, 0);
 }
 
 void Assembler::xchgq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), (unsigned char)0x87);
+  emit_prefix_and_int8(get_prefixq(src, dst), (unsigned char)0x87);
   emit_operand(dst, src, 0);
 }
 
@@ -14257,7 +14580,7 @@ void Assembler::xorq(Register dst, Register src) {
 
 void Assembler::xorq(Register dst, Address src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(src, dst), 0x33);
+  emit_prefix_and_int8(get_prefixq(src, dst), 0x33);
   emit_operand(dst, src, 0);
 }
 
@@ -14274,7 +14597,7 @@ void Assembler::xorq(Address dst, int32_t imm32) {
 
 void Assembler::xorq(Address dst, Register src) {
   InstructionMark im(this);
-  emit_int16(get_prefixq(dst, src), 0x31);
+  emit_prefix_and_int8(get_prefixq(dst, src), 0x31);
   emit_operand(src, dst, 0);
 }
 
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index 097bc9de62a..32a81519f82 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -308,17 +308,29 @@ class Address {
 
  private:
   bool base_needs_rex() const {
-    return _base->is_valid() && _base->encoding() >= 8;
+    return _base->is_valid() && ((_base->encoding() & 8) == 8);
+  }
+
+  bool base_needs_rex2() const {
+    return _base->is_valid() && _base->encoding() >= 16;
   }
 
   bool index_needs_rex() const {
-    return _index->is_valid() &&_index->encoding() >= 8;
+    return _index->is_valid() && ((_index->encoding() & 8) == 8);
+  }
+
+  bool index_needs_rex2() const {
+    return _index->is_valid() &&_index->encoding() >= 16;
   }
 
   bool xmmindex_needs_rex() const {
     return _xmmindex->is_valid() && ((_xmmindex->encoding() & 8) == 8);
   }
 
+  bool xmmindex_needs_rex2() const {
+    return _xmmindex->is_valid() && _xmmindex->encoding() >= 16;
+  }
+
   relocInfo::relocType reloc() const { return _rspec.type(); }
 
   friend class Assembler;
@@ -508,12 +520,26 @@ class Assembler : public AbstractAssembler  {
     REX_WRX    = 0x4E,
     REX_WRXB   = 0x4F,
 
+    REX2       = 0xd5,
+    WREX2      = REX2 << 8,
+
     VEX_3bytes = 0xC4,
     VEX_2bytes = 0xC5,
     EVEX_4bytes = 0x62,
     Prefix_EMPTY = 0x0
   };
 
+  enum PrefixBits {
+    REXBIT_B  = 0x01,
+    REXBIT_X  = 0x02,
+    REXBIT_R  = 0x04,
+    REXBIT_W  = 0x08,
+    REX2BIT_B4 = 0x10,
+    REX2BIT_X4 = 0x20,
+    REX2BIT_R4 = 0x40,
+    REX2BIT_M0 = 0x80
+  };
+
   enum VexPrefix {
     VEX_B = 0x20,
     VEX_X = 0x40,
@@ -525,10 +551,18 @@ class Assembler : public AbstractAssembler  {
     EVEX_F  = 0x04,
     EVEX_V  = 0x08,
     EVEX_Rb = 0x10,
+    EVEX_B  = 0x20,
     EVEX_X  = 0x40,
     EVEX_Z  = 0x80
   };
 
+  enum ExtEvexPrefix {
+    EEVEX_R = 0x10,
+    EEVEX_B = 0x08,
+    EEVEX_X = 0x04,
+    EEVEX_V = 0x08
+  };
+
   enum EvexRoundPrefix {
     EVEX_RNE = 0x0,
     EVEX_RD  = 0x1,
@@ -540,7 +574,7 @@ class Assembler : public AbstractAssembler  {
     VEX_SIMD_NONE = 0x0,
     VEX_SIMD_66   = 0x1,
     VEX_SIMD_F3   = 0x2,
-    VEX_SIMD_F2   = 0x3
+    VEX_SIMD_F2   = 0x3,
   };
 
   enum VexOpcode {
@@ -548,6 +582,7 @@ class Assembler : public AbstractAssembler  {
     VEX_OPCODE_0F    = 0x1,
     VEX_OPCODE_0F_38 = 0x2,
     VEX_OPCODE_0F_3A = 0x3,
+    VEX_OPCODE_0F_3C = 0x4,
     VEX_OPCODE_MASK  = 0x1F
   };
 
@@ -572,7 +607,8 @@ class Assembler : public AbstractAssembler  {
     EVEX_OVM  = 20,
     EVEX_M128 = 21,
     EVEX_DUP  = 22,
-    EVEX_ETUP = 23
+    EVEX_NOSCALE = 23,
+    EVEX_ETUP = 24
   };
 
   enum EvexInputSizeInBits {
@@ -686,33 +722,62 @@ private:
   InstructionAttr *_attributes;
   void set_attributes(InstructionAttr* attributes);
 
+  int get_base_prefix_bits(int enc);
+  int get_index_prefix_bits(int enc);
+  int get_base_prefix_bits(Register base);
+  int get_index_prefix_bits(Register index);
+  int get_reg_prefix_bits(int enc);
+
   // 64bit prefixes
   void prefix(Register reg);
   void prefix(Register dst, Register src, Prefix p);
+  void prefix_rex2(Register dst, Register src);
   void prefix(Register dst, Address adr, Prefix p);
+  void prefix_rex2(Register dst, Address adr);
 
-  void prefix(Address adr);
-  void prefix(Address adr, Register reg,  bool byteinst = false);
+  // The is_map1 bool indicates an x86 map1 instruction which, when
+  // legacy encoded, uses a 0x0F opcode prefix.  By specification, the
+  // opcode prefix is omitted when using rex2 encoding in support
+  // of APX extended GPRs.
+  void prefix(Address adr, bool is_map1 = false);
+  void prefix_rex2(Address adr, bool is_map1 = false);
+  void prefix(Address adr, Register reg,  bool byteinst = false, bool is_map1 = false);
+  void prefix_rex2(Address adr, Register reg,  bool byteinst = false, bool is_map1 = false);
   void prefix(Address adr, XMMRegister reg);
+  void prefix_rex2(Address adr, XMMRegister reg);
 
-  int prefix_and_encode(int reg_enc, bool byteinst = false);
-  int prefix_and_encode(int dst_enc, int src_enc) {
-    return prefix_and_encode(dst_enc, false, src_enc, false);
+  int prefix_and_encode(int reg_enc, bool byteinst = false, bool is_map1 = false);
+  int prefix_and_encode_rex2(int reg_enc, bool is_map1 = false);
+  int prefix_and_encode(int dst_enc, int src_enc, bool is_map1 = false) {
+    return prefix_and_encode(dst_enc, false, src_enc, false, is_map1);
   }
-  int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
+  int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte, bool is_map1 = false);
 
+  int prefix_and_encode_rex2(int dst_enc, int src_enc, int init_bits = 0);
   // Some prefixq variants always emit exactly one prefix byte, so besides a
   // prefix-emitting method we provide a method to get the prefix byte to emit,
   // which can then be folded into a byte stream.
-  int8_t get_prefixq(Address adr);
-  int8_t get_prefixq(Address adr, Register reg);
+  int get_prefixq(Address adr, bool is_map1 = false);
+  int get_prefixq_rex2(Address adr, bool is_map1 = false);
+  int get_prefixq(Address adr, Register reg, bool is_map1 = false);
+  int get_prefixq_rex2(Address adr, Register reg, bool ismap1 = false);
 
   void prefixq(Address adr);
-  void prefixq(Address adr, Register reg);
+  void prefixq(Address adr, Register reg, bool is_map1 = false);
   void prefixq(Address adr, XMMRegister reg);
+  void prefixq_rex2(Address adr, XMMRegister src);
 
-  int prefixq_and_encode(int reg_enc);
-  int prefixq_and_encode(int dst_enc, int src_enc);
+  bool prefix_is_rex2(int prefix);
+
+  int prefixq_and_encode(int reg_enc, bool is_map1 = false);
+  int prefixq_and_encode_rex2(int reg_enc, bool is_map1 = false);
+  int prefixq_and_encode(int dst_enc, int src_enc, bool is_map1 = false);
+  int prefixq_and_encode_rex2(int dst_enc, int src_enc, bool is_map1 = false);
+
+  bool needs_rex2(Register reg1, Register reg2 = noreg, Register reg3 = noreg);
+
+  bool needs_eevex(Register reg1, Register reg2 = noreg, Register reg3 = noreg);
+  bool needs_eevex(int enc1, int enc2 = -1, int enc3 = -1);
 
   void rex_prefix(Address adr, XMMRegister xreg,
                   VexSimdPrefix pre, VexOpcode opc, bool rex_w);
@@ -721,22 +786,21 @@ private:
 
   void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 
-  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
-                   int nds_enc, VexSimdPrefix pre, VexOpcode opc);
+  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_v, bool evex_r, bool evex_b,
+                       bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 
-  void vex_prefix(Address adr, int nds_enc, int xreg_enc,
-                  VexSimdPrefix pre, VexOpcode opc,
+  void vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
                   InstructionAttr *attributes);
 
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc,
-                             InstructionAttr *attributes);
+                             InstructionAttr *attributes, bool src_is_gpr = false);
 
   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
                    VexOpcode opc, InstructionAttr *attributes);
 
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
-                             VexOpcode opc, InstructionAttr *attributes);
+                             VexOpcode opc, InstructionAttr *attributes, bool src_is_gpr = false);
 
   // Helper functions for groups of instructions
   void emit_arith_b(int op1, int op2, Register dst, int imm8);
@@ -821,6 +885,10 @@ private:
   void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
   void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
 
+  void emit_prefix_and_int8(int prefix, int b1);
+  void emit_opcode_prefix_and_encoding(int byte1, int ocp_and_encoding);
+  void emit_opcode_prefix_and_encoding(int byte1, int byte2, int ocp_and_encoding);
+  void emit_opcode_prefix_and_encoding(int byte1, int byte2, int ocp_and_encoding, int byte3);
   bool always_reachable(AddressLiteral adr) NOT_LP64( { return true; } );
   bool        reachable(AddressLiteral adr) NOT_LP64( { return true; } );
 
@@ -907,6 +975,8 @@ private:
   // Instruction prefixes
   void prefix(Prefix p);
 
+  void prefix16(int p);
+
   public:
 
   // Creation
diff --git a/src/hotspot/cpu/x86/assembler_x86.inline.hpp b/src/hotspot/cpu/x86/assembler_x86.inline.hpp
index 66aebb32e21..d2aa76fcf56 100644
--- a/src/hotspot/cpu/x86/assembler_x86.inline.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.inline.hpp
@@ -30,23 +30,53 @@
 #include "code/codeCache.hpp"
 
 #ifndef _LP64
-inline int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { return reg_enc; }
-inline int Assembler::prefixq_and_encode(int reg_enc) { return reg_enc; }
+inline int Assembler::prefix_and_encode(int reg_enc, bool byteinst, bool is_map1)
+{
+    int opc_prefix = is_map1 ? 0x0F00 : 0;
+    return opc_prefix | reg_enc;
+}
 
-inline int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) { return dst_enc << 3 | src_enc; }
-inline int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { return dst_enc << 3 | src_enc; }
+inline int Assembler::prefixq_and_encode(int reg_enc, bool is_map1) {
+    int opc_prefix = is_map1 ? 0xF00 : 0;
+    return opc_prefix | reg_enc;
+}
+
+inline int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte, bool is_map1) {
+    int opc_prefix = is_map1 ? 0xF00 : 0;
+    return opc_prefix | (dst_enc << 3 | src_enc);
+}
+
+inline int Assembler::prefixq_and_encode(int dst_enc, int src_enc, bool is_map1) {
+    int opc_prefix = is_map1 ? 0xF00 : 0;
+    return opc_prefix | dst_enc << 3 | src_enc;
+}
 
 inline void Assembler::prefix(Register reg) {}
 inline void Assembler::prefix(Register dst, Register src, Prefix p) {}
 inline void Assembler::prefix(Register dst, Address adr, Prefix p) {}
-inline void Assembler::prefix(Address adr) {}
+
+inline void Assembler::prefix(Address adr, bool is_map1) {
+    if (is_map1) {
+        emit_int8(0x0F);
+    }
+}
+
 inline void Assembler::prefixq(Address adr) {}
 
-inline void Assembler::prefix(Address adr, Register reg,  bool byteinst) {}
-inline void Assembler::prefixq(Address adr, Register reg) {}
+inline void Assembler::prefix(Address adr, Register reg,  bool byteinst, bool is_map1) {
+    if (is_map1) {
+        emit_int8(0x0F);
+    }
+}
+inline void Assembler::prefixq(Address adr, Register reg, bool is_map1) {
+    if (is_map1) {
+        emit_int8(0x0F);
+    }
+}
 
 inline void Assembler::prefix(Address adr, XMMRegister reg) {}
 inline void Assembler::prefixq(Address adr, XMMRegister reg) {}
+
 #endif // _LP64
 
 #endif // CPU_X86_ASSEMBLER_X86_INLINE_HPP
diff --git a/src/hotspot/cpu/x86/globals_x86.hpp b/src/hotspot/cpu/x86/globals_x86.hpp
index 0f01eb3a0c4..d73a8d03171 100644
--- a/src/hotspot/cpu/x86/globals_x86.hpp
+++ b/src/hotspot/cpu/x86/globals_x86.hpp
@@ -232,8 +232,10 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
                                                                             \
   product(bool, IntelJccErratumMitigation, true, DIAGNOSTIC,                \
              "Turn off JVM mitigations related to Intel micro code "        \
-             "mitigations for the Intel JCC erratum")
-
+             "mitigations for the Intel JCC erratum")                       \
+                                                                            \
+  product(bool, UseAPX, false, EXPERIMENTAL,                                \
+          "Use Advanced Performance Extensions on x86")                     \
 // end of ARCH_FLAGS
 
 #endif // CPU_X86_GLOBALS_X86_HPP
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index fbc952fc8d1..f5389d0ef90 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -1002,6 +1002,14 @@ void VM_Version::get_processor_features() {
     }
   }
 
+  // APX support not enabled yet
+  if (UseAPX) {
+    if (!FLAG_IS_DEFAULT(UseAPX)) {
+        warning("APX is not supported on this CPU.");
+    }
+    FLAG_SET_DEFAULT(UseAPX, false);
+  }
+
   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
   } else {