8205398: AES-CBC decryption algorithm using AVX512 instructions
Co-authored-by: Shay Gueron <shay.gueron@intel.com> Co-authored-by: Smita Kamath <smita.kamath@intel.com> Co-authored-by: Shravya Rukmannagari <shravya.rukmannagari@intel.com> Reviewed-by: kvn
This commit is contained in:
parent
277b35da28
commit
a6ac56a69e
@ -1303,6 +1303,16 @@ void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
|
||||
emit_int8(0xC0 | encode);
|
||||
}
|
||||
|
||||
// Emits the vector form of the AES decrypt round instruction (opcode byte 0xDE,
// 66 prefix, 0F 38 opcode map) operating on dst, nds and src at the given
// vector length. The instruction is always marked as EVEX-encoded.
void Assembler::vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_vaes(), "");

  const bool vex_w       = false;
  const bool legacy_mode = false;
  const bool no_mask_reg = true;
  const bool uses_vl     = true;
  InstructionAttr attr(vector_len, vex_w, legacy_mode, no_mask_reg, uses_vl);
  attr.set_is_evex_instruction();

  // The prefix emitter returns the register bits that go into the final byte.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  emit_int8((unsigned char)0xDE);               // opcode
  emit_int8((unsigned char)(0xC0 | reg_bits));  // 0xC0: register-direct operand form
}
|
||||
|
||||
|
||||
void Assembler::aesdeclast(XMMRegister dst, Address src) {
|
||||
assert(VM_Version::supports_aes(), "");
|
||||
InstructionMark im(this);
|
||||
@ -1320,6 +1330,15 @@ void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the vector form of the final AES decrypt round instruction (opcode byte
// 0xDF, 66 prefix, 0F 38 opcode map) operating on dst, nds and src at the given
// vector length. The instruction is always marked as EVEX-encoded.
void Assembler::vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_vaes(), "");

  const bool vex_w       = false;
  const bool legacy_mode = false;
  const bool no_mask_reg = true;
  const bool uses_vl     = true;
  InstructionAttr attr(vector_len, vex_w, legacy_mode, no_mask_reg, uses_vl);
  attr.set_is_evex_instruction();

  // The prefix emitter returns the register bits that go into the final byte.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  emit_int8((unsigned char)0xDF);               // opcode
  emit_int8((unsigned char)(0xC0 | reg_bits));  // 0xC0: register-direct operand form
}
|
||||
|
||||
void Assembler::aesenc(XMMRegister dst, Address src) {
|
||||
assert(VM_Version::supports_aes(), "");
|
||||
InstructionMark im(this);
|
||||
@ -4391,6 +4410,15 @@ void Assembler::vpalignr(XMMRegister dst, XMMRegister nds, XMMRegister src, int
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
// Emits a 512-bit instruction with opcode byte 0x03 in the 66 / 0F 3A opcode map
// (quadword align), followed by the immediate imm8. Requires EVEX support.
void Assembler::evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
  assert(VM_Version::supports_evex(), "");

  const bool vex_w       = true;
  const bool legacy_mode = false;
  const bool no_mask_reg = false;
  const bool uses_vl     = true;
  InstructionAttr attr(AVX_512bit, vex_w, legacy_mode, no_mask_reg, uses_vl);

  // The prefix emitter returns the register bits that go into the operand byte.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_3A, &attr);
  emit_int8(0x3);                               // opcode
  emit_int8((unsigned char)(0xC0 | reg_bits));  // 0xC0: register-direct operand form
  emit_int8(imm8);                              // trailing immediate
}
|
||||
|
||||
void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
|
||||
assert(VM_Version::supports_sse4_1(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
@ -6708,7 +6736,29 @@ void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
|
||||
emit_int8(0x59);
|
||||
emit_operand(dst, src);
|
||||
}
|
||||
// Register-source form: emits opcode byte 0x5A in the 66 / 0F 38 opcode map with
// dst and src as operands. Not valid for 128-bit vectors; requires AVX512DQ.
void Assembler::evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(vector_len != Assembler::AVX_128bit, "");
  assert(VM_Version::supports_avx512dq(), "");

  const bool vex_w       = true;
  const bool legacy_mode = false;
  const bool no_mask_reg = false;
  const bool uses_vl     = true;
  InstructionAttr attr(vector_len, vex_w, legacy_mode, no_mask_reg, uses_vl);
  attr.set_rex_vex_w_reverted();

  // No second source register is used, so 0 is passed for nds.
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  emit_int8(0x5A);                              // opcode
  emit_int8((unsigned char)(0xC0 | reg_bits));  // 0xC0: register-direct operand form
}
|
||||
|
||||
// Memory-source form: emits opcode byte 0x5A in the 66 / 0F 38 opcode map with
// dst as the register operand and src as the memory operand.
// Not valid for 128-bit vectors; requires AVX512DQ.
void Assembler::evbroadcasti64x2(XMMRegister dst, Address src, int vector_len) {
  assert(vector_len != Assembler::AVX_128bit, "");
  assert(VM_Version::supports_avx512dq(), "");
  assert(dst != xnoreg, "sanity");

  InstructionMark im(this);

  const bool vex_w       = true;
  const bool legacy_mode = false;
  const bool no_mask_reg = false;
  const bool uses_vl     = true;
  InstructionAttr attr(vector_len, vex_w, legacy_mode, no_mask_reg, uses_vl);
  attr.set_rex_vex_w_reverted();
  attr.set_address_attributes(/* tuple_type */ EVEX_T2, /* input_size_in_bits */ EVEX_64bit);

  // The prefix emitter takes the memory operand first, so src and dst are
  // passed in swapped positions; no second source register is used (0).
  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attr);
  emit_int8(0x5A);         // opcode
  emit_operand(dst, src);  // register + memory operand bytes
}
|
||||
|
||||
// scalar single/double precision replicate
|
||||
|
||||
|
@ -926,7 +926,8 @@ private:
|
||||
void aesenc(XMMRegister dst, XMMRegister src);
|
||||
void aesenclast(XMMRegister dst, Address src);
|
||||
void aesenclast(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// Three-operand vector forms of the AES decrypt round (vaesdec) and final
// decrypt round (vaesdeclast); vector_len selects the operand width.
void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
||||
void andl(Address dst, int32_t imm32);
|
||||
void andl(Register dst, int32_t imm32);
|
||||
@ -1739,6 +1740,7 @@ private:
|
||||
|
||||
void palignr(XMMRegister dst, XMMRegister src, int imm8);
|
||||
void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
|
||||
// Quadword align; always emitted with a 512-bit vector length, imm8 is the trailing immediate byte.
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||
|
||||
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
|
||||
|
||||
@ -2102,6 +2104,9 @@ private:
|
||||
void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
|
||||
|
||||
// Register-source and memory-source forms; both reject AVX_128bit and require AVX512DQ.
void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
|
||||
|
||||
// scalar single/double precision replicate
|
||||
void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
|
||||
|
@ -4084,6 +4084,312 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits one 512-bit vaesdec per state register xmm1..xmm8 (in that order),
// each using xmm_reg as the round key operand.
void roundDec(XMMRegister xmm_reg) {
  const XMMRegister state[] = { xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8 };
  const int state_count = (int)(sizeof(state) / sizeof(state[0]));
  for (int i = 0; i < state_count; i++) {
    __ vaesdec(state[i], state[i], xmm_reg, Assembler::AVX_512bit);
  }
}
|
||||
|
||||
// Emits one 512-bit vaesdeclast per state register xmm1..xmm8 (in that order),
// each using xmm_reg as the round key operand.
void roundDeclast(XMMRegister xmm_reg) {
  const XMMRegister state[] = { xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8 };
  const int state_count = (int)(sizeof(state) / sizeof(state[0]));
  for (int i = 0; i < state_count; i++) {
    __ vaesdeclast(state[i], state[i], xmm_reg, Assembler::AVX_512bit);
  }
}
|
||||
|
||||
// Loads 16 bytes from key + offset into xmmdst, byte-shuffles them with either
// the supplied mask register or, when none is given, the shared key shuffle
// mask in memory, and then applies a 512-bit evshufi64x2 with selector 0 to
// xmmdst.
// NOTE(review): selector 0 presumably replicates the low 128-bit lane into all
// four lanes - confirm against the instruction reference.
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = NULL) {
  __ movdqu(xmmdst, Address(key, offset));

  const bool use_mask_from_memory = (xmm_shuf_mask == NULL);
  if (use_mask_from_memory) {
    __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  } else {
    __ pshufb(xmmdst, xmm_shuf_mask);
  }

  __ evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);
}
|
||||
|
||||
// Generates the AES-CBC decryption stub that uses 512-bit vector AES instructions.
//
// Stub arguments:
//   c_rarg0 - source (cipher text) array address
//   c_rarg1 - destination (plain text) array address
//   c_rarg2 - expanded key array address
//   c_rarg3 - r byte array: holds the init vector on entry and receives the
//             last cipher text block on exit
//   c_rarg4 - source length in bytes, must be a multiple of 16
//             (read from the stack on Win64)
// Stub result:
//   rax     - the input length
//
// Layout of the generated code:
//   1. Load the IV and all round keys into 512-bit registers.
//   2. Main loop: decrypt 512 bytes (8 registers of 64 bytes) per iteration
//      while at least 512 bytes remain.
//   3. Remainder loop: decrypt one 16-byte block per iteration.
//   4. Store the last cipher text block back to rvec, clear the round key
//      registers and return.
//
// Returns the entry address of the generated stub.
address generate_cipherBlockChaining_decryptVectorAESCrypt() {
  assert(VM_Version::supports_vaes(), "need vector AES (VAES) instructions");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
  address start = __ pc();

  const Register from = c_rarg0;  // source array address
  const Register to = c_rarg1;    // destination array address
  const Register key = c_rarg2;   // key array address
  const Register rvec = c_rarg3;  // r byte array initialized from initvector array address
                                  // and left with the results of the last encryption block
#ifndef _WIN64
  const Register len_reg = c_rarg4;  // src len (must be multiple of blocksize 16)
#else
  const Address len_mem(rbp, 6 * wordSize);  // length is on stack on Win64
  const Register len_reg = r11;              // pick the volatile windows register
#endif

  // L_last_round is the single shared tail of the main loop for all key sizes,
  // mirroring Lcbc_dec_rem_last in the remainder loop.
  Label Loop, KEY_192, KEY_256, L_last_round, Lcbc_dec_rem_loop,
        Lcbc_dec_rem_last, Lcbc_dec_ret, Lcbc_dec_rem, Lcbc_exit;

  __ enter();

#ifdef _WIN64
  // on win64, fill len_reg from stack position
  __ movl(len_reg, len_mem);
#else
  __ push(len_reg); // Save; popped into rax as the return value at exit
#endif
  __ push(rbx);     // rbx is used for the round count below
  __ vzeroupper();

  // Temporary variable declaration for swapping key bytes.
  // xmm1 doubles as B0 later; the mask is only needed while keys are loaded.
  const XMMRegister xmm_key_shuf_mask = xmm1;
  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));

  // Calculate number of rounds from key size: 44 for 10-rounds, 52 for 12-rounds, 60 for 14-rounds
  const Register rounds = rbx;
  __ movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

  const XMMRegister IV = xmm0;
  // Load IV and broadcast value to 512-bits
  __ evbroadcasti64x2(IV, Address(rvec, 0), Assembler::AVX_512bit);

  // Temporary variables for storing round keys
  const XMMRegister RK0 = xmm30;
  const XMMRegister RK1 = xmm9;
  const XMMRegister RK2 = xmm18;
  const XMMRegister RK3 = xmm19;
  const XMMRegister RK4 = xmm20;
  const XMMRegister RK5 = xmm21;
  const XMMRegister RK6 = xmm22;
  const XMMRegister RK7 = xmm23;
  const XMMRegister RK8 = xmm24;
  const XMMRegister RK9 = xmm25;
  const XMMRegister RK10 = xmm26;

  // Load and shuffle key
  // the java expanded key ordering is rotated one position from what we want
  // so we start from 1*16 here and hit 0*16 last
  ev_load_key(RK1, key, 1 * 16, xmm_key_shuf_mask);
  ev_load_key(RK2, key, 2 * 16, xmm_key_shuf_mask);
  ev_load_key(RK3, key, 3 * 16, xmm_key_shuf_mask);
  ev_load_key(RK4, key, 4 * 16, xmm_key_shuf_mask);
  ev_load_key(RK5, key, 5 * 16, xmm_key_shuf_mask);
  ev_load_key(RK6, key, 6 * 16, xmm_key_shuf_mask);
  ev_load_key(RK7, key, 7 * 16, xmm_key_shuf_mask);
  ev_load_key(RK8, key, 8 * 16, xmm_key_shuf_mask);
  ev_load_key(RK9, key, 9 * 16, xmm_key_shuf_mask);
  ev_load_key(RK10, key, 10 * 16, xmm_key_shuf_mask);
  ev_load_key(RK0, key, 0 * 16, xmm_key_shuf_mask);

  // Variables for storing source cipher text
  const XMMRegister S0 = xmm10;
  const XMMRegister S1 = xmm11;
  const XMMRegister S2 = xmm12;
  const XMMRegister S3 = xmm13;
  const XMMRegister S4 = xmm14;
  const XMMRegister S5 = xmm15;
  const XMMRegister S6 = xmm16;
  const XMMRegister S7 = xmm17;

  // Variables for storing decrypted text.
  // These are exactly the registers roundDec/roundDeclast operate on.
  const XMMRegister B0 = xmm1;
  const XMMRegister B1 = xmm2;
  const XMMRegister B2 = xmm3;
  const XMMRegister B3 = xmm4;
  const XMMRegister B4 = xmm5;
  const XMMRegister B5 = xmm6;
  const XMMRegister B6 = xmm7;
  const XMMRegister B7 = xmm8;

  // 10-round keys need nothing more; larger keys load their extra round keys.
  __ cmpl(rounds, 44);
  __ jcc(Assembler::greater, KEY_192);
  __ jmp(Loop);

  __ BIND(KEY_192);
  const XMMRegister RK11 = xmm27;
  const XMMRegister RK12 = xmm28;
  ev_load_key(RK11, key, 11 * 16, xmm_key_shuf_mask);
  ev_load_key(RK12, key, 12 * 16, xmm_key_shuf_mask);

  __ cmpl(rounds, 52);
  __ jcc(Assembler::greater, KEY_256);
  __ jmp(Loop);

  __ BIND(KEY_256);
  const XMMRegister RK13 = xmm29;
  const XMMRegister RK14 = xmm31;
  ev_load_key(RK13, key, 13 * 16, xmm_key_shuf_mask);
  ev_load_key(RK14, key, 14 * 16, xmm_key_shuf_mask);

  // ---- Main loop: 512 bytes per iteration ----
  __ BIND(Loop);
  __ cmpl(len_reg, 512);
  __ jcc(Assembler::below, Lcbc_dec_rem);
  __ subl(len_reg, 512);

  // Load 8 x 64 bytes of cipher text and advance the source pointer.
  __ evmovdquq(S0, Address(from, 0 * 64), Assembler::AVX_512bit);
  __ evmovdquq(S1, Address(from, 1 * 64), Assembler::AVX_512bit);
  __ evmovdquq(S2, Address(from, 2 * 64), Assembler::AVX_512bit);
  __ evmovdquq(S3, Address(from, 3 * 64), Assembler::AVX_512bit);
  __ evmovdquq(S4, Address(from, 4 * 64), Assembler::AVX_512bit);
  __ evmovdquq(S5, Address(from, 5 * 64), Assembler::AVX_512bit);
  __ evmovdquq(S6, Address(from, 6 * 64), Assembler::AVX_512bit);
  __ evmovdquq(S7, Address(from, 7 * 64), Assembler::AVX_512bit);
  __ leaq(from, Address(from, 8 * 64));

  // Initial key addition: Bx = Sx ^ RK1.
  __ evpxorq(B0, S0, RK1, Assembler::AVX_512bit);
  __ evpxorq(B1, S1, RK1, Assembler::AVX_512bit);
  __ evpxorq(B2, S2, RK1, Assembler::AVX_512bit);
  __ evpxorq(B3, S3, RK1, Assembler::AVX_512bit);
  __ evpxorq(B4, S4, RK1, Assembler::AVX_512bit);
  __ evpxorq(B5, S5, RK1, Assembler::AVX_512bit);
  __ evpxorq(B6, S6, RK1, Assembler::AVX_512bit);
  __ evpxorq(B7, S7, RK1, Assembler::AVX_512bit);

  // Rebuild IV and S0..S6 from adjacent register pairs so they can be xor-ed
  // into B0..B7 after the last round; S7 is left untouched and becomes the
  // next IV.
  // NOTE(review): presumably this shifts the cipher text stream by one 16-byte
  // block (imm8 = 6 quadwords) so each register lines up with the preceding
  // cipher text block - confirm against the valignq instruction reference.
  __ evalignq(IV, S0, IV, 0x06);
  __ evalignq(S0, S1, S0, 0x06);
  __ evalignq(S1, S2, S1, 0x06);
  __ evalignq(S2, S3, S2, 0x06);
  __ evalignq(S3, S4, S3, 0x06);
  __ evalignq(S4, S5, S4, 0x06);
  __ evalignq(S5, S6, S5, 0x06);
  __ evalignq(S6, S7, S6, 0x06);

  // Rounds common to all key sizes.
  roundDec(RK2);
  roundDec(RK3);
  roundDec(RK4);
  roundDec(RK5);
  roundDec(RK6);
  roundDec(RK7);
  roundDec(RK8);
  roundDec(RK9);
  roundDec(RK10);

  // Extra rounds for the larger key sizes; every key size finishes in the
  // same last-round code instead of three identical copies of it.
  __ cmpl(rounds, 44);
  __ jcc(Assembler::belowEqual, L_last_round);
  roundDec(RK11);
  roundDec(RK12);

  __ cmpl(rounds, 52);
  __ jcc(Assembler::belowEqual, L_last_round);
  roundDec(RK13);
  roundDec(RK14);

  __ BIND(L_last_round);
  roundDeclast(RK0);

  // CBC chaining: xor each decrypted register with the realigned cipher text.
  __ evpxorq(B0, B0, IV, Assembler::AVX_512bit);
  __ evpxorq(B1, B1, S0, Assembler::AVX_512bit);
  __ evpxorq(B2, B2, S1, Assembler::AVX_512bit);
  __ evpxorq(B3, B3, S2, Assembler::AVX_512bit);
  __ evpxorq(B4, B4, S3, Assembler::AVX_512bit);
  __ evpxorq(B5, B5, S4, Assembler::AVX_512bit);
  __ evpxorq(B6, B6, S5, Assembler::AVX_512bit);
  __ evpxorq(B7, B7, S6, Assembler::AVX_512bit);
  // The last 64 bytes of cipher text seed the chaining of the next iteration.
  __ evmovdquq(IV, S7, Assembler::AVX_512bit);

  // Store 8 x 64 bytes of plain text and advance the destination pointer.
  __ evmovdquq(Address(to, 0 * 64), B0, Assembler::AVX_512bit);
  __ evmovdquq(Address(to, 1 * 64), B1, Assembler::AVX_512bit);
  __ evmovdquq(Address(to, 2 * 64), B2, Assembler::AVX_512bit);
  __ evmovdquq(Address(to, 3 * 64), B3, Assembler::AVX_512bit);
  __ evmovdquq(Address(to, 4 * 64), B4, Assembler::AVX_512bit);
  __ evmovdquq(Address(to, 5 * 64), B5, Assembler::AVX_512bit);
  __ evmovdquq(Address(to, 6 * 64), B6, Assembler::AVX_512bit);
  __ evmovdquq(Address(to, 7 * 64), B7, Assembler::AVX_512bit);
  __ leaq(to, Address(to, 8 * 64));
  __ jmp(Loop);

  // ---- Remainder: one 16-byte block per iteration ----
  __ BIND(Lcbc_dec_rem);
  // NOTE(review): selector 0x03 presumably moves the highest 128-bit lane of
  // IV (the most recent cipher text block) into the lowest lane, which is the
  // only lane the code below stores - confirm against the instruction reference.
  __ evshufi64x2(IV, IV, IV, 0x03, Assembler::AVX_512bit);

  __ BIND(Lcbc_dec_rem_loop);
  __ subl(len_reg, 16);
  __ jcc(Assembler::carrySet, Lcbc_dec_ret);  // fewer than 16 bytes were left: done

  __ movdqu(S0, Address(from, 0));
  __ evpxorq(B0, S0, RK1, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK2, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK3, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK4, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK5, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK6, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK7, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK8, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK9, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK10, Assembler::AVX_512bit);
  __ cmpl(rounds, 44);
  __ jcc(Assembler::belowEqual, Lcbc_dec_rem_last);

  __ vaesdec(B0, B0, RK11, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK12, Assembler::AVX_512bit);
  __ cmpl(rounds, 52);
  __ jcc(Assembler::belowEqual, Lcbc_dec_rem_last);

  __ vaesdec(B0, B0, RK13, Assembler::AVX_512bit);
  __ vaesdec(B0, B0, RK14, Assembler::AVX_512bit);

  __ BIND(Lcbc_dec_rem_last);
  __ vaesdeclast(B0, B0, RK0, Assembler::AVX_512bit);

  // CBC chaining for the single block, then remember this cipher text block.
  __ evpxorq(B0, B0, IV, Assembler::AVX_512bit);
  __ evmovdquq(IV, S0, Assembler::AVX_512bit);
  __ movdqu(Address(to, 0), B0);
  __ leaq(from, Address(from, 16));
  __ leaq(to, Address(to, 16));
  __ jmp(Lcbc_dec_rem_loop);

  // ---- Epilogue ----
  __ BIND(Lcbc_dec_ret);
  // Hand the last cipher text block back to the caller through rvec.
  __ movdqu(Address(rvec, 0), IV);

  // Zero out the round keys
  __ evpxorq(RK0, RK0, RK0, Assembler::AVX_512bit);
  __ evpxorq(RK1, RK1, RK1, Assembler::AVX_512bit);
  __ evpxorq(RK2, RK2, RK2, Assembler::AVX_512bit);
  __ evpxorq(RK3, RK3, RK3, Assembler::AVX_512bit);
  __ evpxorq(RK4, RK4, RK4, Assembler::AVX_512bit);
  __ evpxorq(RK5, RK5, RK5, Assembler::AVX_512bit);
  __ evpxorq(RK6, RK6, RK6, Assembler::AVX_512bit);
  __ evpxorq(RK7, RK7, RK7, Assembler::AVX_512bit);
  __ evpxorq(RK8, RK8, RK8, Assembler::AVX_512bit);
  __ evpxorq(RK9, RK9, RK9, Assembler::AVX_512bit);
  __ evpxorq(RK10, RK10, RK10, Assembler::AVX_512bit);
  __ cmpl(rounds, 44);
  __ jcc(Assembler::belowEqual, Lcbc_exit);
  __ evpxorq(RK11, RK11, RK11, Assembler::AVX_512bit);
  __ evpxorq(RK12, RK12, RK12, Assembler::AVX_512bit);
  __ cmpl(rounds, 52);
  __ jcc(Assembler::belowEqual, Lcbc_exit);
  __ evpxorq(RK13, RK13, RK13, Assembler::AVX_512bit);
  __ evpxorq(RK14, RK14, RK14, Assembler::AVX_512bit);

  __ BIND(Lcbc_exit);
  __ pop(rbx);
#ifdef _WIN64
  __ movl(rax, len_mem);
#else
  __ pop(rax); // return length
#endif
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);
  return start;
}
|
||||
|
||||
// byte swap x86 long
|
||||
address generate_ghash_long_swap_mask() {
|
||||
__ align(CodeEntryAlignment);
|
||||
@ -5078,7 +5384,11 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
|
||||
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
|
||||
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
|
||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
||||
if (VM_Version::supports_vaes() && VM_Version::supports_avx512vl() && VM_Version::supports_avx512dq() ) {
|
||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
|
||||
} else {
|
||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
||||
}
|
||||
}
|
||||
if (UseAESCTRIntrinsics){
|
||||
StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
|
||||
|
@ -666,6 +666,7 @@ void VM_Version::get_processor_features() {
|
||||
_features &= ~CPU_AVX512VL;
|
||||
_features &= ~CPU_AVX512_VPOPCNTDQ;
|
||||
_features &= ~CPU_VPCLMULQDQ;
|
||||
_features &= ~CPU_VAES;
|
||||
}
|
||||
|
||||
if (UseAVX < 2)
|
||||
|
@ -335,6 +335,7 @@ protected:
|
||||
#define CPU_VZEROUPPER ((uint64_t)UCONST64(0x1000000000)) // Vzeroupper instruction
|
||||
#define CPU_AVX512_VPOPCNTDQ ((uint64_t)UCONST64(0x2000000000)) // Vector popcount
|
||||
#define CPU_VPCLMULQDQ ((uint64_t)UCONST64(0x4000000000)) //Vector carryless multiplication
|
||||
#define CPU_VAES ((uint64_t)UCONST64(0x8000000000)) // Vector AES instructions
|
||||
|
||||
enum Extended_Family {
|
||||
// AMD
|
||||
@ -545,6 +546,8 @@ protected:
|
||||
result |= CPU_AVX512_VPOPCNTDQ;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.vpclmulqdq != 0)
|
||||
result |= CPU_VPCLMULQDQ;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
|
||||
result |= CPU_VAES;
|
||||
}
|
||||
}
|
||||
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
|
||||
@ -823,6 +826,7 @@ public:
|
||||
// True when the CPU_VZEROUPPER bit is set in _features.
static bool supports_vzeroupper() {
  return 0 != (_features & CPU_VZEROUPPER);
}
|
||||
// True when the CPU_AVX512_VPOPCNTDQ bit is set in _features.
static bool supports_vpopcntdq() {
  return 0 != (_features & CPU_AVX512_VPOPCNTDQ);
}
|
||||
// True when the CPU_VPCLMULQDQ bit is set in _features.
static bool supports_vpclmulqdq() {
  return 0 != (_features & CPU_VPCLMULQDQ);
}
|
||||
// True when the CPU_VAES (vector AES instructions) bit is set in _features.
static bool supports_vaes() {
  return 0 != (_features & CPU_VAES);
}
|
||||
|
||||
// Intel features
|
||||
static bool is_intel_family_core() { return is_intel() &&
|
||||
|
Loading…
Reference in New Issue
Block a user