8293329: x86: Improve handling of constants in AES/GHASH stubs
Reviewed-by: kvn
This commit is contained in:
parent
d3f7e3b417
commit
155b10ae86
@ -65,6 +65,22 @@
|
||||
const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions
|
||||
const int FPU_CNTRL_WRD_MASK = 0xFFFF;
|
||||
|
||||
// pshufb control mask used when loading AES key words. It is kept as a static table
// declared with ATTRIBUTE_ALIGNED(16) rather than emitted into the code buffer, and
// its address is handed out by key_shuffle_mask_addr().
// On little-endian x86 the four words lay out as bytes
//   03 02 01 00 | 07 06 05 04 | 0B 0A 09 08 | 0F 0E 0D 0C
// so, as a pshufb control, it reverses the byte order inside each 32-bit word.
ATTRIBUTE_ALIGNED(16) uint32_t KEY_SHUFFLE_MASK[] = {
|
||||
0x00010203UL, 0x04050607UL, 0x08090A0BUL, 0x0C0D0E0FUL,
|
||||
};
|
||||
|
||||
// pshufb control mask for the CTR-mode counter; its address is returned by
// counter_shuffle_mask_addr(). The counter stub applies it to the 16-byte counter
// before incrementing and applies it again before storing the counter back.
// On little-endian x86 the bytes read 0F 0E 0D ... 01 00, i.e. as a pshufb control
// it reverses all 16 bytes of the register.
ATTRIBUTE_ALIGNED(16) uint32_t COUNTER_SHUFFLE_MASK[] = {
|
||||
0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL,
|
||||
};
|
||||
|
||||
// pshufb control mask returned by ghash_byte_swap_mask_addr(); the GHASH stub applies
// it to every 16-byte block it loads from the data array.
// The words are the same as COUNTER_SHUFFLE_MASK: on little-endian x86 the bytes read
// 0F 0E 0D ... 01 00, so the shuffle reverses all 16 bytes.
ATTRIBUTE_ALIGNED(16) uint32_t GHASH_BYTE_SWAP_MASK[] = {
|
||||
0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL,
|
||||
};
|
||||
|
||||
// pshufb control mask returned by ghash_long_swap_mask_addr(); the GHASH stub applies
// it to the state and subkeyH values on entry and to the result before it is stored.
// On little-endian x86 the bytes read 08 09 .. 0F | 00 01 .. 07, so the shuffle
// exchanges the two 64-bit halves and keeps the byte order inside each half.
ATTRIBUTE_ALIGNED(16) uint32_t GHASH_LONG_SWAP_MASK[] = {
|
||||
0x0B0A0908UL, 0x0F0E0D0CUL, 0x03020100UL, 0x07060504UL,
|
||||
};
|
||||
|
||||
// -------------------------------------------------------------------------------------------------------------------------
|
||||
// Stub Code definitions
|
||||
|
||||
@ -2180,26 +2196,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// AES intrinsic stubs
|
||||
enum {AESBlockSize = 16};
|
||||
|
||||
address generate_key_shuffle_mask() {
|
||||
__ align(16);
|
||||
StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
|
||||
address start = __ pc();
|
||||
__ emit_data(0x00010203, relocInfo::none, 0 );
|
||||
__ emit_data(0x04050607, relocInfo::none, 0 );
|
||||
__ emit_data(0x08090a0b, relocInfo::none, 0 );
|
||||
__ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
|
||||
return start;
|
||||
address key_shuffle_mask_addr() {
|
||||
return (address)KEY_SHUFFLE_MASK;
|
||||
}
|
||||
|
||||
address generate_counter_shuffle_mask() {
|
||||
__ align(16);
|
||||
StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
|
||||
address start = __ pc();
|
||||
__ emit_data(0x0c0d0e0f, relocInfo::none, 0);
|
||||
__ emit_data(0x08090a0b, relocInfo::none, 0);
|
||||
__ emit_data(0x04050607, relocInfo::none, 0);
|
||||
__ emit_data(0x00010203, relocInfo::none, 0);
|
||||
return start;
|
||||
address counter_shuffle_mask_addr() {
|
||||
return (address)COUNTER_SHUFFLE_MASK;
|
||||
}
|
||||
|
||||
// Utility routine for loading a 128-bit key word in little endian format
|
||||
@ -2209,7 +2211,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
if (xmm_shuf_mask != xnoreg) {
|
||||
__ pshufb(xmmdst, xmm_shuf_mask);
|
||||
} else {
|
||||
__ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ pshufb(xmmdst, ExternalAddress(key_shuffle_mask_addr()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -2290,7 +2292,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
|
||||
__ movptr(to, to_param);
|
||||
|
||||
@ -2389,7 +2391,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_result, Address(from, 0));
|
||||
__ movptr(to, to_param);
|
||||
|
||||
@ -2522,7 +2524,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ movptr(len_reg , len_param);
|
||||
|
||||
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
|
||||
// load up xmm regs 2 thru 7 with keys 0-5
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
|
||||
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
|
||||
@ -2690,7 +2692,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ movptr(rvec , rvec_param);
|
||||
__ movptr(len_reg , len_param);
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec
|
||||
|
||||
__ xorptr(pos, pos);
|
||||
@ -2909,11 +2911,11 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
// initialize counter with initial counter
|
||||
__ movdqu(xmm_curr_counter, Address(counter, 0x00));
|
||||
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
|
||||
__ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled for increase
|
||||
|
||||
// key length could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
|
||||
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
__ cmpl(rax, 52);
|
||||
__ jcc(Assembler::equal, L_key192_top);
|
||||
@ -2939,8 +2941,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left
|
||||
__ jcc(Assembler::less, L_singleBlockLoopTop[k]);
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
|
||||
|
||||
//load, then increase counters
|
||||
CTR_DoFour(movdqa, xmm_curr_counter);
|
||||
@ -2992,8 +2994,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ BIND(L_singleBlockLoopTop[k]);
|
||||
__ cmpptr(len_reg, 0);
|
||||
__ jcc(Assembler::equal, L_exit);
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
|
||||
__ movdqa(xmm_result0, xmm_curr_counter);
|
||||
load_key(xmm_key, key, 0x00, xmm_key_shuf_mask);
|
||||
__ push(rbx);//rbx is used for increasing counter
|
||||
@ -3078,7 +3080,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
}
|
||||
|
||||
__ BIND(L_exit);
|
||||
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()));
|
||||
__ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back.
|
||||
__ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back
|
||||
handleSOERegisters(false /*restoring*/);
|
||||
@ -3266,28 +3268,13 @@ class StubGenerator: public StubCodeGenerator {
|
||||
}
|
||||
|
||||
// byte swap x86 long
|
||||
address generate_ghash_long_swap_mask() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
|
||||
address start = __ pc();
|
||||
__ emit_data(0x0b0a0908, relocInfo::none, 0);
|
||||
__ emit_data(0x0f0e0d0c, relocInfo::none, 0);
|
||||
__ emit_data(0x03020100, relocInfo::none, 0);
|
||||
__ emit_data(0x07060504, relocInfo::none, 0);
|
||||
|
||||
return start;
|
||||
address ghash_long_swap_mask_addr() {
|
||||
return (address)GHASH_LONG_SWAP_MASK;
|
||||
}
|
||||
|
||||
// byte swap x86 byte array
|
||||
address generate_ghash_byte_swap_mask() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
|
||||
address start = __ pc();
|
||||
__ emit_data(0x0c0d0e0f, relocInfo::none, 0);
|
||||
__ emit_data(0x08090a0b, relocInfo::none, 0);
|
||||
__ emit_data(0x04050607, relocInfo::none, 0);
|
||||
__ emit_data(0x00010203, relocInfo::none, 0);
|
||||
return start;
|
||||
address ghash_byte_swap_mask_addr() {
|
||||
return (address)GHASH_BYTE_SWAP_MASK;
|
||||
}
|
||||
|
||||
/* Single and multi-block ghash operations */
|
||||
@ -3326,14 +3313,14 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ movptr(blocks, blocks_param);
|
||||
|
||||
__ movdqu(xmm_temp0, Address(state, 0));
|
||||
__ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||
__ pshufb(xmm_temp0, ExternalAddress(ghash_long_swap_mask_addr()));
|
||||
|
||||
__ movdqu(xmm_temp1, Address(subkeyH, 0));
|
||||
__ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||
__ pshufb(xmm_temp1, ExternalAddress(ghash_long_swap_mask_addr()));
|
||||
|
||||
__ BIND(L_ghash_loop);
|
||||
__ movdqu(xmm_temp2, Address(data, 0));
|
||||
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
|
||||
__ pshufb(xmm_temp2, ExternalAddress(ghash_byte_swap_mask_addr()));
|
||||
|
||||
__ pxor(xmm_temp0, xmm_temp2);
|
||||
|
||||
@ -3419,7 +3406,7 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
__ BIND(L_exit);
|
||||
// Byte swap 16-byte result
|
||||
__ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||
__ pshufb(xmm_temp6, ExternalAddress(ghash_long_swap_mask_addr()));
|
||||
__ movdqu(Address(state, 0), xmm_temp6); // store the result
|
||||
|
||||
handleSOERegisters(false); // restore registers
|
||||
@ -4126,8 +4113,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
// don't bother generating these AES intrinsic stubs unless global flag is set
|
||||
if (UseAESIntrinsics) {
|
||||
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
|
||||
|
||||
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
|
||||
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
|
||||
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
|
||||
@ -4135,7 +4120,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
}
|
||||
|
||||
if (UseAESCTRIntrinsics) {
|
||||
StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
|
||||
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
|
||||
}
|
||||
|
||||
@ -4158,8 +4142,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
// Generate GHASH intrinsics code
|
||||
if (UseGHASHIntrinsics) {
|
||||
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
|
||||
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
|
||||
}
|
||||
|
||||
|
@ -337,13 +337,6 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
address generate_key_shuffle_mask();
|
||||
|
||||
address generate_counter_shuffle_mask();
|
||||
|
||||
// This mask is used for incrementing counter value(linc0, linc4, etc.)
|
||||
address generate_counter_mask_addr();
|
||||
|
||||
address generate_ghash_polynomial512_addr();
|
||||
|
||||
void roundDec(XMMRegister xmm_reg);
|
||||
void roundDeclast(XMMRegister xmm_reg);
|
||||
void roundEnc(XMMRegister key, int rnum);
|
||||
@ -351,17 +344,19 @@ class StubGenerator: public StubCodeGenerator {
|
||||
void roundDec(XMMRegister key, int rnum);
|
||||
void lastroundDec(XMMRegister key, int rnum);
|
||||
void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
|
||||
void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl);
|
||||
void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl, Register rscratch);
|
||||
void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
|
||||
XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
|
||||
XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
|
||||
bool final_reduction, int index, XMMRegister counter_inc_mask);
|
||||
// Load key and shuffle operation
|
||||
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg);
|
||||
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
|
||||
void ev_load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);
|
||||
|
||||
// Utility routine for loading a 128-bit key word in little endian format
|
||||
// can optionally specify that the shuffle mask is already in an XMM register
|
||||
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg);
|
||||
void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
|
||||
void load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch);
|
||||
|
||||
// Utility routine for incrementing the 128-bit counter (IV in CTR mode)
|
||||
void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block);
|
||||
@ -376,17 +371,15 @@ class StubGenerator: public StubCodeGenerator {
|
||||
void schoolbookAAD(int i, Register subkeyH, XMMRegister data, XMMRegister tmp0,
|
||||
XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3);
|
||||
void gfmul(XMMRegister tmp0, XMMRegister t);
|
||||
void generateHtbl_one_block(Register htbl);
|
||||
void generateHtbl_one_block(Register htbl, Register rscratch);
|
||||
void generateHtbl_eight_blocks(Register htbl);
|
||||
void avx_ghash(Register state, Register htbl, Register data, Register blocks);
|
||||
|
||||
address generate_ghash_polynomial_addr();
|
||||
|
||||
address generate_ghash_shufflemask_addr();
|
||||
|
||||
address generate_ghash_long_swap_mask(); // byte swap x86 long
|
||||
|
||||
address generate_ghash_byte_swap_mask(); // byte swap x86 byte array
|
||||
// Used by GHASH and AES stubs.
|
||||
address ghash_polynomial_addr();
|
||||
address ghash_shufflemask_addr();
|
||||
address ghash_long_swap_mask_addr(); // byte swap x86 long
|
||||
address ghash_byte_swap_mask_addr(); // byte swap x86 byte array
|
||||
|
||||
// Single and multi-block ghash operations
|
||||
address generate_ghash_processBlocks();
|
||||
@ -395,6 +388,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
address generate_avx_ghash_processBlocks();
|
||||
|
||||
|
||||
// BASE64 stubs
|
||||
|
||||
address base64_shuffle_addr();
|
||||
address base64_avx2_shuffle_addr();
|
||||
address base64_avx2_input_mask_addr();
|
||||
|
@ -40,14 +40,109 @@
|
||||
|
||||
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
|
||||
|
||||
|
||||
// AES intrinsic stubs
|
||||
// Constants
|
||||
|
||||
// Size of one AES block in bytes; the stubs scale block counts by it when comparing
// against the remaining input length.
const int AESBlockSize = 16;
|
||||
|
||||
// Shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers.
// Stored as two 64-bit words; on little-endian x86 the bytes read
//   03 02 01 00 | 07 06 05 04 | 0B 0A 09 08 | 0F 0E 0D 0C
// so, as a pshufb control, it reverses the byte order inside each 32-bit word.
// These are the same two quadwords the generate_key_shuffle_mask() stub emitted.
|
||||
ATTRIBUTE_ALIGNED(16) uint64_t KEY_SHUFFLE_MASK[] = {
|
||||
0x0405060700010203UL, 0x0C0D0E0F08090A0BUL
|
||||
};
|
||||
// Returns the address of KEY_SHUFFLE_MASK; callers wrap it in an ExternalAddress
// operand for movdqu.
static address key_shuffle_mask_addr() {
|
||||
return (address)KEY_SHUFFLE_MASK;
|
||||
}
|
||||
|
||||
// Shuffle mask for big-endian 128-bit integers.
// Each pair of quadwords lays out as bytes 0F 0E 0D ... 01 00 on little-endian x86,
// i.e. a full 16-byte reversal when used as a pshufb control. The pair is repeated
// four times (64 bytes, declared with ATTRIBUTE_ALIGNED(64)), matching the
// "lbswapmask" section that generate_counter_mask_addr() emitted.
// NOTE(review): the 4x repetition presumably lets the same table serve 512-bit
// loads as well as 128-bit ones -- confirm against the vector AES stubs.
|
||||
ATTRIBUTE_ALIGNED(64) uint64_t COUNTER_SHUFFLE_MASK[] = {
|
||||
0x08090A0B0C0D0E0FUL, 0x0001020304050607UL,
|
||||
0x08090A0B0C0D0E0FUL, 0x0001020304050607UL,
|
||||
0x08090A0B0C0D0E0FUL, 0x0001020304050607UL,
|
||||
0x08090A0B0C0D0E0FUL, 0x0001020304050607UL,
|
||||
};
|
||||
// Returns the address of COUNTER_SHUFFLE_MASK; callers wrap it in an ExternalAddress
// operand for movdqu.
static address counter_shuffle_mask_addr() {
|
||||
return (address)COUNTER_SHUFFLE_MASK;
|
||||
}
|
||||
|
||||
// This mask is used for incrementing counter value
// Four 128-bit lanes whose low quadwords hold 0, 1, 2 and 3 (high quadwords are zero).
// Same values as the "linc0" section (offset +64) of the table that
// generate_counter_mask_addr() emitted.
|
||||
ATTRIBUTE_ALIGNED(64) uint64_t COUNTER_MASK_LINC0[] = {
|
||||
0x0000000000000000UL, 0x0000000000000000UL,
|
||||
0x0000000000000001UL, 0x0000000000000000UL,
|
||||
0x0000000000000002UL, 0x0000000000000000UL,
|
||||
0x0000000000000003UL, 0x0000000000000000UL,
|
||||
};
|
||||
// Returns the address of COUNTER_MASK_LINC0.
static address counter_mask_linc0_addr() {
|
||||
return (address)COUNTER_MASK_LINC0;
|
||||
}
|
||||
|
||||
// Single 128-bit value with low quadword 1 and high quadword 0. It carries the same
// value as the entry at offset +80 of the table that generate_counter_mask_addr()
// emitted (the second lane of its linc0 section).
// NOTE(review): presumably the "+1" counter increment -- confirm at the use sites.
ATTRIBUTE_ALIGNED(16) uint64_t COUNTER_MASK_LINC1[] = {
|
||||
0x0000000000000001UL, 0x0000000000000000UL,
|
||||
};
|
||||
// Returns the address of COUNTER_MASK_LINC1.
static address counter_mask_linc1_addr() {
|
||||
return (address)COUNTER_MASK_LINC1;
|
||||
}
|
||||
|
||||
// Four 128-bit lanes, each with low quadword 4 and high quadword 0. Same values as
// the "linc4" section (offset +128) of the table that generate_counter_mask_addr()
// emitted.
ATTRIBUTE_ALIGNED(64) uint64_t COUNTER_MASK_LINC4[] = {
|
||||
0x0000000000000004UL, 0x0000000000000000UL,
|
||||
0x0000000000000004UL, 0x0000000000000000UL,
|
||||
0x0000000000000004UL, 0x0000000000000000UL,
|
||||
0x0000000000000004UL, 0x0000000000000000UL,
|
||||
};
|
||||
// Returns the address of COUNTER_MASK_LINC4.
static address counter_mask_linc4_addr() {
|
||||
return (address)COUNTER_MASK_LINC4;
|
||||
}
|
||||
|
||||
// Four 128-bit lanes, each with low quadword 8 and high quadword 0. Same values as
// the "linc8" section (offset +192) of the table that generate_counter_mask_addr()
// emitted.
ATTRIBUTE_ALIGNED(64) uint64_t COUNTER_MASK_LINC8[] = {
|
||||
0x0000000000000008UL, 0x0000000000000000UL,
|
||||
0x0000000000000008UL, 0x0000000000000000UL,
|
||||
0x0000000000000008UL, 0x0000000000000000UL,
|
||||
0x0000000000000008UL, 0x0000000000000000UL,
|
||||
};
|
||||
// Returns the address of COUNTER_MASK_LINC8.
static address counter_mask_linc8_addr() {
|
||||
return (address)COUNTER_MASK_LINC8;
|
||||
}
|
||||
|
||||
// Four 128-bit lanes, each with low quadword 0x10 (16) and high quadword 0. Same
// values as the "linc16" section (offset +320) of the table that
// generate_counter_mask_addr() emitted.
ATTRIBUTE_ALIGNED(64) uint64_t COUNTER_MASK_LINC16[] = {
|
||||
0x0000000000000010UL, 0x0000000000000000UL,
|
||||
0x0000000000000010UL, 0x0000000000000000UL,
|
||||
0x0000000000000010UL, 0x0000000000000000UL,
|
||||
0x0000000000000010UL, 0x0000000000000000UL,
|
||||
};
|
||||
// Returns the address of COUNTER_MASK_LINC16.
static address counter_mask_linc16_addr() {
|
||||
return (address)COUNTER_MASK_LINC16;
|
||||
}
|
||||
|
||||
// Four 128-bit lanes, each with low quadword 0x20 (32) and high quadword 0. Same
// values as the "linc32" section (offset +256) of the table that
// generate_counter_mask_addr() emitted.
ATTRIBUTE_ALIGNED(64) uint64_t COUNTER_MASK_LINC32[] = {
|
||||
0x0000000000000020UL, 0x0000000000000000UL,
|
||||
0x0000000000000020UL, 0x0000000000000000UL,
|
||||
0x0000000000000020UL, 0x0000000000000000UL,
|
||||
0x0000000000000020UL, 0x0000000000000000UL,
|
||||
};
|
||||
// Returns the address of COUNTER_MASK_LINC32.
static address counter_mask_linc32_addr() {
|
||||
return (address)COUNTER_MASK_LINC32;
|
||||
}
|
||||
|
||||
// GHASH reduction polynomial constant: the quadword pair
// (0x00000001C2000000, 0xC200000000000000) repeated in four 128-bit lanes.
// Same values as the "POLY for reduction" section that
// generate_ghash_polynomial512_addr() emitted at the start of its table.
ATTRIBUTE_ALIGNED(64) uint64_t GHASH_POLYNOMIAL_REDUCTION[] = {
|
||||
0x00000001C2000000UL, 0xC200000000000000UL,
|
||||
0x00000001C2000000UL, 0xC200000000000000UL,
|
||||
0x00000001C2000000UL, 0xC200000000000000UL,
|
||||
0x00000001C2000000UL, 0xC200000000000000UL,
|
||||
};
|
||||
// Returns the address of GHASH_POLYNOMIAL_REDUCTION.
static address ghash_polynomial_reduction_addr() {
|
||||
return (address)GHASH_POLYNOMIAL_REDUCTION;
|
||||
}
|
||||
|
||||
// Single 128-bit constant (0x0000000000000001, 0x0000000100000000). Same values as
// the entry labelled "TWOONE" in the table that generate_ghash_polynomial512_addr()
// emitted.
// NOTE(review): its role in the GHASH key setup is not visible in this chunk.
ATTRIBUTE_ALIGNED(16) uint64_t GHASH_POLYNOMIAL_TWO_ONE[] = {
|
||||
0x0000000000000001UL, 0x0000000100000000UL,
|
||||
};
|
||||
// Returns the address of GHASH_POLYNOMIAL_TWO_ONE.
static address ghash_polynomial_two_one_addr() {
|
||||
return (address)GHASH_POLYNOMIAL_TWO_ONE;
|
||||
}
|
||||
|
||||
|
||||
// AES intrinsic stubs
|
||||
|
||||
void StubGenerator::generate_aes_stubs() {
|
||||
if (UseAESIntrinsics) {
|
||||
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
|
||||
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
|
||||
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
|
||||
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
|
||||
@ -55,9 +150,6 @@ void StubGenerator::generate_aes_stubs() {
|
||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
|
||||
StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt();
|
||||
StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt();
|
||||
StubRoutines::x86::_counter_mask_addr = generate_counter_mask_addr();
|
||||
StubRoutines::x86::_ghash_poly512_addr = generate_ghash_polynomial512_addr();
|
||||
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||
StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
|
||||
} else {
|
||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
||||
@ -66,118 +158,13 @@ void StubGenerator::generate_aes_stubs() {
|
||||
|
||||
if (UseAESCTRIntrinsics) {
|
||||
if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512bw() && VM_Version::supports_avx512vl()) {
|
||||
if (StubRoutines::x86::_counter_mask_addr == NULL) {
|
||||
StubRoutines::x86::_counter_mask_addr = generate_counter_mask_addr();
|
||||
}
|
||||
StubRoutines::_counterMode_AESCrypt = generate_counterMode_VectorAESCrypt();
|
||||
} else {
|
||||
StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
|
||||
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the AES key shuffle mask as data in the stub code buffer and returns the
// address where the data starts.
// The two quadwords are identical to the static KEY_SHUFFLE_MASK table.
address StubGenerator::generate_key_shuffle_mask() {
|
||||
// Align first so the returned address is 16-byte aligned.
__ align(16);
|
||||
StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
|
||||
address start = __ pc();
|
||||
|
||||
// Low quadword, then high quadword of the 128-bit mask.
__ emit_data64( 0x0405060700010203, relocInfo::none );
|
||||
__ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none );
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits the CTR-mode counter shuffle mask (one 128-bit value) as data in the stub
// code buffer and returns the address where the data starts.
// The two quadwords equal the first lane of the static COUNTER_SHUFFLE_MASK table.
address StubGenerator::generate_counter_shuffle_mask() {
|
||||
// Align first so the returned address is 16-byte aligned.
__ align(16);
|
||||
StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask");
|
||||
address start = __ pc();
|
||||
|
||||
// Low quadword, then high quadword of the 128-bit mask.
__ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
|
||||
__ emit_data64(0x0001020304050607, relocInfo::none);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// This mask is used for incrementing counter value(linc0, linc4, etc.)
// Emits one contiguous 384-byte table (48 quadwords) into the stub code buffer and
// returns its start address. Sections, as byte offsets from the returned address:
//     +0   lbswapmask  (byte-swap shuffle mask, repeated in four 128-bit lanes)
//     +64  linc0       (lanes hold 0, 1, 2, 3; the lane at +80 holds 1)
//     +128 linc4       (every lane holds 4)
//     +192 linc8       (every lane holds 8)
//     +256 linc32      (every lane holds 0x20)
//     +320 linc16      (every lane holds 0x10)
// Note that linc32 is laid out before linc16. The same values exist as the separate
// static tables COUNTER_SHUFFLE_MASK and COUNTER_MASK_LINC0/1/4/8/16/32.
|
||||
address StubGenerator::generate_counter_mask_addr() {
|
||||
__ align64();
|
||||
StubCodeMark mark(this, "StubRoutines", "counter_mask_addr");
|
||||
address start = __ pc();
|
||||
|
||||
__ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);//lbswapmask
|
||||
__ emit_data64(0x0001020304050607, relocInfo::none);
|
||||
__ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
|
||||
__ emit_data64(0x0001020304050607, relocInfo::none);
|
||||
__ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
|
||||
__ emit_data64(0x0001020304050607, relocInfo::none);
|
||||
__ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
|
||||
__ emit_data64(0x0001020304050607, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);//linc0 = counter_mask_addr+64
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000001, relocInfo::none);//counter_mask_addr() + 80
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000002, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000003, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000004, relocInfo::none);//linc4 = counter_mask_addr() + 128
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000004, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000004, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000004, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000008, relocInfo::none);//linc8 = counter_mask_addr() + 192
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000008, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000008, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000008, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000020, relocInfo::none);//linc32 = counter_mask_addr() + 256
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000020, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000020, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000020, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000010, relocInfo::none);//linc16 = counter_mask_addr() + 320
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000010, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000010, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000010, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000000, relocInfo::none);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits the GHASH polynomial constants as one contiguous 96-byte table (12 quadwords)
// in the stub code buffer and returns its start address. Sections, as byte offsets
// from the returned address:
//     +0   POLY for reduction (same quadword pair repeated in four 128-bit lanes)
//     +64  POLY
//     +80  TWOONE
// The +0 and +80 sections carry the same values as the static tables
// GHASH_POLYNOMIAL_REDUCTION and GHASH_POLYNOMIAL_TWO_ONE.
address StubGenerator::generate_ghash_polynomial512_addr() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "_ghash_poly512_addr");
|
||||
address start = __ pc();
|
||||
|
||||
__ emit_data64(0x00000001C2000000, relocInfo::none); // POLY for reduction
|
||||
__ emit_data64(0xC200000000000000, relocInfo::none);
|
||||
__ emit_data64(0x00000001C2000000, relocInfo::none);
|
||||
__ emit_data64(0xC200000000000000, relocInfo::none);
|
||||
__ emit_data64(0x00000001C2000000, relocInfo::none);
|
||||
__ emit_data64(0xC200000000000000, relocInfo::none);
|
||||
__ emit_data64(0x00000001C2000000, relocInfo::none);
|
||||
__ emit_data64(0xC200000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000001, relocInfo::none); // POLY
|
||||
__ emit_data64(0xC200000000000000, relocInfo::none);
|
||||
__ emit_data64(0x0000000000000001, relocInfo::none); // TWOONE
|
||||
__ emit_data64(0x0000000100000000, relocInfo::none);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
// Vector AES Galois Counter Mode implementation.
|
||||
//
|
||||
// Inputs: Windows | Linux
|
||||
@ -443,7 +430,7 @@ address StubGenerator::generate_counterMode_AESCrypt_Parallel() {
|
||||
|
||||
__ push(rbx); // Save RBX
|
||||
__ movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter
|
||||
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()), pos); // pos as scratch
|
||||
__ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr()), pos /*rscratch*/);
|
||||
__ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled
|
||||
__ movptr(pos, 0);
|
||||
|
||||
@ -466,7 +453,7 @@ address StubGenerator::generate_counterMode_AESCrypt_Parallel() {
|
||||
__ movl(Address(used_addr, 0), used);
|
||||
|
||||
// key length could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx); // rbx as scratch
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
|
||||
__ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
__ cmpl(rbx, 52);
|
||||
__ jcc(Assembler::equal, L_multiBlock_loopTop[1]);
|
||||
@ -676,7 +663,7 @@ address StubGenerator::generate_cipherBlockChaining_decryptVectorAESCrypt() {
|
||||
|
||||
// Temporary variable declaration for swapping key bytes
|
||||
const XMMRegister xmm_key_shuf_mask = xmm1;
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
|
||||
|
||||
// Calculate number of rounds from key size: 44 for 10-rounds, 52 for 12-rounds, 60 for 14-rounds
|
||||
const Register rounds = rbx;
|
||||
@ -949,7 +936,7 @@ address StubGenerator::generate_aescrypt_encryptBlock() {
|
||||
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()), r10 /*rscratch*/);
|
||||
__ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input
|
||||
|
||||
// For encryption, the java expanded key ordering is just what we need
|
||||
@ -1004,6 +991,7 @@ address StubGenerator::generate_aescrypt_encryptBlock() {
|
||||
__ aesenclast(xmm_result, xmm_temp2);
|
||||
__ movdqu(Address(to, 0), xmm_result); // store the result
|
||||
__ xorptr(rax, rax); // return 0
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
@ -1042,7 +1030,7 @@ address StubGenerator::generate_aescrypt_decryptBlock() {
|
||||
// keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
|
||||
__ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()), r10 /*rscratch*/);
|
||||
__ movdqu(xmm_result, Address(from, 0));
|
||||
|
||||
// for decryption java expanded key ordering is rotated one position from what we want
|
||||
@ -1098,6 +1086,7 @@ address StubGenerator::generate_aescrypt_decryptBlock() {
|
||||
__ aesdeclast(xmm_result, xmm_temp3);
|
||||
__ movdqu(Address(to, 0), xmm_result); // store the result
|
||||
__ xorptr(rax, rax); // return 0
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
@ -1129,11 +1118,11 @@ address StubGenerator::generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
const Register key = c_rarg2; // key array address
|
||||
const Register rvec = c_rarg3; // r byte array initialized from initvector array address
|
||||
// and left with the results of the last encryption block
|
||||
#ifndef _WIN64
|
||||
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
|
||||
#else
|
||||
const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
|
||||
#ifdef _WIN64
|
||||
const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64
|
||||
const Register len_reg = r11; // pick the volatile windows register
|
||||
#else
|
||||
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16)
|
||||
#endif
|
||||
const Register pos = rax;
|
||||
|
||||
@ -1159,7 +1148,7 @@ address StubGenerator::generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
#endif
|
||||
|
||||
const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()), r10 /*rscratch*/);
|
||||
// load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) {
|
||||
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
|
||||
@ -1167,6 +1156,7 @@ address StubGenerator::generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
}
|
||||
__ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec
|
||||
|
||||
|
||||
// now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
|
||||
__ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
|
||||
__ cmpl(rax, 44);
|
||||
@ -1240,7 +1230,7 @@ address StubGenerator::generate_cipherBlockChaining_encryptAESCrypt() {
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) {
|
||||
__ aesenc(xmm_result, as_XMMRegister(rnum));
|
||||
}
|
||||
load_key(xmm_temp, key, 0xe0);
|
||||
load_key(xmm_temp, key, 0xe0, r10 /*rscratch*/);
|
||||
__ aesenclast(xmm_result, xmm_temp);
|
||||
__ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output
|
||||
// no need to store r to memory until we exit
|
||||
@ -1314,7 +1304,7 @@ address StubGenerator::generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
|
||||
// the java expanded key ordering is rotated one position from what we want
|
||||
// so we start from 0x10 here and hit 0x00 last
|
||||
const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
|
||||
// load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
|
||||
for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) {
|
||||
load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
|
||||
@ -1356,20 +1346,20 @@ __ opc(xmm_result3, src_reg); \
|
||||
if (k == 1) {
|
||||
__ subptr(rsp, 6 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm15); //save last_key from xmm15
|
||||
load_key(xmm15, key, 0xb0); // 0xb0; 192-bit key goes up to 0xc0
|
||||
load_key(xmm15, key, 0xb0, rbx /*rscratch*/); // 0xb0; 192-bit key goes up to 0xc0
|
||||
__ movdqu(Address(rsp, 2 * wordSize), xmm15);
|
||||
load_key(xmm1, key, 0xc0); // 0xc0;
|
||||
load_key(xmm1, key, 0xc0, rbx /*rscratch*/); // 0xc0;
|
||||
__ movdqu(Address(rsp, 4 * wordSize), xmm1);
|
||||
} else if (k == 2) {
|
||||
__ subptr(rsp, 10 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm15); //save last_key from xmm15
|
||||
load_key(xmm15, key, 0xd0); // 0xd0; 256-bit key goes up to 0xe0
|
||||
load_key(xmm15, key, 0xd0, rbx /*rscratch*/); // 0xd0; 256-bit key goes up to 0xe0
|
||||
__ movdqu(Address(rsp, 6 * wordSize), xmm15);
|
||||
load_key(xmm1, key, 0xe0); // 0xe0;
|
||||
load_key(xmm1, key, 0xe0, rbx /*rscratch*/); // 0xe0;
|
||||
__ movdqu(Address(rsp, 8 * wordSize), xmm1);
|
||||
load_key(xmm15, key, 0xb0); // 0xb0;
|
||||
load_key(xmm15, key, 0xb0, rbx /*rscratch*/); // 0xb0;
|
||||
__ movdqu(Address(rsp, 2 * wordSize), xmm15);
|
||||
load_key(xmm1, key, 0xc0); // 0xc0;
|
||||
load_key(xmm1, key, 0xc0, rbx /*rscratch*/); // 0xc0;
|
||||
__ movdqu(Address(rsp, 4 * wordSize), xmm1);
|
||||
}
|
||||
__ align(OptoLoopAlignment);
|
||||
@ -1455,11 +1445,11 @@ __ opc(xmm_result3, src_reg); \
|
||||
__ jcc(Assembler::equal, L_exit);
|
||||
__ BIND(L_singleBlock_loopTopHead2[k]);
|
||||
if (k == 1) {
|
||||
load_key(xmm_key11, key, 0xb0); // 0xb0; 192-bit key goes up to 0xc0
|
||||
load_key(xmm_key12, key, 0xc0); // 0xc0; 192-bit key goes up to 0xc0
|
||||
load_key(xmm_key11, key, 0xb0, rbx /*rscratch*/); // 0xb0; 192-bit key goes up to 0xc0
|
||||
load_key(xmm_key12, key, 0xc0, rbx /*rscratch*/); // 0xc0; 192-bit key goes up to 0xc0
|
||||
}
|
||||
if (k == 2) {
|
||||
load_key(xmm_key11, key, 0xb0); // 0xb0; 256-bit key goes up to 0xe0
|
||||
load_key(xmm_key11, key, 0xb0, rbx /*rscratch*/); // 0xb0; 256-bit key goes up to 0xe0
|
||||
}
|
||||
__ align(OptoLoopAlignment);
|
||||
__ BIND(L_singleBlock_loopTop[k]);
|
||||
@ -1475,11 +1465,11 @@ __ opc(xmm_result3, src_reg); \
|
||||
}
|
||||
if (k == 2) {
|
||||
__ aesdec(xmm_result, xmm_key11);
|
||||
load_key(key_tmp, key, 0xc0);
|
||||
load_key(key_tmp, key, 0xc0, rbx /*rscratch*/);
|
||||
__ aesdec(xmm_result, key_tmp);
|
||||
load_key(key_tmp, key, 0xd0);
|
||||
load_key(key_tmp, key, 0xd0, rbx /*rscratch*/);
|
||||
__ aesdec(xmm_result, key_tmp);
|
||||
load_key(key_tmp, key, 0xe0);
|
||||
load_key(key_tmp, key, 0xe0, rbx /*rscratch*/);
|
||||
__ aesdec(xmm_result, key_tmp);
|
||||
}
|
||||
|
||||
@ -1611,26 +1601,29 @@ void StubGenerator::roundDeclast(XMMRegister xmm_reg) {
|
||||
|
||||
|
||||
// Utility routine for loading a 128-bit key word in little endian format
|
||||
// can optionally specify that the shuffle mask is already in an xmmregister
|
||||
void StubGenerator::load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask) {
|
||||
__ movdqu(xmmdst, Address(key, offset));
|
||||
if (xmm_shuf_mask != xnoreg) {
|
||||
__ pshufb(xmmdst, xmm_shuf_mask);
|
||||
} else {
|
||||
__ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
}
|
||||
__ pshufb(xmmdst, xmm_shuf_mask);
|
||||
}
|
||||
|
||||
void StubGenerator::load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch) {
|
||||
__ movdqu(xmmdst, Address(key, offset));
|
||||
__ pshufb(xmmdst, ExternalAddress(key_shuffle_mask_addr()), rscratch);
|
||||
}
|
||||
|
||||
void StubGenerator::ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask) {
|
||||
__ movdqu(xmmdst, Address(key, offset));
|
||||
if (xmm_shuf_mask != xnoreg) {
|
||||
__ pshufb(xmmdst, xmm_shuf_mask);
|
||||
} else {
|
||||
__ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
}
|
||||
__ pshufb(xmmdst, xmm_shuf_mask);
|
||||
__ evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);
|
||||
}
|
||||
|
||||
void StubGenerator::ev_load_key(XMMRegister xmmdst, Register key, int offset, Register rscratch) {
|
||||
__ movdqu(xmmdst, Address(key, offset));
|
||||
__ pshufb(xmmdst, ExternalAddress(key_shuffle_mask_addr()), rscratch);
|
||||
__ evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit);
|
||||
}
|
||||
|
||||
|
||||
// AES-ECB Encrypt Operation
|
||||
void StubGenerator::aesecb_encrypt(Register src_addr, Register dest_addr, Register key, Register len) {
|
||||
const Register pos = rax;
|
||||
@ -1659,7 +1652,7 @@ void StubGenerator::aesecb_encrypt(Register src_addr, Register dest_addr, Regist
|
||||
|
||||
// Load Key shuf mask
|
||||
const XMMRegister xmm_key_shuf_mask = xmm31; // used temporarily to swap key bytes up front
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
|
||||
|
||||
// Load and shuffle key based on number of rounds
|
||||
ev_load_key(xmm8, key, 0 * 16, xmm_key_shuf_mask);
|
||||
@ -1869,7 +1862,7 @@ void StubGenerator::aesecb_decrypt(Register src_addr, Register dest_addr, Regist
|
||||
|
||||
// Load Key shuf mask
|
||||
const XMMRegister xmm_key_shuf_mask = xmm31; // used temporarily to swap key bytes up front
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
|
||||
|
||||
// Load and shuffle round keys. The java expanded key ordering is rotated one position in decryption.
|
||||
// So the first round key is loaded from 1*16 here and last round key is loaded from 0*16
|
||||
@ -2098,7 +2091,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
|
||||
__ evshufi64x2(xmm8, xmm0, xmm0, 0, Assembler::AVX_512bit);
|
||||
|
||||
// load lbswap mask
|
||||
__ evmovdquq(xmm16, ExternalAddress(StubRoutines::x86::counter_mask_addr()), Assembler::AVX_512bit, r15);
|
||||
__ evmovdquq(xmm16, ExternalAddress(counter_shuffle_mask_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
|
||||
//shuffle counter using lbswap_mask
|
||||
__ vpshufb(xmm8, xmm8, xmm16, Assembler::AVX_512bit);
|
||||
@ -2108,20 +2101,20 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
|
||||
// The counter is incremented after each block i.e. 16 bytes is processed;
|
||||
// each zmm register has 4 counter values as its MSB
|
||||
// the counters are incremented in parallel
|
||||
__ vpaddd(xmm8, xmm8, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 64), Assembler::AVX_512bit, r15);//linc0
|
||||
__ vpaddd(xmm9, xmm8, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//linc4(rip)
|
||||
__ vpaddd(xmm10, xmm9, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
|
||||
__ vpaddd(xmm11, xmm10, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
|
||||
__ vpaddd(xmm12, xmm11, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
|
||||
__ vpaddd(xmm13, xmm12, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
|
||||
__ vpaddd(xmm14, xmm13, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
|
||||
__ vpaddd(xmm15, xmm14, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
|
||||
__ vpaddd(xmm8, xmm8, ExternalAddress(counter_mask_linc0_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
__ vpaddd(xmm9, xmm8, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
__ vpaddd(xmm10, xmm9, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
__ vpaddd(xmm11, xmm10, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
__ vpaddd(xmm12, xmm11, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
__ vpaddd(xmm13, xmm12, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
__ vpaddd(xmm14, xmm13, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
__ vpaddd(xmm15, xmm14, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
|
||||
// load linc32 mask in zmm register.linc32 increments counter by 32
|
||||
__ evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 256), Assembler::AVX_512bit, r15);//Linc32
|
||||
__ evmovdquq(xmm19, ExternalAddress(counter_mask_linc32_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
|
||||
// xmm31 contains the key shuffle mask.
|
||||
__ movdqu(xmm31, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
|
||||
__ movdqu(xmm31, ExternalAddress(key_shuffle_mask_addr()), r15 /*rscratch*/);
|
||||
// Load key function loads 128 bit key and shuffles it. Then we broadcast the shuffled key to convert it into a 512 bit value.
|
||||
// For broadcasting the values to ZMM, vshufi64 is used instead of evbroadcasti64x2 as the source in this case is ZMM register
|
||||
// that holds shuffled key value.
|
||||
@ -2237,14 +2230,14 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
|
||||
__ jcc(Assembler::aboveEqual, REMAINDER_4);
|
||||
// At this point, we will process 16 bytes of data at a time.
|
||||
// So load xmm19 with counter increment value as 1
|
||||
__ evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);
|
||||
__ evmovdquq(xmm19, ExternalAddress(counter_mask_linc1_addr()), Assembler::AVX_128bit, r15 /*rscratch*/);
|
||||
__ jmp(REMAINDER_LOOP);
|
||||
|
||||
// Each ZMM register can be used to encode 64 bytes of data, so we have 4 ZMM registers to encode 256 bytes of data
|
||||
__ bind(REMAINDER_16);
|
||||
__ subq(len_reg, 256);
|
||||
// As we process 16 blocks at a time, load mask for incrementing the counter value by 16
|
||||
__ evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 320), Assembler::AVX_512bit, r15);//Linc16(rip)
|
||||
__ evmovdquq(xmm19, ExternalAddress(counter_mask_linc16_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
// shuffle counter and XOR counter with roundkey1
|
||||
__ vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit);
|
||||
__ evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit);
|
||||
@ -2309,14 +2302,14 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
|
||||
__ cmpl(len_reg, 64);
|
||||
__ jcc(Assembler::aboveEqual, REMAINDER_4);
|
||||
//load mask for incrementing the counter value by 1
|
||||
__ evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);//Linc0 + 16(rip)
|
||||
__ evmovdquq(xmm19, ExternalAddress(counter_mask_linc1_addr()), Assembler::AVX_128bit, r15 /*rscratch*/);
|
||||
__ jmp(REMAINDER_LOOP);
|
||||
|
||||
// Each ZMM register can be used to encode 64 bytes of data, so we have 2 ZMM registers to encode 128 bytes of data
|
||||
__ bind(REMAINDER_8);
|
||||
__ subq(len_reg, 128);
|
||||
// As we process 8 blocks at a time, load mask for incrementing the counter value by 8
|
||||
__ evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 192), Assembler::AVX_512bit, r15);//Linc8(rip)
|
||||
__ evmovdquq(xmm19, ExternalAddress(counter_mask_linc8_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
// shuffle counters and xor with roundkey1
|
||||
__ vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit);
|
||||
__ evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit);
|
||||
@ -2369,14 +2362,14 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
|
||||
__ cmpl(len_reg, 64);
|
||||
__ jcc(Assembler::aboveEqual, REMAINDER_4);
|
||||
// load mask for incrementing the counter value by 1
|
||||
__ evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);//Linc0 + 16(rip)
|
||||
__ evmovdquq(xmm19, ExternalAddress(counter_mask_linc1_addr()), Assembler::AVX_128bit, r15 /*rscratch*/);
|
||||
__ jmp(REMAINDER_LOOP);
|
||||
|
||||
// Each ZMM register can be used to encode 64 bytes of data, so we have 1 ZMM register used in this block of code
|
||||
__ bind(REMAINDER_4);
|
||||
__ subq(len_reg, 64);
|
||||
// As we process 4 blocks at a time, load mask for incrementing the counter value by 4
|
||||
__ evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, r15);//Linc4(rip)
|
||||
__ evmovdquq(xmm19, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
// XOR counter with first roundkey
|
||||
__ vpshufb(xmm0, xmm8, xmm16, Assembler::AVX_512bit);
|
||||
__ evpxorq(xmm0, xmm0, xmm20, Assembler::AVX_512bit);
|
||||
@ -2420,7 +2413,7 @@ void StubGenerator::aesctr_encrypt(Register src_addr, Register dest_addr, Regist
|
||||
__ evmovdquq(Address(dest_addr, pos, Address::times_1, 0), xmm0, Assembler::AVX_512bit);
|
||||
__ addq(pos, 64);
|
||||
// load mask for incrementing the counter value by 1
|
||||
__ evmovdquq(xmm19, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 80), Assembler::AVX_128bit, r15);//Linc0 + 16(rip)
|
||||
__ evmovdquq(xmm19, ExternalAddress(counter_mask_linc1_addr()), Assembler::AVX_128bit, r15 /*rscratch*/);
|
||||
|
||||
// For a single block, the AES rounds start here.
|
||||
__ bind(REMAINDER_LOOP);
|
||||
@ -2557,7 +2550,7 @@ void StubGenerator::gfmul_avx512(XMMRegister GH, XMMRegister HK) {
|
||||
__ evpxorq(TMP1, TMP1, TMP3, Assembler::AVX_512bit);
|
||||
__ evpxorq(GH, GH, TMP2, Assembler::AVX_512bit);
|
||||
|
||||
__ evmovdquq(TMP3, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), Assembler::AVX_512bit, r15);
|
||||
__ evmovdquq(TMP3, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, r15 /*rscratch*/);
|
||||
__ evpclmulqdq(TMP2, TMP3, GH, 0x01, Assembler::AVX_512bit);
|
||||
__ vpslldq(TMP2, TMP2, 8, Assembler::AVX_512bit);
|
||||
__ evpxorq(GH, GH, TMP2, Assembler::AVX_512bit);
|
||||
@ -2568,7 +2561,7 @@ void StubGenerator::gfmul_avx512(XMMRegister GH, XMMRegister HK) {
|
||||
__ vpternlogq(GH, 0x96, TMP1, TMP2, Assembler::AVX_512bit);
|
||||
}
|
||||
|
||||
void StubGenerator::generateHtbl_48_block_zmm(Register htbl, Register avx512_htbl) {
|
||||
void StubGenerator::generateHtbl_48_block_zmm(Register htbl, Register avx512_htbl, Register rscratch) {
|
||||
const XMMRegister HK = xmm6;
|
||||
const XMMRegister ZT5 = xmm4;
|
||||
const XMMRegister ZT7 = xmm7;
|
||||
@ -2577,11 +2570,11 @@ void StubGenerator::generateHtbl_48_block_zmm(Register htbl, Register avx512_htb
|
||||
Label GFMUL_AVX512;
|
||||
|
||||
__ movdqu(HK, Address(htbl, 0));
|
||||
__ movdqu(xmm10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||
__ movdqu(xmm10, ExternalAddress(ghash_long_swap_mask_addr()), rscratch);
|
||||
__ vpshufb(HK, HK, xmm10, Assembler::AVX_128bit);
|
||||
|
||||
__ movdqu(xmm11, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr() + 64)); // Poly
|
||||
__ movdqu(xmm12, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr() + 80)); // Twoone
|
||||
__ movdqu(xmm11, ExternalAddress(ghash_polynomial_addr()), rscratch);
|
||||
__ movdqu(xmm12, ExternalAddress(ghash_polynomial_two_one_addr()), rscratch);
|
||||
// Compute H ^ 2 from the input subkeyH
|
||||
__ movdqu(xmm2, xmm6);
|
||||
__ vpsllq(xmm6, xmm6, 1, Assembler::AVX_128bit);
|
||||
@ -2835,7 +2828,7 @@ void StubGenerator::ghash16_encrypt16_parallel(Register key, Register subkeyHtbl
|
||||
__ vpternlogq(ZTMP7, 0x96, xmm25, ZTMP11, Assembler::AVX_512bit);
|
||||
__ vpsrldq(ZTMP11, ZTMP7, 8, Assembler::AVX_512bit);
|
||||
__ vpslldq(ZTMP7, ZTMP7, 8, Assembler::AVX_512bit);
|
||||
__ evmovdquq(ZTMP12, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), Assembler::AVX_512bit, rbx);
|
||||
__ evmovdquq(ZTMP12, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
|
||||
}
|
||||
// AES round 7
|
||||
roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
|
||||
@ -2942,7 +2935,7 @@ void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Regis
|
||||
__ movdqu(CTR_BLOCKx, Address(counter, 0));
|
||||
__ movdqu(AAD_HASHx, Address(state, 0));
|
||||
// Load lswap mask for ghash
|
||||
__ movdqu(xmm24, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()), rbx);
|
||||
__ movdqu(xmm24, ExternalAddress(ghash_long_swap_mask_addr()), rbx /*rscratch*/);
|
||||
// Shuffle input state using lswap mask
|
||||
__ vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit);
|
||||
|
||||
@ -2952,14 +2945,14 @@ void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Regis
|
||||
// Broadcast counter value to 512 bit register
|
||||
__ evshufi64x2(CTR_BLOCKx, CTR_BLOCKx, CTR_BLOCKx, 0, Assembler::AVX_512bit);
|
||||
// Load counter shuffle mask
|
||||
__ evmovdquq(xmm24, ExternalAddress(StubRoutines::x86::counter_mask_addr()), Assembler::AVX_512bit, rbx);
|
||||
__ evmovdquq(xmm24, ExternalAddress(counter_shuffle_mask_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
|
||||
// Shuffle counter
|
||||
__ vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit);
|
||||
|
||||
// Load mask for incrementing counter
|
||||
__ evmovdquq(COUNTER_INC_MASK, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, rbx);
|
||||
__ evmovdquq(COUNTER_INC_MASK, ExternalAddress(counter_mask_linc4_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
|
||||
// Pre-increment counter
|
||||
__ vpaddd(ZTMP5, CTR_BLOCKx, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 64), Assembler::AVX_512bit, rbx);
|
||||
__ vpaddd(ZTMP5, CTR_BLOCKx, ExternalAddress(counter_mask_linc0_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
|
||||
__ vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit);
|
||||
__ vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit);
|
||||
__ vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit);
|
||||
@ -2972,7 +2965,7 @@ void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Regis
|
||||
// Move 256 bytes of data
|
||||
loadData(in, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
|
||||
// Load key shuffle mask
|
||||
__ movdqu(xmm29, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx);
|
||||
__ movdqu(xmm29, ExternalAddress(key_shuffle_mask_addr()), rbx /*rscratch*/);
|
||||
// Load 0th AES round key
|
||||
ev_load_key(ZTMP4, key, 0, xmm29);
|
||||
// AES-ROUND0, xmm24 has the shuffle mask
|
||||
@ -3135,7 +3128,7 @@ void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Regis
|
||||
vhpxori4x128(ZTMP1, ZTMP11);
|
||||
vhpxori4x128(ZTMP2, ZTMP12);
|
||||
// Load reduction polynomial and compute final reduction
|
||||
__ evmovdquq(ZTMP15, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), Assembler::AVX_512bit, rbx);
|
||||
__ evmovdquq(ZTMP15, ExternalAddress(ghash_polynomial_reduction_addr()), Assembler::AVX_512bit, rbx /*rscratch*/);
|
||||
vclmul_reduce(AAD_HASHx, ZTMP15, ZTMP1, ZTMP2, ZTMP3, ZTMP4);
|
||||
|
||||
// Pre-increment counter for next operation
|
||||
@ -3144,14 +3137,14 @@ void StubGenerator::aesgcm_encrypt(Register in, Register len, Register ct, Regis
|
||||
__ vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit);
|
||||
__ movdqu(Address(counter, 0), CTR_BLOCKx);
|
||||
// Load ghash lswap mask
|
||||
__ movdqu(xmm24, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||
__ movdqu(xmm24, ExternalAddress(ghash_long_swap_mask_addr()), rbx /*rscratch*/);
|
||||
// Shuffle ghash using lbswap_mask and store it
|
||||
__ vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit);
|
||||
__ movdqu(Address(state, 0), AAD_HASHx);
|
||||
__ jmp(ENC_DEC_DONE);
|
||||
|
||||
__ bind(GENERATE_HTBL_48_BLKS);
|
||||
generateHtbl_48_block_zmm(subkeyHtbl, avx512_subkeyHtbl);
|
||||
generateHtbl_48_block_zmm(subkeyHtbl, avx512_subkeyHtbl, rbx /*rscratch*/);
|
||||
|
||||
__ bind(ENC_DEC_DONE);
|
||||
__ movq(rax, pos);
|
||||
|
@ -32,55 +32,48 @@
|
||||
|
||||
#define __ _masm->
|
||||
|
||||
// GHASH intrinsic stubs
|
||||
|
||||
|
||||
// Polynomial x^128+x^127+x^126+x^121+1
|
||||
address StubGenerator::generate_ghash_polynomial_addr() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "_ghash_poly_addr");
|
||||
address start = __ pc();
|
||||
|
||||
__ emit_data64(0x0000000000000001, relocInfo::none);
|
||||
__ emit_data64(0xc200000000000000, relocInfo::none);
|
||||
|
||||
return start;
|
||||
ATTRIBUTE_ALIGNED(16) uint64_t GHASH_SHUFFLE_MASK[] = {
|
||||
0x0F0F0F0F0F0F0F0FUL, 0x0F0F0F0F0F0F0F0FUL,
|
||||
};
|
||||
static address ghash_shuffle_mask_addr() {
|
||||
return (address)GHASH_SHUFFLE_MASK;
|
||||
}
|
||||
|
||||
address StubGenerator::generate_ghash_shufflemask_addr() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "_ghash_shuffmask_addr");
|
||||
address start = __ pc();
|
||||
|
||||
__ emit_data64(0x0f0f0f0f0f0f0f0f, relocInfo::none);
|
||||
__ emit_data64(0x0f0f0f0f0f0f0f0f, relocInfo::none);
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
|
||||
// byte swap x86 long
|
||||
address StubGenerator::generate_ghash_long_swap_mask() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
|
||||
address start = __ pc();
|
||||
|
||||
__ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none );
|
||||
__ emit_data64(0x0706050403020100, relocInfo::none );
|
||||
|
||||
return start;
|
||||
ATTRIBUTE_ALIGNED(16) uint64_t GHASH_LONG_SWAP_MASK[] = {
|
||||
0x0F0E0D0C0B0A0908UL, 0x0706050403020100UL,
|
||||
};
|
||||
address StubGenerator::ghash_long_swap_mask_addr() {
|
||||
return (address)GHASH_LONG_SWAP_MASK;
|
||||
}
|
||||
|
||||
// byte swap x86 byte array
|
||||
address StubGenerator::generate_ghash_byte_swap_mask() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
|
||||
address start = __ pc();
|
||||
ATTRIBUTE_ALIGNED(16) uint64_t GHASH_BYTE_SWAP_MASK[] = {
|
||||
0x08090A0B0C0D0E0FUL, 0x0001020304050607UL,
|
||||
};
|
||||
address StubGenerator::ghash_byte_swap_mask_addr() {
|
||||
return (address)GHASH_BYTE_SWAP_MASK;
|
||||
}
|
||||
|
||||
__ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none );
|
||||
__ emit_data64(0x0001020304050607, relocInfo::none );
|
||||
// Polynomial x^128+x^127+x^126+x^121+1
|
||||
ATTRIBUTE_ALIGNED(16) uint64_t GHASH_POLYNOMIAL[] = {
|
||||
0x0000000000000001UL, 0xC200000000000000UL,
|
||||
};
|
||||
address StubGenerator::ghash_polynomial_addr() {
|
||||
return (address)GHASH_POLYNOMIAL;
|
||||
}
|
||||
|
||||
return start;
|
||||
|
||||
// GHASH intrinsic stubs
|
||||
|
||||
void StubGenerator::generate_ghash_stubs() {
|
||||
if (UseGHASHIntrinsics) {
|
||||
if (VM_Version::supports_avx()) {
|
||||
StubRoutines::_ghash_processBlocks = generate_avx_ghash_processBlocks();
|
||||
} else {
|
||||
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -110,7 +103,9 @@ address StubGenerator::generate_ghash_processBlocks() {
|
||||
|
||||
__ enter();
|
||||
|
||||
__ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||
__ push(rbx); // scratch
|
||||
|
||||
__ movdqu(xmm_temp10, ExternalAddress(ghash_long_swap_mask_addr()), rbx /*rscratch*/);
|
||||
|
||||
__ movdqu(xmm_temp0, Address(state, 0));
|
||||
__ pshufb(xmm_temp0, xmm_temp10);
|
||||
@ -118,7 +113,7 @@ address StubGenerator::generate_ghash_processBlocks() {
|
||||
|
||||
__ bind(L_ghash_loop);
|
||||
__ movdqu(xmm_temp2, Address(data, 0));
|
||||
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
|
||||
__ pshufb(xmm_temp2, ExternalAddress(ghash_byte_swap_mask_addr()), rbx /*rscratch*/);
|
||||
|
||||
__ movdqu(xmm_temp1, Address(subkeyH, 0));
|
||||
__ pshufb(xmm_temp1, xmm_temp10);
|
||||
@ -208,6 +203,9 @@ address StubGenerator::generate_ghash_processBlocks() {
|
||||
__ bind(L_exit);
|
||||
__ pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result
|
||||
__ movdqu(Address(state, 0), xmm_temp6); // store the result
|
||||
|
||||
__ pop(rbx);
|
||||
|
||||
__ leave();
|
||||
__ ret(0);
|
||||
|
||||
@ -228,9 +226,11 @@ address StubGenerator::generate_avx_ghash_processBlocks() {
|
||||
const Register data = c_rarg2;
|
||||
const Register blocks = c_rarg3;
|
||||
__ enter();
|
||||
__ push(rbx);
|
||||
|
||||
avx_ghash(state, htbl, data, blocks);
|
||||
|
||||
__ pop(rbx);
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
@ -268,7 +268,7 @@ void StubGenerator::avx_ghash(Register input_state, Register htbl,
|
||||
|
||||
// Shuffle the input state
|
||||
__ bind(BEGIN_PROCESS);
|
||||
__ movdqu(lswap_mask, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||
__ movdqu(lswap_mask, ExternalAddress(ghash_long_swap_mask_addr()), rbx /*rscratch*/);
|
||||
__ movdqu(state, Address(input_state, 0));
|
||||
__ vpshufb(state, state, lswap_mask, Assembler::AVX_128bit);
|
||||
|
||||
@ -284,7 +284,7 @@ void StubGenerator::avx_ghash(Register input_state, Register htbl,
|
||||
//Each block = 16 bytes.
|
||||
__ bind(PROCESS_8_BLOCKS);
|
||||
__ subl(blocks, 8);
|
||||
__ movdqu(bswap_mask, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
|
||||
__ movdqu(bswap_mask, ExternalAddress(ghash_byte_swap_mask_addr()), rbx /*rscratch*/);
|
||||
__ movdqu(data, Address(input_data, 16 * 7));
|
||||
__ vpshufb(data, data, bswap_mask, Assembler::AVX_128bit);
|
||||
//Loading 1*16 as calculated powers of H required starts at that location.
|
||||
@ -369,7 +369,7 @@ void StubGenerator::avx_ghash(Register input_state, Register htbl,
|
||||
// Since this is one block operation we will only use H * 2 i.e. the first power of H
|
||||
__ bind(ONE_BLK_INIT);
|
||||
__ movdqu(tmp0, Address(htbl, 1 * 16));
|
||||
__ movdqu(bswap_mask, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
|
||||
__ movdqu(bswap_mask, ExternalAddress(ghash_byte_swap_mask_addr()), rbx /*rscratch*/);
|
||||
|
||||
//Do one (128 bit x 128 bit) carry-less multiplication at a time followed by a reduction.
|
||||
__ bind(PROCESS_1_BLOCK);
|
||||
@ -393,7 +393,7 @@ void StubGenerator::avx_ghash(Register input_state, Register htbl,
|
||||
gfmul(tmp0, state);
|
||||
|
||||
__ bind(GENERATE_HTBL_1_BLK);
|
||||
generateHtbl_one_block(htbl);
|
||||
generateHtbl_one_block(htbl, rbx /*rscratch*/);
|
||||
|
||||
__ bind(GENERATE_HTBL_8_BLKS);
|
||||
generateHtbl_eight_blocks(htbl);
|
||||
@ -472,23 +472,23 @@ void StubGenerator::schoolbookAAD(int i, Register htbl, XMMRegister data,
|
||||
|
||||
// This method takes the subkey after expansion as input and generates 1 * 16 power of subkey H.
|
||||
// The power of H is used in reduction process for one block ghash
|
||||
void StubGenerator::generateHtbl_one_block(Register htbl) {
|
||||
void StubGenerator::generateHtbl_one_block(Register htbl, Register rscratch) {
|
||||
const XMMRegister t = xmm13;
|
||||
|
||||
// load the original subkey hash
|
||||
__ movdqu(t, Address(htbl, 0));
|
||||
// shuffle using long swap mask
|
||||
__ movdqu(xmm10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||
__ movdqu(xmm10, ExternalAddress(ghash_long_swap_mask_addr()), rscratch);
|
||||
__ vpshufb(t, t, xmm10, Assembler::AVX_128bit);
|
||||
|
||||
// Compute H' = GFMUL(H, 2)
|
||||
__ vpsrld(xmm3, t, 7, Assembler::AVX_128bit);
|
||||
__ movdqu(xmm4, ExternalAddress(StubRoutines::x86::ghash_shufflemask_addr()));
|
||||
__ movdqu(xmm4, ExternalAddress(ghash_shuffle_mask_addr()), rscratch);
|
||||
__ vpshufb(xmm3, xmm3, xmm4, Assembler::AVX_128bit);
|
||||
__ movl(rax, 0xff00);
|
||||
__ movdl(xmm4, rax);
|
||||
__ vpshufb(xmm4, xmm4, xmm3, Assembler::AVX_128bit);
|
||||
__ movdqu(xmm5, ExternalAddress(StubRoutines::x86::ghash_polynomial_addr()));
|
||||
__ movdqu(xmm5, ExternalAddress(ghash_polynomial_addr()), rscratch);
|
||||
__ vpand(xmm5, xmm5, xmm4, Assembler::AVX_128bit);
|
||||
__ vpsrld(xmm3, t, 31, Assembler::AVX_128bit);
|
||||
__ vpslld(xmm4, t, 1, Assembler::AVX_128bit);
|
||||
@ -534,21 +534,4 @@ void StubGenerator::generateHtbl_eight_blocks(Register htbl) {
|
||||
gfmul(tmp0, t);
|
||||
}
|
||||
|
||||
|
||||
void StubGenerator::generate_ghash_stubs() {
|
||||
if (UseGHASHIntrinsics) {
|
||||
if (StubRoutines::x86::_ghash_long_swap_mask_addr == NULL) {
|
||||
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||
}
|
||||
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
|
||||
if (VM_Version::supports_avx()) {
|
||||
StubRoutines::x86::_ghash_shuffmask_addr = generate_ghash_shufflemask_addr();
|
||||
StubRoutines::x86::_ghash_poly_addr = generate_ghash_polynomial_addr();
|
||||
StubRoutines::_ghash_processBlocks = generate_avx_ghash_processBlocks();
|
||||
} else {
|
||||
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef __
|
||||
|
@ -34,12 +34,6 @@
|
||||
// a description of how to extend it, see the stubRoutines.hpp file.
|
||||
|
||||
address StubRoutines::x86::_verify_mxcsr_entry = NULL;
|
||||
address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
|
||||
address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
|
||||
address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
|
||||
address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
|
||||
address StubRoutines::x86::_ghash_poly_addr = NULL;
|
||||
address StubRoutines::x86::_ghash_shuffmask_addr = NULL;
|
||||
address StubRoutines::x86::_upper_word_mask_addr = NULL;
|
||||
address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
|
||||
address StubRoutines::x86::_k256_adr = NULL;
|
||||
@ -77,7 +71,6 @@ address StubRoutines::x86::_shuffle_base64 = NULL;
|
||||
address StubRoutines::x86::_avx2_shuffle_base64 = NULL;
|
||||
address StubRoutines::x86::_avx2_input_mask_base64 = NULL;
|
||||
address StubRoutines::x86::_avx2_lut_base64 = NULL;
|
||||
address StubRoutines::x86::_counter_mask_addr = NULL;
|
||||
address StubRoutines::x86::_lookup_lo_base64 = NULL;
|
||||
address StubRoutines::x86::_lookup_hi_base64 = NULL;
|
||||
address StubRoutines::x86::_lookup_lo_base64url = NULL;
|
||||
@ -87,7 +80,6 @@ address StubRoutines::x86::_join_0_1_base64 = NULL;
|
||||
address StubRoutines::x86::_join_1_2_base64 = NULL;
|
||||
address StubRoutines::x86::_join_2_3_base64 = NULL;
|
||||
address StubRoutines::x86::_decoding_table_base64 = NULL;
|
||||
address StubRoutines::x86::_ghash_poly512_addr = NULL;
|
||||
#endif
|
||||
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
|
||||
|
||||
|
@ -123,11 +123,6 @@ class x86 {
|
||||
static jint _mxcsr_std;
|
||||
|
||||
static address _verify_mxcsr_entry;
|
||||
// shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
|
||||
static address _key_shuffle_mask_addr;
|
||||
|
||||
//shuffle mask for big-endian 128-bit integers
|
||||
static address _counter_shuffle_mask_addr;
|
||||
|
||||
static address _method_entry_barrier;
|
||||
|
||||
@ -142,11 +137,6 @@ class x86 {
|
||||
#endif // _LP64
|
||||
// table for CRC32C
|
||||
static juint* _crc32c_table;
|
||||
// swap mask for ghash
|
||||
static address _ghash_long_swap_mask_addr;
|
||||
static address _ghash_byte_swap_mask_addr;
|
||||
static address _ghash_poly_addr;
|
||||
static address _ghash_shuffmask_addr;
|
||||
|
||||
// upper word mask for sha1
|
||||
static address _upper_word_mask_addr;
|
||||
@ -187,7 +177,6 @@ class x86 {
|
||||
static address _k512_W_addr;
|
||||
// byte flip mask for sha512
|
||||
static address _pshuffle_byte_flip_mask_addr_sha512;
|
||||
static address _counter_mask_addr;
|
||||
// Masks for base64
|
||||
static address _encoding_table_base64;
|
||||
static address _shuffle_base64;
|
||||
@ -203,7 +192,6 @@ class x86 {
|
||||
static address _join_1_2_base64;
|
||||
static address _join_2_3_base64;
|
||||
static address _decoding_table_base64;
|
||||
static address _ghash_poly512_addr;
|
||||
#endif
|
||||
// byte flip mask for sha256
|
||||
static address _pshuffle_byte_flip_mask_addr;
|
||||
@ -211,20 +199,13 @@ class x86 {
|
||||
public:
|
||||
static address addr_mxcsr_std() { return (address)&_mxcsr_std; }
|
||||
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
|
||||
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
|
||||
static address counter_shuffle_mask_addr() { return _counter_shuffle_mask_addr; }
|
||||
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
|
||||
#ifdef _LP64
|
||||
static address crc_by128_masks_avx512_addr() { return (address)_crc_by128_masks_avx512; }
|
||||
static address shuf_table_crc32_avx512_addr() { return (address)_shuf_table_crc32_avx512; }
|
||||
static address crc_table_avx512_addr() { return (address)_crc_table_avx512; }
|
||||
static address crc32c_table_avx512_addr() { return (address)_crc32c_table_avx512; }
|
||||
static address ghash_polynomial512_addr() { return _ghash_poly512_addr; }
|
||||
#endif // _LP64
|
||||
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
|
||||
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
|
||||
static address ghash_shufflemask_addr() { return _ghash_shuffmask_addr; }
|
||||
static address ghash_polynomial_addr() { return _ghash_poly_addr; }
|
||||
static address upper_word_mask_addr() { return _upper_word_mask_addr; }
|
||||
static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; }
|
||||
static address k256_addr() { return _k256_adr; }
|
||||
@ -333,7 +314,6 @@ class x86 {
|
||||
static address base64_avx2_shuffle_addr() { return _avx2_shuffle_base64; }
|
||||
static address base64_avx2_input_mask_addr() { return _avx2_input_mask_base64; }
|
||||
static address base64_avx2_lut_addr() { return _avx2_lut_base64; }
|
||||
static address counter_mask_addr() { return _counter_mask_addr; }
|
||||
static address base64_vbmi_lookup_lo_addr() { return _lookup_lo_base64; }
|
||||
static address base64_vbmi_lookup_hi_addr() { return _lookup_hi_base64; }
|
||||
static address base64_vbmi_lookup_lo_url_addr() { return _lookup_lo_base64url; }
|
||||
|
Loading…
Reference in New Issue
Block a user