8273297: AES/GCM non-AVX512+VAES CPUs suffer after 8267125
Reviewed-by: ascarpino, sviswanathan, aph
This commit is contained in:
parent
753b25633b
commit
13e9ea9e92
@ -55,6 +55,9 @@
|
||||
// No support for generic vector operands.
|
||||
static const bool supports_generic_vector_operands = false;
|
||||
|
||||
// No support for 48 extra htbl entries in aes-gcm intrinsic
|
||||
static const int htbl_entries = -1;
|
||||
|
||||
static constexpr bool isSimpleConstant64(jlong value) {
|
||||
// Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
|
||||
// Probably always true, even if a temp register is required.
|
||||
|
@ -56,6 +56,9 @@
|
||||
// No support for generic vector operands.
|
||||
static const bool supports_generic_vector_operands = false;
|
||||
|
||||
// No support for 48 extra htbl entries in aes-gcm intrinsic
|
||||
static const int htbl_entries = -1;
|
||||
|
||||
static constexpr bool isSimpleConstant64(jlong value) {
|
||||
// Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
|
||||
return false;
|
||||
|
@ -57,6 +57,9 @@
|
||||
// No support for generic vector operands.
|
||||
static const bool supports_generic_vector_operands = false;
|
||||
|
||||
// No support for 48 extra htbl entries in aes-gcm intrinsic
|
||||
static const int htbl_entries = -1;
|
||||
|
||||
static constexpr bool isSimpleConstant64(jlong value) {
|
||||
// Probably always true, even if a temp register is required.
|
||||
return true;
|
||||
|
@ -57,6 +57,9 @@
|
||||
// No support for generic vector operands.
|
||||
static const bool supports_generic_vector_operands = false;
|
||||
|
||||
// No support for 48 extra htbl entries in aes-gcm intrinsic
|
||||
static const int htbl_entries = -1;
|
||||
|
||||
static constexpr bool isSimpleConstant64(jlong value) {
|
||||
// Probably always true, even if a temp register is required.
|
||||
return true;
|
||||
|
@ -946,7 +946,7 @@ private:
|
||||
void lastroundDec(XMMRegister key, int rnum);
|
||||
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
|
||||
void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
|
||||
void generateHtbl_48_block_zmm(Register htbl);
|
||||
void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl);
|
||||
void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
|
||||
XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
|
||||
XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
|
||||
@ -957,7 +957,7 @@ public:
|
||||
void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
|
||||
Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
|
||||
void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
|
||||
Register state, Register subkeyHtbl, Register counter);
|
||||
Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -1293,7 +1293,7 @@ void MacroAssembler::gfmul_avx512(XMMRegister GH, XMMRegister HK) {
|
||||
vpternlogq(GH, 0x96, TMP1, TMP2, Assembler::AVX_512bit);
|
||||
}
|
||||
|
||||
void MacroAssembler::generateHtbl_48_block_zmm(Register htbl) {
|
||||
void MacroAssembler::generateHtbl_48_block_zmm(Register htbl, Register avx512_htbl) {
|
||||
const XMMRegister HK = xmm6;
|
||||
const XMMRegister ZT5 = xmm4;
|
||||
const XMMRegister ZT7 = xmm7;
|
||||
@ -1320,48 +1320,48 @@ void MacroAssembler::generateHtbl_48_block_zmm(Register htbl) {
|
||||
vpcmpeqd(xmm2, xmm2, xmm12, AVX_128bit);
|
||||
vpand(xmm2, xmm2, xmm11, Assembler::AVX_128bit);
|
||||
vpxor(xmm6, xmm6, xmm2, Assembler::AVX_128bit);
|
||||
movdqu(Address(htbl, 16 * 56), xmm6); // H ^ 2
|
||||
movdqu(Address(avx512_htbl, 16 * 47), xmm6); // H ^ 2
|
||||
// Compute the remaining three powers of H using XMM registers and all following powers using ZMM
|
||||
movdqu(ZT5, HK);
|
||||
vinserti32x4(ZT7, ZT7, HK, 3);
|
||||
|
||||
gfmul_avx512(ZT5, HK);
|
||||
movdqu(Address(htbl, 16 * 55), ZT5); // H ^ 2 * 2
|
||||
movdqu(Address(avx512_htbl, 16 * 46), ZT5); // H ^ 2 * 2
|
||||
vinserti32x4(ZT7, ZT7, ZT5, 2);
|
||||
|
||||
gfmul_avx512(ZT5, HK);
|
||||
movdqu(Address(htbl, 16 * 54), ZT5); // H ^ 2 * 3
|
||||
movdqu(Address(avx512_htbl, 16 * 45), ZT5); // H ^ 2 * 3
|
||||
vinserti32x4(ZT7, ZT7, ZT5, 1);
|
||||
|
||||
gfmul_avx512(ZT5, HK);
|
||||
movdqu(Address(htbl, 16 * 53), ZT5); // H ^ 2 * 4
|
||||
movdqu(Address(avx512_htbl, 16 * 44), ZT5); // H ^ 2 * 4
|
||||
vinserti32x4(ZT7, ZT7, ZT5, 0);
|
||||
|
||||
evshufi64x2(ZT5, ZT5, ZT5, 0x00, Assembler::AVX_512bit);
|
||||
evmovdquq(ZT8, ZT7, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT7, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 49), ZT7, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 40), ZT7, Assembler::AVX_512bit);
|
||||
evshufi64x2(ZT5, ZT7, ZT7, 0x00, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT8, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 45), ZT8, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 36), ZT8, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT7, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 41), ZT7, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 32), ZT7, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT8, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 37), ZT8, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 28), ZT8, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT7, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 33), ZT7, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 24), ZT7, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT8, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 29), ZT8, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 20), ZT8, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT7, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 25), ZT7, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 16), ZT7, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT8, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 21), ZT8, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 12), ZT8, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT7, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 17), ZT7, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 8), ZT7, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT8, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 13), ZT8, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 4), ZT8, Assembler::AVX_512bit);
|
||||
gfmul_avx512(ZT7, ZT5);
|
||||
evmovdquq(Address(htbl, 16 * 9), ZT7, Assembler::AVX_512bit);
|
||||
evmovdquq(Address(avx512_htbl, 16 * 0), ZT7, Assembler::AVX_512bit);
|
||||
ret(0);
|
||||
}
|
||||
|
||||
@ -1477,8 +1477,8 @@ void MacroAssembler::ghash16_encrypt16_parallel(Register key, Register subkeyHtb
|
||||
|
||||
// ZTMP19 & ZTMP20 used for loading hash key
|
||||
// Pre-load hash key
|
||||
evmovdquq(ZTMP19, Address(subkeyHtbl, i * 64 + 144), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64 + 144), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP19, Address(subkeyHtbl, i * 64), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
|
||||
// Load data for computing ghash
|
||||
evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
|
||||
vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit);
|
||||
@ -1499,7 +1499,7 @@ void MacroAssembler::ghash16_encrypt16_parallel(Register key, Register subkeyHtb
|
||||
// GHASH 4 blocks
|
||||
carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP21, ZTMP19);
|
||||
// Load the next hkey and Ghash data
|
||||
evmovdquq(ZTMP19, Address(subkeyHtbl, ++i * 64 + 144), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP19, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
|
||||
vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit);
|
||||
|
||||
@ -1510,7 +1510,7 @@ void MacroAssembler::ghash16_encrypt16_parallel(Register key, Register subkeyHtb
|
||||
// GHASH 4 blocks(11 to 8)
|
||||
carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20);
|
||||
// Load the next hkey and GDATA
|
||||
evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64 + 144), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
|
||||
vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit);
|
||||
|
||||
@ -1628,8 +1628,7 @@ void MacroAssembler::ghash16_encrypt16_parallel(Register key, Register subkeyHtb
|
||||
}
|
||||
|
||||
void MacroAssembler::aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
|
||||
Register state, Register subkeyHtbl, Register counter) {
|
||||
|
||||
Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter) {
|
||||
Label ENC_DEC_DONE, GENERATE_HTBL_48_BLKS, AES_192, AES_256, STORE_CT, GHASH_LAST_32,
|
||||
AES_32_BLOCKS, GHASH_AES_PARALLEL, LOOP, ACCUMULATE, GHASH_16_AES_16;
|
||||
const XMMRegister CTR_BLOCKx = xmm9;
|
||||
@ -1761,7 +1760,7 @@ void MacroAssembler::aesgcm_encrypt(Register in, Register len, Register ct, Regi
|
||||
// 2) No reduction -> accumulate multiplication values
|
||||
// 3) Final reduction post 48 blocks -> new ghash value is computed for the next round
|
||||
// Reduction value = first time
|
||||
ghash16_encrypt16_parallel(key, subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
|
||||
ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
|
||||
addl(pos, 256);
|
||||
addl(ghash_pos, 256);
|
||||
index += 4;
|
||||
@ -1778,12 +1777,12 @@ void MacroAssembler::aesgcm_encrypt(Register in, Register len, Register ct, Regi
|
||||
// Each call uses 4 subkeyHtbl values, so increment the index by 4.
|
||||
bind(GHASH_16_AES_16);
|
||||
// Reduction value = no reduction
|
||||
ghash16_encrypt16_parallel(key, subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
|
||||
ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
|
||||
addl(pos, 256);
|
||||
addl(ghash_pos, 256);
|
||||
index += 4;
|
||||
// Reduction value = final reduction means that the accumulated values have to be reduced as we have completed 48 blocks of ghash
|
||||
ghash16_encrypt16_parallel(key, subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, true, index, COUNTER_INC_MASK);
|
||||
ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, true, index, COUNTER_INC_MASK);
|
||||
addl(pos, 256);
|
||||
addl(ghash_pos, 256);
|
||||
// Calculated ghash value needs to be moved to AAD_HASHX so that we can restart the ghash16-aes16 pipeline
|
||||
@ -1792,7 +1791,7 @@ void MacroAssembler::aesgcm_encrypt(Register in, Register len, Register ct, Regi
|
||||
|
||||
// Restart the pipeline
|
||||
// Reduction value = first time
|
||||
ghash16_encrypt16_parallel(key, subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
|
||||
ghash16_encrypt16_parallel(key, avx512_subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
|
||||
addl(pos, 256);
|
||||
addl(ghash_pos, 256);
|
||||
index += 4;
|
||||
@ -1812,8 +1811,8 @@ void MacroAssembler::aesgcm_encrypt(Register in, Register len, Register ct, Regi
|
||||
vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit);
|
||||
vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit);
|
||||
// Load ghash keys
|
||||
evmovdquq(ZTMP15, Address(subkeyHtbl, rbx, Address::times_1, 0 * 64 + 144), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP16, Address(subkeyHtbl, rbx, Address::times_1, 1 * 64 + 144), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP15, Address(avx512_subkeyHtbl, rbx, Address::times_1, 0 * 64), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP16, Address(avx512_subkeyHtbl, rbx, Address::times_1, 1 * 64), Assembler::AVX_512bit);
|
||||
|
||||
// Ghash blocks 0 - 3
|
||||
carrylessMultiply(ZTMP2, ZTMP3, ZTMP4, ZTMP1, ZTMP13, ZTMP15);
|
||||
@ -1837,8 +1836,8 @@ void MacroAssembler::aesgcm_encrypt(Register in, Register len, Register ct, Regi
|
||||
evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
|
||||
vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit);
|
||||
vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP15, Address(subkeyHtbl, rbx, Address::times_1, 0 * 64 + 144), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP16, Address(subkeyHtbl, rbx, Address::times_1, 1 * 64 + 144), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP15, Address(avx512_subkeyHtbl, rbx, Address::times_1, 0 * 64), Assembler::AVX_512bit);
|
||||
evmovdquq(ZTMP16, Address(avx512_subkeyHtbl, rbx, Address::times_1, 1 * 64), Assembler::AVX_512bit);
|
||||
|
||||
// ghash blocks 0 - 3
|
||||
carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP13, ZTMP15);
|
||||
@ -1884,7 +1883,7 @@ void MacroAssembler::aesgcm_encrypt(Register in, Register len, Register ct, Regi
|
||||
jmp(ENC_DEC_DONE);
|
||||
|
||||
bind(GENERATE_HTBL_48_BLKS);
|
||||
generateHtbl_48_block_zmm(subkeyHtbl);
|
||||
generateHtbl_48_block_zmm(subkeyHtbl, avx512_subkeyHtbl);
|
||||
|
||||
bind(ENC_DEC_DONE);
|
||||
movq(rax, pos);
|
||||
|
@ -148,6 +148,9 @@
|
||||
static const bool int_in_long = false;
|
||||
#endif
|
||||
|
||||
// Number of htbl entries for aes-gcm intrinsic
|
||||
static const int htbl_entries = 96;
|
||||
|
||||
// Does the CPU support vector variable shift instructions?
|
||||
static bool supports_vector_variable_shifts(void) {
|
||||
return (UseAVX >= 2);
|
||||
|
@ -4412,7 +4412,9 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const Register state = c_rarg5;
|
||||
const Address subkeyH_mem(rbp, 2 * wordSize);
|
||||
const Register subkeyHtbl = r11;
|
||||
const Address counter_mem(rbp, 3 * wordSize);
|
||||
const Address avx512_subkeyH_mem(rbp, 3 * wordSize);
|
||||
const Register avx512_subkeyHtbl = r13;
|
||||
const Address counter_mem(rbp, 4 * wordSize);
|
||||
const Register counter = r12;
|
||||
#else
|
||||
const Address key_mem(rbp, 6 * wordSize);
|
||||
@ -4421,7 +4423,9 @@ class StubGenerator: public StubCodeGenerator {
|
||||
const Register state = r13;
|
||||
const Address subkeyH_mem(rbp, 8 * wordSize);
|
||||
const Register subkeyHtbl = r14;
|
||||
const Address counter_mem(rbp, 9 * wordSize);
|
||||
const Address avx512_subkeyH_mem(rbp, 9 * wordSize);
|
||||
const Register avx512_subkeyHtbl = r12;
|
||||
const Address counter_mem(rbp, 10 * wordSize);
|
||||
const Register counter = rsi;
|
||||
#endif
|
||||
__ enter();
|
||||
@ -4438,9 +4442,10 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ movptr(state, state_mem);
|
||||
#endif
|
||||
__ movptr(subkeyHtbl, subkeyH_mem);
|
||||
__ movptr(avx512_subkeyHtbl, avx512_subkeyH_mem);
|
||||
__ movptr(counter, counter_mem);
|
||||
|
||||
__ aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, counter);
|
||||
__ aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, avx512_subkeyHtbl, counter);
|
||||
|
||||
// Restore state before leaving routine
|
||||
#ifdef _WIN64
|
||||
|
@ -417,7 +417,7 @@ class methodHandle;
|
||||
\
|
||||
do_class(com_sun_crypto_provider_galoisCounterMode, "com/sun/crypto/provider/GaloisCounterMode") \
|
||||
do_intrinsic(_galoisCounterMode_AESCrypt, com_sun_crypto_provider_galoisCounterMode, gcm_crypt_name, aes_gcm_signature, F_S) \
|
||||
do_name(gcm_crypt_name, "implGCMCrypt") \
|
||||
do_name(gcm_crypt_name, "implGCMCrypt0") \
|
||||
do_signature(aes_gcm_signature, "([BII[BI[BILcom/sun/crypto/provider/GCTR;Lcom/sun/crypto/provider/GHASH;)I") \
|
||||
\
|
||||
/* support for sun.security.provider.MD5 */ \
|
||||
|
@ -2488,7 +2488,8 @@ Node* GraphKit::make_runtime_call(int flags,
|
||||
Node* parm0, Node* parm1,
|
||||
Node* parm2, Node* parm3,
|
||||
Node* parm4, Node* parm5,
|
||||
Node* parm6, Node* parm7) {
|
||||
Node* parm6, Node* parm7,
|
||||
Node* parm8) {
|
||||
assert(call_addr != NULL, "must not call NULL targets");
|
||||
|
||||
// Slow-path call
|
||||
@ -2535,7 +2536,8 @@ Node* GraphKit::make_runtime_call(int flags,
|
||||
if (parm5 != NULL) { call->init_req(TypeFunc::Parms+5, parm5);
|
||||
if (parm6 != NULL) { call->init_req(TypeFunc::Parms+6, parm6);
|
||||
if (parm7 != NULL) { call->init_req(TypeFunc::Parms+7, parm7);
|
||||
/* close each nested if ===> */ } } } } } } } }
|
||||
if (parm8 != NULL) { call->init_req(TypeFunc::Parms+8, parm8);
|
||||
/* close each nested if ===> */ } } } } } } } } }
|
||||
assert(call->in(call->req()-1) != NULL, "must initialize all parms");
|
||||
|
||||
if (!is_leaf) {
|
||||
|
@ -802,7 +802,8 @@ class GraphKit : public Phase {
|
||||
Node* parm0 = NULL, Node* parm1 = NULL,
|
||||
Node* parm2 = NULL, Node* parm3 = NULL,
|
||||
Node* parm4 = NULL, Node* parm5 = NULL,
|
||||
Node* parm6 = NULL, Node* parm7 = NULL);
|
||||
Node* parm6 = NULL, Node* parm7 = NULL,
|
||||
Node* parm8 = NULL);
|
||||
|
||||
Node* sign_extend_byte(Node* in);
|
||||
Node* sign_extend_short(Node* in);
|
||||
|
@ -6789,11 +6789,23 @@ bool LibraryCallKit::inline_galoisCounterMode_AESCrypt() {
|
||||
Node* state_start = array_element_address(state, intcon(0), T_LONG);
|
||||
Node* subkeyHtbl_start = array_element_address(subkeyHtbl, intcon(0), T_LONG);
|
||||
|
||||
ciKlass* klass = ciTypeArrayKlass::make(T_LONG);
|
||||
Node* klass_node = makecon(TypeKlassPtr::make(klass));
|
||||
|
||||
// htbl entries is set to 96 only for x86-64
|
||||
if (Matcher::htbl_entries == -1) return false;
|
||||
|
||||
// new array to hold 48 computed htbl entries
|
||||
Node* subkeyHtbl_48_entries = new_array(klass_node, intcon(Matcher::htbl_entries), 0);
|
||||
if (subkeyHtbl_48_entries == NULL) return false;
|
||||
|
||||
Node* subkeyHtbl_48_entries_start = array_element_address(subkeyHtbl_48_entries, intcon(0), T_LONG);
|
||||
|
||||
// Call the stub, passing params
|
||||
Node* gcmCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||
OptoRuntime::galoisCounterMode_aescrypt_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
in_start, len, ct_start, out_start, k_start, state_start, subkeyHtbl_start, cnt_start);
|
||||
in_start, len, ct_start, out_start, k_start, state_start, subkeyHtbl_start, subkeyHtbl_48_entries_start, cnt_start);
|
||||
|
||||
// return cipher length (int)
|
||||
Node* retvalue = _gvn.transform(new ProjNode(gcmCrypt, TypeFunc::Parms));
|
||||
|
@ -958,7 +958,7 @@ const TypeFunc* OptoRuntime::counterMode_aescrypt_Type() {
|
||||
//for counterMode calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
|
||||
const TypeFunc* OptoRuntime::galoisCounterMode_aescrypt_Type() {
|
||||
// create input type (domain)
|
||||
int num_args = 8;
|
||||
int num_args = 9;
|
||||
int argcnt = num_args;
|
||||
const Type** fields = TypeTuple::fields(argcnt);
|
||||
int argp = TypeFunc::Parms;
|
||||
@ -969,6 +969,7 @@ const TypeFunc* OptoRuntime::galoisCounterMode_aescrypt_Type() {
|
||||
fields[argp++] = TypePtr::NOTNULL; // byte[] key from AESCrypt obj
|
||||
fields[argp++] = TypePtr::NOTNULL; // long[] state from GHASH obj
|
||||
fields[argp++] = TypePtr::NOTNULL; // long[] subkeyHtbl from GHASH obj
|
||||
fields[argp++] = TypePtr::NOTNULL; // long[] avx512_subkeyHtbl newly created
|
||||
fields[argp++] = TypePtr::NOTNULL; // byte[] counter from GCTR obj
|
||||
|
||||
assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
|
||||
|
@ -122,7 +122,7 @@ final class GHASH implements Cloneable, GCM {
|
||||
|
||||
/* subkeyHtbl and state are stored in long[] for GHASH intrinsic use */
|
||||
|
||||
// hashtable subkeyHtbl holds 2*57 powers of subkeyH computed using
|
||||
// hashtable subkeyHtbl holds 2*9 powers of subkeyH computed using
|
||||
// carry-less multiplication
|
||||
private long[] subkeyHtbl;
|
||||
|
||||
@ -143,9 +143,8 @@ final class GHASH implements Cloneable, GCM {
|
||||
throw new ProviderException("Internal error");
|
||||
}
|
||||
state = new long[2];
|
||||
// 48 keys for the interleaved implementation,
|
||||
// 8 for avx-ghash implementation and 1 for the original key
|
||||
subkeyHtbl = new long[2*57];
|
||||
// 8 for avx-ghash implementation and 1 for the original key
|
||||
subkeyHtbl = new long[2*9];
|
||||
subkeyHtbl[0] = (long)asLongView.get(subkeyH, 0);
|
||||
subkeyHtbl[1] = (long)asLongView.get(subkeyH, 8);
|
||||
}
|
||||
@ -266,7 +265,7 @@ final class GHASH implements Cloneable, GCM {
|
||||
throw new RuntimeException("internal state has invalid length: " +
|
||||
st.length);
|
||||
}
|
||||
if (subH.length != 114) {
|
||||
if (subH.length != 18) {
|
||||
throw new RuntimeException("internal subkeyHtbl has invalid length: " +
|
||||
subH.length);
|
||||
}
|
||||
|
@ -86,7 +86,9 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
// data size when buffer is divided up to aid in intrinsics
|
||||
private static final int TRIGGERLEN = 65536; // 64k
|
||||
// x86-64 parallel intrinsic data size
|
||||
private static final int PARALLEL_LEN = 768;
|
||||
private static final int PARALLEL_LEN = 8192;
|
||||
// max data size for x86-64 intrinsic
|
||||
private static final int SPLIT_LEN = 1048576; // 1MB
|
||||
|
||||
static final byte[] EMPTY_BUF = new byte[0];
|
||||
|
||||
@ -570,6 +572,28 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
return j0;
|
||||
}
|
||||
|
||||
// Wrapper function around AES-GCM interleaved intrinsic that splits
|
||||
// large chunks of data into 1MB sized chunks. This is to place
|
||||
// an upper limit on the number of blocks encrypted in the intrinsic.
|
||||
private static int implGCMCrypt(byte[] in, int inOfs, int inLen, byte[] ct,
|
||||
int ctOfs, byte[] out, int outOfs,
|
||||
GCTR gctr, GHASH ghash) {
|
||||
|
||||
int len = 0;
|
||||
if (inLen > SPLIT_LEN) {
|
||||
while (inLen >= SPLIT_LEN) {
|
||||
int partlen = implGCMCrypt0(in, inOfs + len, SPLIT_LEN, ct,
|
||||
ctOfs + len, out, outOfs + len, gctr, ghash);
|
||||
len += partlen;
|
||||
inLen -= partlen;
|
||||
}
|
||||
}
|
||||
if (inLen > 0) {
|
||||
len += implGCMCrypt0(in, inOfs + len, inLen, ct,
|
||||
ctOfs + len, out, outOfs + len, gctr, ghash);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
/**
|
||||
* Intrinsic for Vector AES Galois Counter Mode implementation.
|
||||
* AES and GHASH operations are interleaved in the intrinsic implementation.
|
||||
@ -590,7 +614,7 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
* @return number of processed bytes
|
||||
*/
|
||||
@IntrinsicCandidate
|
||||
private static int implGCMCrypt(byte[] in, int inOfs, int inLen,
|
||||
private static int implGCMCrypt0(byte[] in, int inOfs, int inLen,
|
||||
byte[] ct, int ctOfs, byte[] out, int outOfs,
|
||||
GCTR gctr, GHASH ghash) {
|
||||
|
||||
|
@ -98,25 +98,25 @@
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DmsgSize=2054
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DmsgSize=8320
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DmsgSize=2054
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DmsgSize=8326
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencOutputOffset=1 -DmsgSize=2054
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencOutputOffset=1 -DmsgSize=8326
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DdecOutputOffset=1 -DmsgSize=2054
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DdecOutputOffset=1 -DmsgSize=8326
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DmsgSize=2054
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DmsgSize=8326
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DmsgSize=2054
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DmsgSize=8326
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=2048
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=8320
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
*
|
||||
|
Loading…
Reference in New Issue
Block a user