8267125: AES Galois CounterMode (GCM) interleaved implementation using AVX512 + VAES instructions
Co-authored-by: Smita Kamath <svkamath@openjdk.org> Co-authored-by: Tomasz Kantecki <tomasz.kantecki@intel.com> Co-authored-by: Anthony Scarpino <ascarpino@openjdk.org> Reviewed-by: kvn, valeriep
This commit is contained in:
parent
6ace805f8c
commit
0e7288ffbf
src
hotspot
cpu/x86
macroAssembler_x86.hpp macroAssembler_x86_aes.cpp stubGenerator_x86_64.cpp stubRoutines_x86.cpp stubRoutines_x86.hpp
share
java.base/share/classes/com/sun/crypto/provider
test/hotspot/jtreg/compiler/codegen/aes
@ -945,12 +945,19 @@ private:
|
||||
void roundDec(XMMRegister key, int rnum);
|
||||
void lastroundDec(XMMRegister key, int rnum);
|
||||
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
|
||||
|
||||
void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
|
||||
void generateHtbl_48_block_zmm(Register htbl);
|
||||
// Emits one stitched pass that runs the AES rounds for 16 counter blocks while carry-less
// multiplying 16 blocks read from 'data' at 'ghash_pos' for GHASH.
// Parameter names follow the definition: 'ghash_input' is true when 'aad_hashx' has to be
// XORed into the first GHASH block, 'first_time_reduction' / 'final_reduction' select how the
// partial products are accumulated, and 'i' picks the first 64-byte subkeyHtbl group to use.
void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
                                XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool first_time_reduction,
                                XMMRegister addmask, bool ghash_input, Register rounds, Register ghash_pos,
                                bool final_reduction, int i, XMMRegister counter_inc_mask);
|
||||
public:
|
||||
void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);
|
||||
void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
|
||||
void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
|
||||
Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
|
||||
void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
|
||||
Register state, Register subkeyHtbl, Register counter);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -1267,4 +1267,627 @@ void MacroAssembler::aesctr_encrypt(Register src_addr, Register dest_addr, Regis
|
||||
bind(EXIT);
|
||||
}
|
||||
|
||||
#endif // _LP64
|
||||
// Emits a carry-less multiplication of GH by HK followed by a two-step reduction that uses
// the constant stored at StubRoutines::x86::ghash_polynomial512_addr(). The result replaces GH.
// Every instruction is issued at 512-bit width, so each 128-bit lane is processed independently.
// Clobbers xmm0, xmm1 and xmm2; r15 is passed to evmovdquq as its scratch register.
void MacroAssembler::gfmul_avx512(XMMRegister GH, XMMRegister HK) {
  const int vlen = Assembler::AVX_512bit;
  const XMMRegister hi  = xmm0;  // high*high product, later the upper half of the result
  const XMMRegister lo  = xmm1;  // low*low product, recycled as a temporary during reduction
  const XMMRegister aux = xmm2;  // cross product, later the reduction constant

  // Partial products: the selector picks the high (1) or low (0) quadword of each operand.
  evpclmulqdq(hi,  GH, HK, 0x11, vlen);
  evpclmulqdq(lo,  GH, HK, 0x00, vlen);
  evpclmulqdq(aux, GH, HK, 0x01, vlen);
  evpclmulqdq(GH,  GH, HK, 0x10, vlen);
  // Sum the two cross products and split the sum across the high and low halves.
  evpxorq(GH, GH, aux, vlen);
  vpsrldq(aux, GH, 8, vlen);
  vpslldq(GH, GH, 8, vlen);
  evpxorq(hi, hi, aux, vlen);
  evpxorq(GH, GH, lo, vlen);

  // Reduction, first step.
  evmovdquq(aux, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), vlen, r15);
  evpclmulqdq(lo, aux, GH, 0x01, vlen);
  vpslldq(lo, lo, 8, vlen);
  evpxorq(GH, GH, lo, vlen);
  // Reduction, second step.
  evpclmulqdq(lo, aux, GH, 0x00, vlen);
  vpsrldq(lo, lo, 4, vlen);
  evpclmulqdq(GH, aux, GH, 0x10, vlen);
  vpslldq(GH, GH, 4, vlen);
  // 0x96 is the three-way XOR truth table: GH = GH ^ hi ^ lo.
  vpternlogq(GH, 0x96, hi, lo, vlen);
}
|
||||
|
||||
// Emits a subroutine (it ends with ret, so it must be reached through call) that fills
// 16-byte slots 9..56 of 'htbl' with values derived from the 16 bytes stored at htbl + 0.
//   slot 56      : the byte-swapped input shifted left by one bit, with POLY XORed in when
//                  the bit shifted out of the top was set
//   slots 55..53 : successive gfmul_avx512 products of that value with itself
//   slots 49..9  : 512-bit groups of four entries, each produced by one gfmul_avx512 call
// Clobbers xmm1, xmm2, xmm4, xmm6, xmm7, xmm8, xmm10, xmm11, xmm12 plus whatever
// gfmul_avx512 clobbers.
// NOTE(review): the ExternalAddress loads below do not name a scratch register, unlike the
// loads in aesgcm_encrypt that pass rbx; confirm the default scratch register is free here.
void MacroAssembler::generateHtbl_48_block_zmm(Register htbl) {
  const int v128 = Assembler::AVX_128bit;
  const int v512 = Assembler::AVX_512bit;

  const XMMRegister HK  = xmm6;
  const XMMRegister ZT5 = xmm4;
  const XMMRegister ZT7 = xmm7;
  const XMMRegister ZT8 = xmm8;
  const XMMRegister swap_mask = xmm10;
  const XMMRegister poly      = xmm11;
  const XMMRegister twoone    = xmm12;
  const XMMRegister carry_out = xmm1;
  const XMMRegister tmp       = xmm2;

  movdqu(HK, Address(htbl, 0));
  movdqu(swap_mask, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
  vpshufb(HK, HK, swap_mask, v128);

  movdqu(poly,   ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr() + 64)); // Poly
  movdqu(twoone, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr() + 80)); // Twoone

  // 128-bit shift left by one: shift both quadwords, then carry the top bit of the low
  // quadword into the high quadword.
  movdqu(tmp, HK);
  vpsllq(HK, HK, 1, v128);
  vpsrlq(tmp, tmp, 63, v128);
  movdqu(carry_out, tmp);
  vpslldq(tmp, tmp, 8, v128);
  vpsrldq(carry_out, carry_out, 8, v128);
  vpor(HK, HK, tmp, v128);

  // carry_out now holds the bit shifted out of the top in dword 0. Spread it to dwords 0
  // and 3, compare against TWOONE to build an all-ones/all-zeros mask, and XOR in POLY
  // under that mask.
  vpshufd(tmp, carry_out, 0x24, v128);
  vpcmpeqd(tmp, tmp, twoone, v128);
  vpand(tmp, tmp, poly, v128);
  vpxor(HK, HK, tmp, v128);
  movdqu(Address(htbl, 16 * 56), HK);

  // Next three entries: multiply the running value by HK each time, store it, and collect
  // all four values in ZT7 (lane 3 = slot 56 ... lane 0 = slot 53).
  movdqu(ZT5, HK);
  vinserti32x4(ZT7, ZT7, HK, 3);
  for (int lane = 2; lane >= 0; lane--) {
    gfmul_avx512(ZT5, HK);
    movdqu(Address(htbl, 16 * (53 + lane)), ZT5);
    vinserti32x4(ZT7, ZT7, ZT5, lane);
  }

  // Broadcast the newest value to every lane and produce the first 512-bit group.
  evshufi64x2(ZT5, ZT5, ZT5, 0x00, v512);
  evmovdquq(ZT8, ZT7, v512);
  gfmul_avx512(ZT7, ZT5);
  evmovdquq(Address(htbl, 16 * 49), ZT7, v512);

  // Broadcast lane 0 of that group; it is the multiplier for all remaining groups.
  evshufi64x2(ZT5, ZT7, ZT7, 0x00, v512);

  // Remaining groups, slots 45 down to 9, alternating between ZT8 and ZT7 (ZT8 first).
  bool use_zt8 = true;
  for (int slot = 45; slot >= 9; slot -= 4) {
    const XMMRegister group = use_zt8 ? ZT8 : ZT7;
    gfmul_avx512(group, ZT5);
    evmovdquq(Address(htbl, 16 * slot), group, v512);
    use_zt8 = !use_zt8;
  }
  ret(0);
}
|
||||
|
||||
// Two-step reduction of the value held in hi128:lo128 using the constant in 'poly';
// the result is written to 'out'. Clobbers tmp0 and tmp1.
// 'out' is written before tmp1 and hi128 are read for the last time, so it must not
// alias either of them.
// Wrapped in do/while(0) so the macro expands to exactly one statement.
#define vclmul_reduce(out, poly, hi128, lo128, tmp0, tmp1) \
  do { \
    evpclmulqdq(tmp0, poly, lo128, 0x01, Assembler::AVX_512bit); \
    vpslldq(tmp0, tmp0, 8, Assembler::AVX_512bit); \
    evpxorq(tmp0, lo128, tmp0, Assembler::AVX_512bit); \
    evpclmulqdq(tmp1, poly, tmp0, 0x00, Assembler::AVX_512bit); \
    vpsrldq(tmp1, tmp1, 4, Assembler::AVX_512bit); \
    evpclmulqdq(out, poly, tmp0, 0x10, Assembler::AVX_512bit); \
    vpslldq(out, out, 4, Assembler::AVX_512bit); \
    vpternlogq(out, 0x96, tmp1, hi128, Assembler::AVX_512bit); \
  } while (0)
|
||||
|
||||
// Horizontal XOR of the four 128-bit lanes of 'reg': the upper 256 bits are folded into the
// lower 256, then the upper 128 of that into the lower 128. The result ends up in the low
// 128 bits of 'reg'. Clobbers 'tmp'.
// Wrapped in do/while(0) so the macro expands to exactly one statement.
#define vhpxori4x128(reg, tmp) \
  do { \
    vextracti64x4(tmp, reg, 1); \
    evpxorq(reg, reg, tmp, Assembler::AVX_256bit); \
    vextracti32x4(tmp, reg, 1); \
    evpxorq(reg, reg, tmp, Assembler::AVX_128bit); \
  } while (0)
|
||||
|
||||
// Applies one vaesenc step with round key 'key' to four 512-bit registers in place.
// Wrapped in do/while(0) so the macro expands to exactly one statement.
#define roundEncode(key, dst1, dst2, dst3, dst4) \
  do { \
    vaesenc(dst1, dst1, key, Assembler::AVX_512bit); \
    vaesenc(dst2, dst2, key, Assembler::AVX_512bit); \
    vaesenc(dst3, dst3, key, Assembler::AVX_512bit); \
    vaesenc(dst4, dst4, key, Assembler::AVX_512bit); \
  } while (0)
|
||||
|
||||
// Applies the final vaesenclast step with round key 'key' to four 512-bit registers in place.
// Wrapped in do/while(0) so the macro expands to exactly one statement.
#define lastroundEncode(key, dst1, dst2, dst3, dst4) \
  do { \
    vaesenclast(dst1, dst1, key, Assembler::AVX_512bit); \
    vaesenclast(dst2, dst2, key, Assembler::AVX_512bit); \
    vaesenclast(dst3, dst3, key, Assembler::AVX_512bit); \
    vaesenclast(dst4, dst4, key, Assembler::AVX_512bit); \
  } while (0)
|
||||
|
||||
// Stores four 512-bit registers (256 bytes in total) to dst + position.
// Wrapped in do/while(0) so the macro expands to exactly one statement.
#define storeData(dst, position, src1, src2, src3, src4) \
  do { \
    evmovdquq(Address(dst, position, Address::times_1, 0 * 64), src1, Assembler::AVX_512bit); \
    evmovdquq(Address(dst, position, Address::times_1, 1 * 64), src2, Assembler::AVX_512bit); \
    evmovdquq(Address(dst, position, Address::times_1, 2 * 64), src3, Assembler::AVX_512bit); \
    evmovdquq(Address(dst, position, Address::times_1, 3 * 64), src4, Assembler::AVX_512bit); \
  } while (0)
|
||||
|
||||
// Loads 256 bytes from src + position into four 512-bit registers.
// Wrapped in do/while(0) so the macro expands to exactly one statement.
#define loadData(src, position, dst1, dst2, dst3, dst4) \
  do { \
    evmovdquq(dst1, Address(src, position, Address::times_1, 0 * 64), Assembler::AVX_512bit); \
    evmovdquq(dst2, Address(src, position, Address::times_1, 1 * 64), Assembler::AVX_512bit); \
    evmovdquq(dst3, Address(src, position, Address::times_1, 2 * 64), Assembler::AVX_512bit); \
    evmovdquq(dst4, Address(src, position, Address::times_1, 3 * 64), Assembler::AVX_512bit); \
  } while (0)
|
||||
|
||||
// Emits the four carry-less partial products of 'ghdata' and 'hkey'. The two digits in each
// destination name are the selector immediate passed to evpclmulqdq (dst01 <- 0x01, etc.).
// Wrapped in do/while(0) so the macro expands to exactly one statement.
#define carrylessMultiply(dst00, dst01, dst10, dst11, ghdata, hkey) \
  do { \
    evpclmulqdq(dst00, ghdata, hkey, 0x00, Assembler::AVX_512bit); \
    evpclmulqdq(dst01, ghdata, hkey, 0x01, Assembler::AVX_512bit); \
    evpclmulqdq(dst10, ghdata, hkey, 0x10, Assembler::AVX_512bit); \
    evpclmulqdq(dst11, ghdata, hkey, 0x11, Assembler::AVX_512bit); \
  } while (0)
|
||||
|
||||
// For each of four registers: byte-shuffle with 'shufmask', then XOR with 'rndkey'
// (callers use this for AES round 0).
// Wrapped in do/while(0) so the macro expands to exactly one statement.
#define shuffleExorRnd1Key(dst0, dst1, dst2, dst3, shufmask, rndkey) \
  do { \
    vpshufb(dst0, dst0, shufmask, Assembler::AVX_512bit); \
    evpxorq(dst0, dst0, rndkey, Assembler::AVX_512bit); \
    vpshufb(dst1, dst1, shufmask, Assembler::AVX_512bit); \
    evpxorq(dst1, dst1, rndkey, Assembler::AVX_512bit); \
    vpshufb(dst2, dst2, shufmask, Assembler::AVX_512bit); \
    evpxorq(dst2, dst2, rndkey, Assembler::AVX_512bit); \
    vpshufb(dst3, dst3, shufmask, Assembler::AVX_512bit); \
    evpxorq(dst3, dst3, rndkey, Assembler::AVX_512bit); \
  } while (0)
|
||||
|
||||
// XORs srcN into dstN for four register pairs (dstN = dstN ^ srcN).
// Wrapped in do/while(0) so the macro expands to exactly one statement.
#define xorBeforeStore(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
  do { \
    evpxorq(dst0, dst0, src0, Assembler::AVX_512bit); \
    evpxorq(dst1, dst1, src1, Assembler::AVX_512bit); \
    evpxorq(dst2, dst2, src2, Assembler::AVX_512bit); \
    evpxorq(dst3, dst3, src3, Assembler::AVX_512bit); \
  } while (0)
|
||||
|
||||
// Four three-way XORs (vpternlogq truth table 0x96): dstN = dstN ^ srcN2 ^ srcN3.
// The statements run in order, so a dstN written early may be used as a source later.
// Wrapped in do/while(0) so the macro expands to exactly one statement.
#define xorGHASH(dst0, dst1, dst2, dst3, src02, src03, src12, src13, src22, src23, src32, src33) \
  do { \
    vpternlogq(dst0, 0x96, src02, src03, Assembler::AVX_512bit); \
    vpternlogq(dst1, 0x96, src12, src13, Assembler::AVX_512bit); \
    vpternlogq(dst2, 0x96, src22, src23, Assembler::AVX_512bit); \
    vpternlogq(dst3, 0x96, src32, src33, Assembler::AVX_512bit); \
  } while (0)
|
||||
|
||||
// Emits one stitched pass: the AES rounds for four 512-bit counter groups (16 blocks) are
// interleaved with the carry-less multiplies for 16 GHASH blocks.
//
//   key, rounds          AES key schedule base and the value compared against 52 / 60 to pick
//                        the number of rounds
//   subkeyHtbl, i        four 64-byte hash-key groups are read from subkeyHtbl + i*64 + 144
//   ctr_blockx           last counter group of the previous pass; updated to the last group
//                        produced here
//   counter_inc_mask     added to step from one counter group to the next
//   aad_hashx            XORed into the first GHASH group when ghash_input is true
//   in, out, pos         256 bytes are read from in + pos, XORed with the keystream and written
//                        to out + pos
//   data, ghash_pos      256 bytes of GHASH input are read from data + ghash_pos
//   first_time_reduction start fresh accumulators in xmm25 / xmm26 / xmm27
//   final_reduction      fold the accumulators in and reduce; the result is left in the low
//                        128 bits of xmm7
//   neither flag         XOR this pass's products into the accumulators
//
// Expects the byte-shuffle mask in xmm24 and the key-shuffle mask in xmm29.
// NOTE(review): 'addmask' is not referenced by this body.
void MacroAssembler::ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, XMMRegister aad_hashx,
Register in, Register out, Register data, Register pos, bool first_time_reduction, XMMRegister addmask, bool ghash_input, Register rounds,
Register ghash_pos, bool final_reduction, int i, XMMRegister counter_inc_mask) {

  Label AES_192, AES_256, LAST_AES_RND;

  const int v128 = Assembler::AVX_128bit;
  const int v512 = Assembler::AVX_512bit;

  // AES state: four counter groups that become the keystream.
  const XMMRegister blk0 = xmm0;
  const XMMRegister blk1 = xmm3;
  const XMMRegister blk2 = xmm4;
  const XMMRegister blk3 = xmm5;
  // Round keys alternate between two registers so the next key loads while one is in use.
  const XMMRegister rk_even = xmm31;
  const XMMRegister rk_odd  = xmm1;
  // Hash keys and GHASH input, two of each in flight.
  const XMMRegister hkey_a  = xmm2;
  const XMMRegister hkey_b  = xmm8;
  const XMMRegister gdata_a = xmm22;
  const XMMRegister gdata_b = xmm23;
  // General temporaries; they hold partial products first and are recycled afterwards.
  const XMMRegister t5  = xmm7;
  const XMMRegister t6  = xmm10;
  const XMMRegister t7  = xmm11;
  const XMMRegister t8  = xmm12;
  const XMMRegister t9  = xmm13;
  const XMMRegister t10 = xmm15;
  const XMMRegister t11 = xmm16;
  const XMMRegister t12 = xmm17;
  const XMMRegister t13 = xmm19;
  const XMMRegister t14 = xmm20;
  const XMMRegister t15 = xmm21;
  const XMMRegister t16 = xmm30;
  // Fixed registers shared with the caller.
  const XMMRegister shuf     = xmm24;
  const XMMRegister key_shuf = xmm29;
  const XMMRegister acc_mid  = xmm25;
  const XMMRegister acc_lo   = xmm26;
  const XMMRegister acc_hi   = xmm27;

  // Byte offset of the first hash-key group; the following three groups sit 64 bytes apart.
  const int hkey_off = i * 64 + 144;

  // Derive the four counter groups and remember the last one for the next pass.
  vpaddd(blk0, ctr_blockx, counter_inc_mask, v512);
  vpaddd(blk1, blk0, counter_inc_mask, v512);
  vpaddd(blk2, blk1, counter_inc_mask, v512);
  vpaddd(blk3, blk2, counter_inc_mask, v512);
  evmovdquq(ctr_blockx, blk3, v512);

  // Round keys 0 and 1.
  ev_load_key(rk_even, key, 0, key_shuf);
  ev_load_key(rk_odd, key, 1 * 16, key_shuf);

  // First two hash-key groups and first two GHASH input groups.
  evmovdquq(hkey_a, Address(subkeyHtbl, hkey_off), v512);
  evmovdquq(hkey_b, Address(subkeyHtbl, hkey_off + 1 * 64), v512);
  evmovdquq(gdata_a, Address(data, ghash_pos, Address::times_1, 0 * 64), v512);
  vpshufb(gdata_a, gdata_a, shuf, v512);
  if (ghash_input) {
    // Fold the incoming hash value into the first group.
    evpxorq(gdata_a, gdata_a, aad_hashx, v512);
  }
  evmovdquq(gdata_b, Address(data, ghash_pos, Address::times_1, 1 * 64), v512);
  vpshufb(gdata_b, gdata_b, shuf, v512);

  // AES round 0
  shuffleExorRnd1Key(blk0, blk1, blk2, blk3, shuf, rk_even);
  ev_load_key(rk_even, key, 2 * 16, key_shuf);

  // GHASH group 0, then fetch the third hash-key group and input group.
  carrylessMultiply(t6, t7, t8, t5, gdata_a, hkey_a);
  evmovdquq(hkey_a, Address(subkeyHtbl, hkey_off + 2 * 64), v512);
  evmovdquq(gdata_a, Address(data, ghash_pos, Address::times_1, 2 * 64), v512);
  vpshufb(gdata_a, gdata_a, shuf, v512);

  // AES round 1
  roundEncode(rk_odd, blk0, blk1, blk2, blk3);
  ev_load_key(rk_odd, key, 3 * 16, key_shuf);

  // GHASH group 1, then fetch the fourth hash-key group and input group.
  carrylessMultiply(t10, t12, t11, t9, gdata_b, hkey_b);
  evmovdquq(hkey_b, Address(subkeyHtbl, hkey_off + 3 * 64), v512);
  evmovdquq(gdata_b, Address(data, ghash_pos, Address::times_1, 3 * 64), v512);
  vpshufb(gdata_b, gdata_b, shuf, v512);

  // AES round 2
  roundEncode(rk_even, blk0, blk1, blk2, blk3);
  ev_load_key(rk_even, key, 4 * 16, key_shuf);

  // GHASH group 2
  carrylessMultiply(t14, t16, t15, t13, gdata_a, hkey_a);

  // AES round 3
  roundEncode(rk_odd, blk0, blk1, blk2, blk3);
  ev_load_key(rk_odd, key, 5 * 16, key_shuf);

  // Combine the products of groups 0..2 into t5 / t6 / t8 / t7.
  xorGHASH(t5, t6, t8, t7, t9, t13, t10, t14, t12, t16, t11, t15);

  // AES round 4
  roundEncode(rk_even, blk0, blk1, blk2, blk3);
  ev_load_key(rk_even, key, 6 * 16, key_shuf);

  // t13..t16 are free again: load the text that will be XORed with the keystream.
  loadData(in, pos, t13, t14, t15, t16);

  // AES round 5
  roundEncode(rk_odd, blk0, blk1, blk2, blk3);
  ev_load_key(rk_odd, key, 7 * 16, key_shuf);

  // GHASH group 3
  carrylessMultiply(t10, t12, t11, t9, gdata_b, hkey_b);

  // AES round 6
  roundEncode(rk_even, blk0, blk1, blk2, blk3);
  ev_load_key(rk_even, key, 8 * 16, key_shuf);

  if (first_time_reduction) {
    // Start new accumulators from this pass's products.
    vpternlogq(t7, 0x96, t8, t12, v512);
    evpxorq(acc_mid, t7, t11, v512);
    evpxorq(acc_hi, t5, t9, v512);
    evpxorq(acc_lo, t6, t10, v512);
  } else if (!final_reduction) {
    // Middle pass: add this pass's products to the running accumulators.
    xorGHASH(t7, acc_mid, acc_hi, acc_lo, t8, t12, t7, t11, t5, t9, t6, t10);
  }

  if (final_reduction) {
    // Sum the middle products (including the accumulator), split the sum into the part
    // that goes to the high half and the part that goes to the low half, and load the
    // reduction constant.
    vpternlogq(t7, 0x96, t8, t12, v512);
    vpternlogq(t7, 0x96, acc_mid, t11, v512);
    vpsrldq(t11, t7, 8, v512);
    vpslldq(t7, t7, 8, v512);
    evmovdquq(t12, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), v512, rbx);
  }

  // AES round 7
  roundEncode(rk_odd, blk0, blk1, blk2, blk3);
  ev_load_key(rk_odd, key, 9 * 16, key_shuf);

  if (final_reduction) {
    // Complete the high (t5) and low (t6) sums.
    vpternlogq(t5, 0x96, t9, t11, v512);
    evpxorq(t5, t5, acc_hi, v512);
    vpternlogq(t6, 0x96, t10, t7, v512);
    evpxorq(t6, t6, acc_lo, v512);
  }

  // AES round 8
  roundEncode(rk_even, blk0, blk1, blk2, blk3);
  ev_load_key(rk_even, key, 10 * 16, key_shuf);

  if (final_reduction) {
    // Collapse the four 128-bit lanes of each sum into lane 0.
    vhpxori4x128(t5, t9);
    vhpxori4x128(t6, t10);
  }

  // AES round 9
  roundEncode(rk_odd, blk0, blk1, blk2, blk3);

  if (final_reduction) {
    // Reduction, first phase.
    evpclmulqdq(t10, t12, t6, 0x01, v128);
    vpslldq(t10, t10, 8, v128);
    evpxorq(t10, t6, t10, v128);
  }

  // Extra rounds for the longer key sizes, selected at run time.
  cmpl(rounds, 52);
  jcc(Assembler::greaterEqual, AES_192);
  jmp(LAST_AES_RND);

  bind(AES_192);
  roundEncode(rk_even, blk0, blk1, blk2, blk3);
  ev_load_key(rk_odd, key, 11 * 16, key_shuf);
  roundEncode(rk_odd, blk0, blk1, blk2, blk3);
  ev_load_key(rk_even, key, 12 * 16, key_shuf);
  cmpl(rounds, 60);
  jcc(Assembler::aboveEqual, AES_256);
  jmp(LAST_AES_RND);

  bind(AES_256);
  roundEncode(rk_even, blk0, blk1, blk2, blk3);
  ev_load_key(rk_odd, key, 13 * 16, key_shuf);
  roundEncode(rk_odd, blk0, blk1, blk2, blk3);
  ev_load_key(rk_even, key, 14 * 16, key_shuf);

  bind(LAST_AES_RND);
  if (final_reduction) {
    // Reduction, second phase; the reduced value ends up in t5.
    evpclmulqdq(t9, t12, t10, 0x00, v128);
    vpsrldq(t9, t9, 4, v128);
    evpclmulqdq(t11, t12, t10, 0x10, v128);
    vpslldq(t11, t11, 4, v128);
    vpternlogq(t5, 0x96, t11, t9, v128);
  }

  // Last AES round, then XOR the keystream with the loaded text and store the result.
  lastroundEncode(rk_even, blk0, blk1, blk2, blk3);
  xorBeforeStore(blk0, blk1, blk2, blk3, t13, t14, t15, t16);
  storeData(out, pos, blk0, blk1, blk2, blk3);
}
|
||||
|
||||
// Emits the AES-GCM bulk routine. Data is handled in units of 768 bytes (48 blocks):
//   1. If len < 768, nothing is processed and rax is set to 0.
//   2. The hash-key table is built by calling the subroutine emitted at the end.
//   3. The first 512 bytes are AES-CTR processed on their own (two 256-byte rounds).
//   4. Stitched passes (ghash16_encrypt16_parallel) then process 256 bytes of AES while
//      hashing 256 bytes read from 'ct', so GHASH trails AES by 512 bytes.
//   5. The trailing 512 bytes are hashed separately and reduced; the updated counter is
//      written to 'counter' and the updated hash to 'state'.
// rax (pos) holds the number of bytes processed on exit. 'len' is decremented as a side effect.
// Uses rax, rbx, r15 and r14 (r11 on Windows) plus a large set of xmm/zmm registers.
void MacroAssembler::aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
                                    Register state, Register subkeyHtbl, Register counter) {

  Label ENC_DEC_DONE, GENERATE_HTBL_48_BLKS, AES_192, AES_256, STORE_CT, GHASH_LAST_32,
        AES_32_BLOCKS, GHASH_AES_PARALLEL, LOOP, ACCUMULATE, GHASH_16_AES_16;

  const int v128 = Assembler::AVX_128bit;
  const int v512 = Assembler::AVX_512bit;

  const Register pos    = rax;
  const Register rounds = r15;
#ifndef _WIN64
  const Register ghash_pos = r14;
#else
  const Register ghash_pos = r11;
#endif // !_WIN64
  // rbx doubles as the scratch register for ExternalAddress accesses and, in the tail,
  // as the byte offset into subkeyHtbl.
  const Register htbl_off = rbx;

  const XMMRegister ctr_blocks = xmm9;
  const XMMRegister ghash_acc  = xmm14;
  const XMMRegister ctr_step   = xmm18;
  const XMMRegister shuf       = xmm24;
  const XMMRegister key_shuf   = xmm29;
  // Accumulators written by ghash16_encrypt16_parallel.
  const XMMRegister acc_mid    = xmm25;
  const XMMRegister acc_lo     = xmm26;
  const XMMRegister acc_hi     = xmm27;

  const XMMRegister z0  = xmm0;
  const XMMRegister z1  = xmm3;
  const XMMRegister z2  = xmm4;
  const XMMRegister z3  = xmm5;
  const XMMRegister z4  = xmm6;
  const XMMRegister z5  = xmm7;
  const XMMRegister z6  = xmm10;
  const XMMRegister z7  = xmm11;
  const XMMRegister z8  = xmm12;
  const XMMRegister z9  = xmm13;
  const XMMRegister z10 = xmm15;
  const XMMRegister z11 = xmm16;
  const XMMRegister z12 = xmm17;
  const XMMRegister z13 = xmm19;
  const XMMRegister z14 = xmm20;
  const XMMRegister z15 = xmm21;
  const XMMRegister z16 = xmm30;

  // subkeyHtbl group index handed to each stitched pass. These are code-generation-time
  // values: each pass of a 48-block cycle uses the next four groups.
  const int first_pass_index  = 0;
  const int middle_pass_index = 4;
  const int final_pass_index  = 8;

  movl(pos, 0); // bytes processed so far
  // Anything shorter than 768 bytes is left untouched.
  cmpl(len, 768);
  jcc(Assembler::less, ENC_DEC_DONE);

  // Build the 48-entry hash-key table.
  call(GENERATE_HTBL_48_BLKS, relocInfo::none);
  movl(ghash_pos, 0); // offset of the next bytes to hash

  // Fetch the counter and the current hash state; the state is byte-shuffled on entry.
  movdqu(ctr_blocks, Address(counter, 0));
  movdqu(ghash_acc, Address(state, 0));
  movdqu(shuf, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()), rbx);
  vpshufb(ghash_acc, ghash_acc, shuf, v128);

  // The length field of the key array decides the number of AES rounds.
  movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

  // Replicate the counter into all four lanes and shuffle it with the counter mask,
  // which stays in 'shuf' from here on.
  evshufi64x2(ctr_blocks, ctr_blocks, ctr_blocks, 0, v512);
  evmovdquq(shuf, ExternalAddress(StubRoutines::x86::counter_mask_addr()), v512, rbx);
  vpshufb(ctr_blocks, ctr_blocks, shuf, v512);

  // Per-group counter step, then the first four counter groups.
  evmovdquq(ctr_step, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), v512, rbx);
  vpaddd(z5, ctr_blocks, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 64), v512, rbx);
  vpaddd(z6, z5, ctr_step, v512);
  vpaddd(z7, z6, ctr_step, v512);
  vpaddd(z8, z7, ctr_step, v512);

  // ---- AES only: two rounds of 256 bytes ----
  bind(AES_32_BLOCKS);
  // Remember the last counter group before the registers are turned into keystream.
  evmovdquq(ctr_blocks, z8, v512);

  loadData(in, pos, z0, z1, z2, z3);
  movdqu(key_shuf, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx);
  ev_load_key(z4, key, 0, key_shuf);
  // AES round 0
  shuffleExorRnd1Key(z5, z6, z7, z8, shuf, z4);

  for (int rnd = 1; rnd < 10; rnd++) {
    ev_load_key(z4, key, rnd * 16, key_shuf);
    roundEncode(z4, z5, z6, z7, z8);
  }
  ev_load_key(z4, key, 10 * 16, key_shuf);
  // Longer keys need extra rounds.
  cmpl(rounds, 52);
  jcc(Assembler::greaterEqual, AES_192);
  lastroundEncode(z4, z5, z6, z7, z8);
  jmp(STORE_CT);

  bind(AES_192);
  roundEncode(z4, z5, z6, z7, z8);
  ev_load_key(z4, key, 11 * 16, key_shuf);
  roundEncode(z4, z5, z6, z7, z8);
  cmpl(rounds, 60);
  jcc(Assembler::aboveEqual, AES_256);
  ev_load_key(z4, key, 12 * 16, key_shuf);
  lastroundEncode(z4, z5, z6, z7, z8);
  jmp(STORE_CT);

  bind(AES_256);
  ev_load_key(z4, key, 12 * 16, key_shuf);
  roundEncode(z4, z5, z6, z7, z8);
  ev_load_key(z4, key, 13 * 16, key_shuf);
  roundEncode(z4, z5, z6, z7, z8);
  ev_load_key(z4, key, 14 * 16, key_shuf);
  lastroundEncode(z4, z5, z6, z7, z8);

  bind(STORE_CT);
  // Keystream XOR input text, then store.
  xorBeforeStore(z5, z6, z7, z8, z0, z1, z2, z3);
  storeData(out, pos, z5, z6, z7, z8);
  addl(pos, 256);
  cmpl(pos, 512);
  jcc(Assembler::aboveEqual, GHASH_AES_PARALLEL);
  // Counter groups for the second round.
  vpaddd(z5, ctr_blocks, ctr_step, v512);
  vpaddd(z6, z5, ctr_step, v512);
  vpaddd(z7, z6, ctr_step, v512);
  vpaddd(z8, z7, ctr_step, v512);
  jmp(AES_32_BLOCKS);

  // ---- Stitched AES + GHASH ----
  // Passes come in a cycle of three per 48 hashed blocks:
  //   first  : the incoming hash is XORed into the data and new accumulators are started
  //   middle : products are added to the accumulators
  //   final  : accumulators are reduced to the next hash value
  bind(GHASH_AES_PARALLEL);
  ghash16_encrypt16_parallel(key, subkeyHtbl, ctr_blocks, ghash_acc, in, out, ct, pos, true, shuf, true, rounds, ghash_pos, false, first_pass_index, ctr_step);
  addl(pos, 256);
  addl(ghash_pos, 256);

  // 768 bytes of AES and 256 bytes of GHASH are done. If fewer than 768 bytes remain,
  // the outstanding 512 bytes of GHASH are finished in GHASH_LAST_32.
  subl(len, 768);
  cmpl(len, 768);
  jcc(Assembler::less, GHASH_LAST_32);

  bind(GHASH_16_AES_16);
  // Middle pass
  ghash16_encrypt16_parallel(key, subkeyHtbl, ctr_blocks, ghash_acc, in, out, ct, pos, false, shuf, false, rounds, ghash_pos, false, middle_pass_index, ctr_step);
  addl(pos, 256);
  addl(ghash_pos, 256);
  // Final pass of this cycle
  ghash16_encrypt16_parallel(key, subkeyHtbl, ctr_blocks, ghash_acc, in, out, ct, pos, false, shuf, false, rounds, ghash_pos, true, final_pass_index, ctr_step);
  addl(pos, 256);
  addl(ghash_pos, 256);
  // The reduced hash comes back in z5 (xmm7); it seeds the next cycle.
  movdqu(ghash_acc, z5);

  // First pass of the next cycle
  ghash16_encrypt16_parallel(key, subkeyHtbl, ctr_blocks, ghash_acc, in, out, ct, pos, true, shuf, true, rounds, ghash_pos, false, first_pass_index, ctr_step);
  addl(pos, 256);
  addl(ghash_pos, 256);

  subl(len, 768);
  cmpl(len, 768);
  jcc(Assembler::greaterEqual, GHASH_16_AES_16);

  // ---- GHASH for the trailing 32 blocks ----
  // Continues from the accumulators left by the last stitched pass.
  bind(GHASH_LAST_32);
  // Byte offset into subkeyHtbl (on top of the fixed 144) for the first key pair used here.
  movl(htbl_off, 256);
  evmovdquq(z13, Address(ct, ghash_pos, Address::times_1, 0 * 64), v512);
  evmovdquq(z14, Address(ct, ghash_pos, Address::times_1, 1 * 64), v512);
  vpshufb(z13, z13, shuf, v512);
  vpshufb(z14, z14, shuf, v512);
  evmovdquq(z15, Address(subkeyHtbl, htbl_off, Address::times_1, 0 * 64 + 144), v512);
  evmovdquq(z16, Address(subkeyHtbl, htbl_off, Address::times_1, 1 * 64 + 144), v512);

  // First 8 blocks
  carrylessMultiply(z2, z3, z4, z1, z13, z15);
  carrylessMultiply(z6, z7, z8, z5, z14, z16);

  // Seed the running sums with the accumulators: z1 high, z2 low, z3 / z4 middle.
  vpternlogq(z1, 0x96, z5, acc_hi, v512);
  vpternlogq(z2, 0x96, z6, acc_lo, v512);
  vpternlogq(z3, 0x96, z7, acc_mid, v512);
  evpxorq(z4, z4, z8, v512);

  addl(ghash_pos, 128);
  addl(htbl_off, 128);

  // Remaining blocks, 8 per iteration, until GHASH has caught up with AES.
  bind(LOOP);
  cmpl(ghash_pos, pos);
  jcc(Assembler::aboveEqual, ACCUMULATE);
  evmovdquq(z13, Address(ct, ghash_pos, Address::times_1, 0 * 64), v512);
  evmovdquq(z14, Address(ct, ghash_pos, Address::times_1, 1 * 64), v512);
  vpshufb(z13, z13, shuf, v512);
  vpshufb(z14, z14, shuf, v512);
  evmovdquq(z15, Address(subkeyHtbl, htbl_off, Address::times_1, 0 * 64 + 144), v512);
  evmovdquq(z16, Address(subkeyHtbl, htbl_off, Address::times_1, 1 * 64 + 144), v512);

  carrylessMultiply(z6, z7, z8, z5, z13, z15);
  carrylessMultiply(z10, z11, z12, z9, z14, z16);

  // z1 ^= z5 ^ z9, z2 ^= z6 ^ z10, z3 ^= z7 ^ z11, z4 ^= z8 ^ z12
  xorGHASH(z1, z2, z3, z4, z5, z9, z6, z10, z7, z11, z8, z12);
  addl(ghash_pos, 128);
  addl(htbl_off, 128);
  jmp(LOOP);

  // Merge the middle sums into the high and low sums.
  bind(ACCUMULATE);
  evpxorq(z3, z3, z4, v512);
  vpsrldq(z7, z3, 8, v512);
  vpslldq(z8, z3, 8, v512);
  evpxorq(z1, z1, z7, v512);
  evpxorq(z2, z2, z8, v512);

  // Collapse the four lanes of each sum, then reduce into ghash_acc.
  vhpxori4x128(z1, z11);
  vhpxori4x128(z2, z12);
  evmovdquq(z15, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), v512, rbx);
  vclmul_reduce(ghash_acc, z15, z1, z2, z3, z4);

  // Step the counter once more, shuffle it back and store it for the next call.
  vpaddd(ctr_blocks, ctr_blocks, ctr_step, v128);
  vpshufb(ctr_blocks, ctr_blocks, shuf, v512);
  movdqu(Address(counter, 0), ctr_blocks);
  // Shuffle the hash back with the ghash swap mask and store it.
  // NOTE(review): this load names no scratch register, unlike the other ExternalAddress
  // accesses in this function, which pass rbx.
  movdqu(shuf, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
  vpshufb(ghash_acc, ghash_acc, shuf, v128);
  movdqu(Address(state, 0), ghash_acc);
  jmp(ENC_DEC_DONE);

  // Out-of-line subroutine reached through the call near the top; it ends with ret.
  bind(GENERATE_HTBL_48_BLKS);
  generateHtbl_48_block_zmm(subkeyHtbl);

  bind(ENC_DEC_DONE);
  movq(rax, pos);
}
|
||||
|
||||
#endif // _LP64
|
@ -4368,6 +4368,95 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits the GHASH polynomial constant table into the stub code buffer and
// returns the address of its first word.
//
// Layout, as 64-bit words in emission order:
//   words 0..7   - the "POLY for reduction" pair, repeated four times
//                  (64 bytes in total)
//   words 8..9   - POLY
//   words 10..11 - TWOONE
address ghash_polynomial512_addr() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "_ghash_poly512_addr");
  address table_start = __ pc();

  // POLY for reduction: the same two words emitted four times in a row
  for (int copy = 0; copy < 4; copy++) {
    __ emit_data64(0x00000001C2000000, relocInfo::none);
    __ emit_data64(0xC200000000000000, relocInfo::none);
  }

  // POLY
  __ emit_data64(0x0000000000000001, relocInfo::none);
  __ emit_data64(0xC200000000000000, relocInfo::none);

  // TWOONE
  __ emit_data64(0x0000000000000001, relocInfo::none);
  __ emit_data64(0x0000000100000000, relocInfo::none);

  return table_start;
}
|
||||
|
||||
// Vector AES Galois Counter Mode implementation. Parameters:
|
||||
// Windows regs | Linux regs
|
||||
// in = c_rarg0 (rcx) | c_rarg0 (rsi)
|
||||
// len = c_rarg1 (rdx) | c_rarg1 (rdi)
|
||||
// ct = c_rarg2 (r8) | c_rarg2 (rdx)
|
||||
// out = c_rarg3 (r9) | c_rarg3 (rcx)
|
||||
// key = r10 | c_rarg4 (r8)
|
||||
// state = r13 | c_rarg5 (r9)
|
||||
// subkeyHtbl = r14 | r11
|
||||
// counter = rsi | r12
|
||||
// return - number of processed bytes
|
||||
address generate_galoisCounterMode_AESCrypt() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "galoisCounterMode_AESCrypt");
|
||||
address start = __ pc();
|
||||
const Register in = c_rarg0;
|
||||
const Register len = c_rarg1;
|
||||
const Register ct = c_rarg2;
|
||||
const Register out = c_rarg3;
|
||||
// and updated with the incremented counter in the end
|
||||
#ifndef _WIN64
|
||||
const Register key = c_rarg4;
|
||||
const Register state = c_rarg5;
|
||||
const Address subkeyH_mem(rbp, 2 * wordSize);
|
||||
const Register subkeyHtbl = r11;
|
||||
const Address counter_mem(rbp, 3 * wordSize);
|
||||
const Register counter = r12;
|
||||
#else
|
||||
const Address key_mem(rbp, 6 * wordSize);
|
||||
const Register key = r10;
|
||||
const Address state_mem(rbp, 7 * wordSize);
|
||||
const Register state = r13;
|
||||
const Address subkeyH_mem(rbp, 8 * wordSize);
|
||||
const Register subkeyHtbl = r14;
|
||||
const Address counter_mem(rbp, 9 * wordSize);
|
||||
const Register counter = rsi;
|
||||
#endif
|
||||
__ enter();
|
||||
// Save state before entering routine
|
||||
__ push(r12);
|
||||
__ push(r13);
|
||||
__ push(r14);
|
||||
__ push(r15);
|
||||
__ push(rbx);
|
||||
#ifdef _WIN64
|
||||
// on win64, fill len_reg from stack position
|
||||
__ push(rsi);
|
||||
__ movptr(key, key_mem);
|
||||
__ movptr(state, state_mem);
|
||||
#endif
|
||||
__ movptr(subkeyHtbl, subkeyH_mem);
|
||||
__ movptr(counter, counter_mem);
|
||||
|
||||
__ aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, counter);
|
||||
|
||||
// Restore state before leaving routine
|
||||
#ifdef _WIN64
|
||||
__ pop(rsi);
|
||||
#endif
|
||||
__ pop(rbx);
|
||||
__ pop(r15);
|
||||
__ pop(r14);
|
||||
__ pop(r13);
|
||||
__ pop(r12);
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
return start;
|
||||
}
|
||||
|
||||
// This mask is used for incrementing counter value(linc0, linc4, etc.)
|
||||
address counter_mask_addr() {
|
||||
__ align(64);
|
||||
@ -7618,13 +7707,20 @@ address generate_avx_ghash_processBlocks() {
|
||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
|
||||
StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt();
|
||||
StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt();
|
||||
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
|
||||
StubRoutines::x86::_ghash_poly512_addr = ghash_polynomial512_addr();
|
||||
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||
StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
|
||||
} else {
|
||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
||||
}
|
||||
}
|
||||
|
||||
if (UseAESCTRIntrinsics) {
|
||||
if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512bw() && VM_Version::supports_avx512vl()) {
|
||||
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
|
||||
if (StubRoutines::x86::_counter_mask_addr == NULL) {
|
||||
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
|
||||
}
|
||||
StubRoutines::_counterMode_AESCrypt = generate_counterMode_VectorAESCrypt();
|
||||
} else {
|
||||
StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
|
||||
@ -7664,7 +7760,9 @@ address generate_avx_ghash_processBlocks() {
|
||||
|
||||
// Generate GHASH intrinsics code
|
||||
if (UseGHASHIntrinsics) {
|
||||
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||
if (StubRoutines::x86::_ghash_long_swap_mask_addr == NULL) {
|
||||
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||
}
|
||||
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
|
||||
if (VM_Version::supports_avx()) {
|
||||
StubRoutines::x86::_ghash_shuffmask_addr = ghash_shufflemask_addr();
|
||||
|
@ -80,6 +80,7 @@ address StubRoutines::x86::_join_0_1_base64 = NULL;
|
||||
address StubRoutines::x86::_join_1_2_base64 = NULL;
|
||||
address StubRoutines::x86::_join_2_3_base64 = NULL;
|
||||
address StubRoutines::x86::_decoding_table_base64 = NULL;
|
||||
address StubRoutines::x86::_ghash_poly512_addr = NULL;
|
||||
#endif
|
||||
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
|
||||
|
||||
|
@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
|
||||
|
||||
enum platform_dependent_constants {
|
||||
code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small)
|
||||
code_size2 = 35300 LP64_ONLY(+25000) // simply increase if too small (assembler will crash if too small)
|
||||
code_size2 = 35300 LP64_ONLY(+32000) // simply increase if too small (assembler will crash if too small)
|
||||
};
|
||||
|
||||
class x86 {
|
||||
@ -198,6 +198,7 @@ class x86 {
|
||||
static address _join_1_2_base64;
|
||||
static address _join_2_3_base64;
|
||||
static address _decoding_table_base64;
|
||||
static address _ghash_poly512_addr;
|
||||
#endif
|
||||
// byte flip mask for sha256
|
||||
static address _pshuffle_byte_flip_mask_addr;
|
||||
@ -254,6 +255,7 @@ class x86 {
|
||||
static address crc_by128_masks_avx512_addr() { return (address)_crc_by128_masks_avx512; }
|
||||
static address shuf_table_crc32_avx512_addr() { return (address)_shuf_table_crc32_avx512; }
|
||||
static address crc_table_avx512_addr() { return (address)_crc_table_avx512; }
|
||||
static address ghash_polynomial512_addr() { return _ghash_poly512_addr; }
|
||||
#endif // _LP64
|
||||
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
|
||||
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
|
||||
|
@ -182,6 +182,7 @@ int vmIntrinsics::predicates_needed(vmIntrinsics::ID id) {
|
||||
case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
|
||||
case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
|
||||
case vmIntrinsics::_counterMode_AESCrypt:
|
||||
case vmIntrinsics::_galoisCounterMode_AESCrypt:
|
||||
return 1;
|
||||
case vmIntrinsics::_digestBase_implCompressMB:
|
||||
return 5;
|
||||
@ -429,6 +430,9 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
|
||||
case vmIntrinsics::_counterMode_AESCrypt:
|
||||
if (!UseAESCTRIntrinsics) return true;
|
||||
break;
|
||||
case vmIntrinsics::_galoisCounterMode_AESCrypt:
|
||||
if (!UseAESIntrinsics) return true;
|
||||
break;
|
||||
case vmIntrinsics::_md5_implCompress:
|
||||
if (!UseMD5Intrinsics) return true;
|
||||
break;
|
||||
|
@ -415,6 +415,11 @@ class methodHandle;
|
||||
do_intrinsic(_counterMode_AESCrypt, com_sun_crypto_provider_counterMode, crypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
|
||||
do_name( crypt_name, "implCrypt") \
|
||||
\
|
||||
do_class(com_sun_crypto_provider_galoisCounterMode, "com/sun/crypto/provider/GaloisCounterMode") \
|
||||
do_intrinsic(_galoisCounterMode_AESCrypt, com_sun_crypto_provider_galoisCounterMode, gcm_crypt_name, aes_gcm_signature, F_S) \
|
||||
do_name(gcm_crypt_name, "implGCMCrypt") \
|
||||
do_signature(aes_gcm_signature, "([BII[BI[BILcom/sun/crypto/provider/GCTR;Lcom/sun/crypto/provider/GHASH;)I") \
|
||||
\
|
||||
/* support for sun.security.provider.MD5 */ \
|
||||
do_class(sun_security_provider_md5, "sun/security/provider/MD5") \
|
||||
do_intrinsic(_md5_implCompress, sun_security_provider_md5, implCompress_name, implCompress_signature, F_R) \
|
||||
|
@ -308,6 +308,7 @@
|
||||
static_field(StubRoutines, _electronicCodeBook_encryptAESCrypt, address) \
|
||||
static_field(StubRoutines, _electronicCodeBook_decryptAESCrypt, address) \
|
||||
static_field(StubRoutines, _counterMode_AESCrypt, address) \
|
||||
static_field(StubRoutines, _galoisCounterMode_AESCrypt, address) \
|
||||
static_field(StubRoutines, _base64_encodeBlock, address) \
|
||||
static_field(StubRoutines, _base64_decodeBlock, address) \
|
||||
static_field(StubRoutines, _ghash_processBlocks, address) \
|
||||
|
@ -640,6 +640,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
||||
case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
|
||||
case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
|
||||
case vmIntrinsics::_counterMode_AESCrypt:
|
||||
case vmIntrinsics::_galoisCounterMode_AESCrypt:
|
||||
case vmIntrinsics::_md5_implCompress:
|
||||
case vmIntrinsics::_sha_implCompress:
|
||||
case vmIntrinsics::_sha2_implCompress:
|
||||
|
@ -1087,6 +1087,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
|
||||
strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_encryptAESCrypt") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_decryptAESCrypt") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "galoisCounterMode_AESCrypt") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "encodeBlock") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "decodeBlock") == 0 ||
|
||||
|
@ -2535,7 +2535,7 @@ Node* GraphKit::make_runtime_call(int flags,
|
||||
if (parm5 != NULL) { call->init_req(TypeFunc::Parms+5, parm5);
|
||||
if (parm6 != NULL) { call->init_req(TypeFunc::Parms+6, parm6);
|
||||
if (parm7 != NULL) { call->init_req(TypeFunc::Parms+7, parm7);
|
||||
/* close each nested if ===> */ } } } } } } } }
|
||||
/* close each nested if ===> */ } } } } } } } }
|
||||
assert(call->in(call->req()-1) != NULL, "must initialize all parms");
|
||||
|
||||
if (!is_leaf) {
|
||||
|
@ -547,6 +547,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
||||
case vmIntrinsics::_counterMode_AESCrypt:
|
||||
return inline_counterMode_AESCrypt(intrinsic_id());
|
||||
|
||||
case vmIntrinsics::_galoisCounterMode_AESCrypt:
|
||||
return inline_galoisCounterMode_AESCrypt();
|
||||
|
||||
case vmIntrinsics::_md5_implCompress:
|
||||
case vmIntrinsics::_sha_implCompress:
|
||||
case vmIntrinsics::_sha2_implCompress:
|
||||
@ -713,6 +716,8 @@ Node* LibraryCallKit::try_to_predicate(int predicate) {
|
||||
return inline_counterMode_AESCrypt_predicate();
|
||||
case vmIntrinsics::_digestBase_implCompressMB:
|
||||
return inline_digestBase_implCompressMB_predicate(predicate);
|
||||
case vmIntrinsics::_galoisCounterMode_AESCrypt:
|
||||
return inline_galoisCounterMode_AESCrypt_predicate();
|
||||
|
||||
default:
|
||||
// If you get here, it may be that someone has added a new intrinsic
|
||||
@ -6679,6 +6684,134 @@ bool LibraryCallKit::inline_digestBase_implCompressMB(Node* digestBase_obj, ciIn
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------inline_galoisCounterMode_AESCrypt-----------------------
|
||||
bool LibraryCallKit::inline_galoisCounterMode_AESCrypt() {
|
||||
assert(UseAES, "need AES instruction support");
|
||||
address stubAddr = NULL;
|
||||
const char *stubName = NULL;
|
||||
stubAddr = StubRoutines::galoisCounterMode_AESCrypt();
|
||||
stubName = "galoisCounterMode_AESCrypt";
|
||||
|
||||
if (stubAddr == NULL) return false;
|
||||
|
||||
Node* in = argument(0);
|
||||
Node* inOfs = argument(1);
|
||||
Node* len = argument(2);
|
||||
Node* ct = argument(3);
|
||||
Node* ctOfs = argument(4);
|
||||
Node* out = argument(5);
|
||||
Node* outOfs = argument(6);
|
||||
Node* gctr_object = argument(7);
|
||||
Node* ghash_object = argument(8);
|
||||
|
||||
// (1) in, ct and out are arrays.
|
||||
const Type* in_type = in->Value(&_gvn);
|
||||
const Type* ct_type = ct->Value(&_gvn);
|
||||
const Type* out_type = out->Value(&_gvn);
|
||||
const TypeAryPtr* top_in = in_type->isa_aryptr();
|
||||
const TypeAryPtr* top_ct = ct_type->isa_aryptr();
|
||||
const TypeAryPtr* top_out = out_type->isa_aryptr();
|
||||
assert(top_in != NULL && top_in->klass() != NULL &&
|
||||
top_ct != NULL && top_ct->klass() != NULL &&
|
||||
top_out != NULL && top_out->klass() != NULL, "args are strange");
|
||||
|
||||
// checks are the responsibility of the caller
|
||||
Node* in_start = in;
|
||||
Node* ct_start = ct;
|
||||
Node* out_start = out;
|
||||
if (inOfs != NULL || ctOfs != NULL || outOfs != NULL) {
|
||||
assert(inOfs != NULL && ctOfs != NULL && outOfs != NULL, "");
|
||||
in_start = array_element_address(in, inOfs, T_BYTE);
|
||||
ct_start = array_element_address(ct, ctOfs, T_BYTE);
|
||||
out_start = array_element_address(out, outOfs, T_BYTE);
|
||||
}
|
||||
|
||||
// if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object
|
||||
// (because of the predicated logic executed earlier).
|
||||
// so we cast it here safely.
|
||||
// this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java
|
||||
Node* embeddedCipherObj = load_field_from_object(gctr_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;");
|
||||
Node* counter = load_field_from_object(gctr_object, "counter", "[B");
|
||||
Node* subkeyHtbl = load_field_from_object(ghash_object, "subkeyHtbl", "[J");
|
||||
Node* state = load_field_from_object(ghash_object, "state", "[J");
|
||||
|
||||
if (embeddedCipherObj == NULL || counter == NULL || subkeyHtbl == NULL || state == NULL) {
|
||||
return false;
|
||||
}
|
||||
// cast it to what we know it will be at runtime
|
||||
const TypeInstPtr* tinst = _gvn.type(gctr_object)->isa_instptr();
|
||||
assert(tinst != NULL, "GCTR obj is null");
|
||||
assert(tinst->klass()->is_loaded(), "GCTR obj is not loaded");
|
||||
ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
|
||||
assert(klass_AESCrypt->is_loaded(), "predicate checks that this class is loaded");
|
||||
ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
|
||||
const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
|
||||
const TypeOopPtr* xtype = aklass->as_instance_type();
|
||||
Node* aescrypt_object = new CheckCastPPNode(control(), embeddedCipherObj, xtype);
|
||||
aescrypt_object = _gvn.transform(aescrypt_object);
|
||||
// we need to get the start of the aescrypt_object's expanded key array
|
||||
Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
|
||||
if (k_start == NULL) return false;
|
||||
|
||||
// similarly, get the start address of the r vector
|
||||
Node* cnt_start = array_element_address(counter, intcon(0), T_BYTE);
|
||||
Node* state_start = array_element_address(state, intcon(0), T_LONG);
|
||||
Node* subkeyHtbl_start = array_element_address(subkeyHtbl, intcon(0), T_LONG);
|
||||
|
||||
// Call the stub, passing params
|
||||
Node* gcmCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||
OptoRuntime::galoisCounterMode_aescrypt_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
in_start, len, ct_start, out_start, k_start, state_start, subkeyHtbl_start, cnt_start);
|
||||
|
||||
// return cipher length (int)
|
||||
Node* retvalue = _gvn.transform(new ProjNode(gcmCrypt, TypeFunc::Parms));
|
||||
set_result(retvalue);
|
||||
return true;
|
||||
}
|
||||
|
||||
//----------------------------inline_galoisCounterMode_AESCrypt_predicate----------------------------
|
||||
// Return node representing slow path of predicate check.
|
||||
// the pseudo code we want to emulate with this predicate is:
|
||||
// for encryption:
|
||||
// if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath
|
||||
// for decryption:
|
||||
// if ((embeddedCipherObj instanceof AESCrypt) && (cipher!=plain)) do_intrinsic, else do_javapath
|
||||
// note cipher==plain is more conservative than the original java code but that's OK
|
||||
//
|
||||
|
||||
Node* LibraryCallKit::inline_galoisCounterMode_AESCrypt_predicate() {
|
||||
// The receiver was checked for NULL already.
|
||||
Node* objGCTR = argument(7);
|
||||
// Load embeddedCipher field of GCTR object.
|
||||
Node* embeddedCipherObj = load_field_from_object(objGCTR, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;");
|
||||
assert(embeddedCipherObj != NULL, "embeddedCipherObj is null");
|
||||
|
||||
// get AESCrypt klass for instanceOf check
|
||||
// AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point
|
||||
// will have same classloader as CipherBlockChaining object
|
||||
const TypeInstPtr* tinst = _gvn.type(objGCTR)->isa_instptr();
|
||||
assert(tinst != NULL, "GCTR obj is null");
|
||||
assert(tinst->klass()->is_loaded(), "GCTR obj is not loaded");
|
||||
|
||||
// we want to do an instanceof comparison against the AESCrypt class
|
||||
ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
|
||||
if (!klass_AESCrypt->is_loaded()) {
|
||||
// if AESCrypt is not even loaded, we never take the intrinsic fast path
|
||||
Node* ctrl = control();
|
||||
set_control(top()); // no regular fast path
|
||||
return ctrl;
|
||||
}
|
||||
|
||||
ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
|
||||
Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt)));
|
||||
Node* cmp_instof = _gvn.transform(new CmpINode(instof, intcon(1)));
|
||||
Node* bool_instof = _gvn.transform(new BoolNode(cmp_instof, BoolTest::ne));
|
||||
Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
|
||||
|
||||
return instof_false; // even if it is NULL
|
||||
}
|
||||
|
||||
//------------------------------get_state_from_digest_object-----------------------
|
||||
Node * LibraryCallKit::get_state_from_digest_object(Node *digest_object, const char *state_type) {
|
||||
Node* digest_state = load_field_from_object(digest_object, "state", state_type);
|
||||
|
@ -305,6 +305,8 @@ class LibraryCallKit : public GraphKit {
|
||||
bool inline_fma(vmIntrinsics::ID id);
|
||||
bool inline_character_compare(vmIntrinsics::ID id);
|
||||
bool inline_fp_min_max(vmIntrinsics::ID id);
|
||||
bool inline_galoisCounterMode_AESCrypt();
|
||||
Node* inline_galoisCounterMode_AESCrypt_predicate();
|
||||
|
||||
bool inline_profileBoolean();
|
||||
bool inline_isCompileConstant();
|
||||
|
@ -955,6 +955,31 @@ const TypeFunc* OptoRuntime::counterMode_aescrypt_Type() {
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
//for counterMode calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
|
||||
const TypeFunc* OptoRuntime::galoisCounterMode_aescrypt_Type() {
|
||||
// create input type (domain)
|
||||
int num_args = 8;
|
||||
int argcnt = num_args;
|
||||
const Type** fields = TypeTuple::fields(argcnt);
|
||||
int argp = TypeFunc::Parms;
|
||||
fields[argp++] = TypePtr::NOTNULL; // byte[] in + inOfs
|
||||
fields[argp++] = TypeInt::INT; // int len
|
||||
fields[argp++] = TypePtr::NOTNULL; // byte[] ct + ctOfs
|
||||
fields[argp++] = TypePtr::NOTNULL; // byte[] out + outOfs
|
||||
fields[argp++] = TypePtr::NOTNULL; // byte[] key from AESCrypt obj
|
||||
fields[argp++] = TypePtr::NOTNULL; // long[] state from GHASH obj
|
||||
fields[argp++] = TypePtr::NOTNULL; // long[] subkeyHtbl from GHASH obj
|
||||
fields[argp++] = TypePtr::NOTNULL; // byte[] counter from GCTR obj
|
||||
|
||||
assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
|
||||
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
|
||||
// returning cipher len (int)
|
||||
fields = TypeTuple::fields(1);
|
||||
fields[TypeFunc::Parms + 0] = TypeInt::INT;
|
||||
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields);
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
/*
|
||||
* void implCompress(byte[] buf, int ofs)
|
||||
*/
|
||||
|
@ -257,6 +257,7 @@ private:
|
||||
static const TypeFunc* cipherBlockChaining_aescrypt_Type();
|
||||
static const TypeFunc* electronicCodeBook_aescrypt_Type();
|
||||
static const TypeFunc* counterMode_aescrypt_Type();
|
||||
static const TypeFunc* galoisCounterMode_aescrypt_Type();
|
||||
|
||||
static const TypeFunc* digestBase_implCompress_Type(bool is_sha3);
|
||||
static const TypeFunc* digestBase_implCompressMB_Type(bool is_sha3);
|
||||
|
@ -124,6 +124,7 @@ address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
|
||||
address StubRoutines::_electronicCodeBook_encryptAESCrypt = NULL;
|
||||
address StubRoutines::_electronicCodeBook_decryptAESCrypt = NULL;
|
||||
address StubRoutines::_counterMode_AESCrypt = NULL;
|
||||
address StubRoutines::_galoisCounterMode_AESCrypt = NULL;
|
||||
address StubRoutines::_ghash_processBlocks = NULL;
|
||||
address StubRoutines::_base64_encodeBlock = NULL;
|
||||
address StubRoutines::_base64_decodeBlock = NULL;
|
||||
|
@ -206,6 +206,7 @@ class StubRoutines: AllStatic {
|
||||
static address _electronicCodeBook_encryptAESCrypt;
|
||||
static address _electronicCodeBook_decryptAESCrypt;
|
||||
static address _counterMode_AESCrypt;
|
||||
static address _galoisCounterMode_AESCrypt;
|
||||
static address _ghash_processBlocks;
|
||||
static address _base64_encodeBlock;
|
||||
static address _base64_decodeBlock;
|
||||
@ -410,6 +411,7 @@ class StubRoutines: AllStatic {
|
||||
static address montgomerySquare() { return _montgomerySquare; }
|
||||
static address bigIntegerRightShift() { return _bigIntegerRightShiftWorker; }
|
||||
static address bigIntegerLeftShift() { return _bigIntegerLeftShiftWorker; }
|
||||
static address galoisCounterMode_AESCrypt() { return _galoisCounterMode_AESCrypt; }
|
||||
|
||||
static address vectorizedMismatch() { return _vectorizedMismatch; }
|
||||
|
||||
|
@ -540,6 +540,7 @@
|
||||
static_field(StubRoutines, _electronicCodeBook_encryptAESCrypt, address) \
|
||||
static_field(StubRoutines, _electronicCodeBook_decryptAESCrypt, address) \
|
||||
static_field(StubRoutines, _counterMode_AESCrypt, address) \
|
||||
static_field(StubRoutines, _galoisCounterMode_AESCrypt, address) \
|
||||
static_field(StubRoutines, _ghash_processBlocks, address) \
|
||||
static_field(StubRoutines, _base64_encodeBlock, address) \
|
||||
static_field(StubRoutines, _base64_decodeBlock, address) \
|
||||
|
@ -122,7 +122,7 @@ final class GHASH implements Cloneable, GCM {
|
||||
|
||||
/* subkeyHtbl and state are stored in long[] for GHASH intrinsic use */
|
||||
|
||||
// hashtable subkeyHtbl holds 2*9 powers of subkeyH computed using
|
||||
// hashtable subkeyHtbl holds 2*57 powers of subkeyH computed using
|
||||
// carry-less multiplication
|
||||
private long[] subkeyHtbl;
|
||||
|
||||
@ -143,7 +143,9 @@ final class GHASH implements Cloneable, GCM {
|
||||
throw new ProviderException("Internal error");
|
||||
}
|
||||
state = new long[2];
|
||||
subkeyHtbl = new long[2*9];
|
||||
// 48 keys for the interleaved implementation,
|
||||
// 8 for avx-ghash implementation and 1 for the original key
|
||||
subkeyHtbl = new long[2*57];
|
||||
subkeyHtbl[0] = (long)asLongView.get(subkeyH, 0);
|
||||
subkeyHtbl[1] = (long)asLongView.get(subkeyH, 8);
|
||||
}
|
||||
@ -264,7 +266,7 @@ final class GHASH implements Cloneable, GCM {
|
||||
throw new RuntimeException("internal state has invalid length: " +
|
||||
st.length);
|
||||
}
|
||||
if (subH.length != 18) {
|
||||
if (subH.length != 114) {
|
||||
throw new RuntimeException("internal subkeyHtbl has invalid length: " +
|
||||
subH.length);
|
||||
}
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
package com.sun.crypto.provider;
|
||||
|
||||
import jdk.internal.misc.Unsafe;
|
||||
import sun.nio.ch.DirectBuffer;
|
||||
import sun.security.jca.JCAUtil;
|
||||
import sun.security.util.ArrayUtil;
|
||||
@ -55,6 +56,8 @@ import java.security.spec.AlgorithmParameterSpec;
|
||||
import java.security.spec.InvalidParameterSpecException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import jdk.internal.vm.annotation.IntrinsicCandidate;
|
||||
|
||||
/**
|
||||
* This class represents ciphers in GaloisCounter (GCM) mode.
|
||||
*
|
||||
@ -82,6 +85,8 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
private static final int MAX_BUF_SIZE = Integer.MAX_VALUE;
|
||||
// data size when buffer is divided up to aid in intrinsics
|
||||
private static final int TRIGGERLEN = 65536; // 64k
|
||||
// x86-64 parallel intrinsic data size
|
||||
private static final int PARALLEL_LEN = 768;
|
||||
|
||||
static final byte[] EMPTY_BUF = new byte[0];
|
||||
|
||||
@ -566,35 +571,64 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate if the given data lengths and the already processed data
|
||||
* exceeds the maximum allowed processed data by GCM.
|
||||
* @param lengths lengths of unprocessed data.
|
||||
* Intrinsic for Vector AES Galois Counter Mode implementation.
|
||||
* AES and GHASH operations are interleaved in the intrinsic implementation.
|
||||
* return - number of processed bytes
|
||||
*
|
||||
* Requires 768 bytes (48 AES blocks) to efficiently use the intrinsic.
|
||||
* inLen that is less than 768 size block sizes, before or after this
|
||||
* intrinsic is used, will be done by the calling method
|
||||
* @param in input buffer
|
||||
* @param inOfs input offset
|
||||
* @param inLen input length
|
||||
* @param ct buffer that ghash will read (in for encrypt, out for decrypt)
|
||||
* @param ctOfs offset for ct buffer
|
||||
* @param out output buffer
|
||||
* @param outOfs output offset
|
||||
* @param gctr object for the GCTR operation
|
||||
* @param ghash object for the ghash operation
|
||||
* @return number of processed bytes
|
||||
*/
|
||||
private void checkDataLength(int ... lengths) {
|
||||
int max = MAX_BUF_SIZE;
|
||||
for (int len : lengths) {
|
||||
max = Math.subtractExact(max, len);
|
||||
}
|
||||
if (engine.processed > max) {
|
||||
throw new ProviderException("SunJCE provider only supports " +
|
||||
"input size up to " + MAX_BUF_SIZE + " bytes");
|
||||
@IntrinsicCandidate
|
||||
private static int implGCMCrypt(byte[] in, int inOfs, int inLen,
|
||||
byte[] ct, int ctOfs, byte[] out, int outOfs,
|
||||
GCTR gctr, GHASH ghash) {
|
||||
|
||||
inLen -= (inLen % PARALLEL_LEN);
|
||||
|
||||
int len = 0;
|
||||
int cOfs = ctOfs;
|
||||
if (inLen >= TRIGGERLEN) {
|
||||
int i = 0;
|
||||
int segments = (inLen / 6);
|
||||
segments -= segments % gctr.blockSize;
|
||||
do {
|
||||
len += gctr.update(in, inOfs + len, segments, out,
|
||||
outOfs + len);
|
||||
ghash.update(ct, cOfs, segments);
|
||||
cOfs = ctOfs + len;
|
||||
} while (++i < 5);
|
||||
|
||||
inLen -= len;
|
||||
}
|
||||
|
||||
len += gctr.update(in, inOfs + len, inLen, out, outOfs + len);
|
||||
ghash.update(ct, cOfs, inLen);
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Abstract class for GCMEncrypt and GCMDecrypt internal context objects
|
||||
*/
|
||||
abstract class GCMEngine {
|
||||
byte[] preCounterBlock;
|
||||
GCTR gctrPAndC;
|
||||
GHASH ghashAllToS;
|
||||
GCTR gctr;
|
||||
GHASH ghash;
|
||||
|
||||
// Block size of the algorithm
|
||||
final int blockSize;
|
||||
|
||||
// length of total data, i.e. len(C)
|
||||
int processed = 0;
|
||||
|
||||
// buffer for AAD data; if null, meaning update has been called
|
||||
ByteArrayOutputStream aadBuffer = null;
|
||||
int sizeOfAAD = 0;
|
||||
@ -608,7 +642,6 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
byte[] originalOut = null;
|
||||
int originalOutOfs = 0;
|
||||
|
||||
|
||||
GCMEngine(SymmetricCipher blockCipher) {
|
||||
blockSize = blockCipher.getBlockSize();
|
||||
byte[] subkeyH = new byte[blockSize];
|
||||
@ -616,8 +649,8 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
preCounterBlock = getJ0(iv, subkeyH, blockSize);
|
||||
byte[] j0Plus1 = preCounterBlock.clone();
|
||||
increment32(j0Plus1);
|
||||
gctrPAndC = new GCTR(blockCipher, j0Plus1);
|
||||
ghashAllToS = new GHASH(subkeyH);
|
||||
gctr = new GCTR(blockCipher, j0Plus1);
|
||||
ghash = new GHASH(subkeyH);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -631,15 +664,15 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
abstract int getOutputSize(int inLen, boolean isFinal);
|
||||
|
||||
// Update operations
|
||||
abstract byte[] doUpdate(byte[] in, int inOff, int inLen);
|
||||
abstract int doUpdate(byte[] in, int inOff, int inLen, byte[] out,
|
||||
int outOff) throws ShortBufferException;
|
||||
abstract byte[] doUpdate(byte[] in, int inOfs, int inLen);
|
||||
abstract int doUpdate(byte[] in, int inOfs, int inLen, byte[] out,
|
||||
int outOfs) throws ShortBufferException;
|
||||
abstract int doUpdate(ByteBuffer src, ByteBuffer dst)
|
||||
throws ShortBufferException;
|
||||
|
||||
// Final operations
|
||||
abstract int doFinal(byte[] in, int inOff, int inLen, byte[] out,
|
||||
int outOff) throws IllegalBlockSizeException, AEADBadTagException,
|
||||
abstract int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
|
||||
int outOfs) throws IllegalBlockSizeException, AEADBadTagException,
|
||||
ShortBufferException;
|
||||
abstract int doFinal(ByteBuffer src, ByteBuffer dst)
|
||||
throws IllegalBlockSizeException, AEADBadTagException,
|
||||
@ -657,6 +690,48 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
return (ibuffer == null ? 0 : ibuffer.size());
|
||||
}
|
||||
|
||||
/**
|
||||
* ByteBuffer wrapper for intrinsic implGCMCrypt. It will operate
|
||||
* on 768 byte blocks and let the calling method operate on smaller
|
||||
* sizes.
|
||||
*/
|
||||
int implGCMCrypt(ByteBuffer src, ByteBuffer dst) {
|
||||
int srcLen = src.remaining() - (src.remaining() % PARALLEL_LEN);
|
||||
|
||||
if (srcLen < PARALLEL_LEN) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int len;
|
||||
|
||||
if (src.hasArray() && dst.hasArray()) {
|
||||
ByteBuffer ct = (encryption ? dst : src);
|
||||
len = GaloisCounterMode.implGCMCrypt(src.array(),
|
||||
src.arrayOffset() + src.position(), srcLen,
|
||||
ct.array(), ct.arrayOffset() + ct.position(),
|
||||
dst.array(), dst.arrayOffset() + dst.position(),
|
||||
gctr, ghash);
|
||||
src.position(src.position() + len);
|
||||
dst.position(dst.position() + len);
|
||||
return len;
|
||||
|
||||
} else {
|
||||
|
||||
byte[] bin = new byte[PARALLEL_LEN];
|
||||
byte[] bout = new byte[PARALLEL_LEN];
|
||||
byte[] ct = (encryption ? bout : bin);
|
||||
len = srcLen;
|
||||
do {
|
||||
src.get(bin, 0, PARALLEL_LEN);
|
||||
len -= GaloisCounterMode.implGCMCrypt(bin, 0, PARALLEL_LEN,
|
||||
ct, 0, bout, 0, gctr, ghash);
|
||||
dst.put(bout, 0, PARALLEL_LEN);
|
||||
} while (len >= PARALLEL_LEN);
|
||||
|
||||
return srcLen - len;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The method takes two buffers to create one block of data. The
|
||||
* difference with the other mergeBlock is this will calculate
|
||||
@ -704,8 +779,6 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
* (e.g., has not been initialized) or does not accept AAD, and one of
|
||||
* the {@code update} methods has already been called for the active
|
||||
* encryption/decryption operation
|
||||
* @throws UnsupportedOperationException if this method
|
||||
* has not been overridden by an implementation
|
||||
*/
|
||||
void updateAAD(byte[] src, int offset, int len) {
|
||||
if (encryption) {
|
||||
@ -733,12 +806,12 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
|
||||
int lastLen = aad.length % blockSize;
|
||||
if (lastLen != 0) {
|
||||
ghashAllToS.update(aad, 0, aad.length - lastLen);
|
||||
ghash.update(aad, 0, aad.length - lastLen);
|
||||
byte[] padded = expandToOneBlock(aad,
|
||||
aad.length - lastLen, lastLen, blockSize);
|
||||
ghashAllToS.update(padded);
|
||||
ghash.update(padded);
|
||||
} else {
|
||||
ghashAllToS.update(aad);
|
||||
ghash.update(aad);
|
||||
}
|
||||
}
|
||||
aadBuffer = null;
|
||||
@ -751,18 +824,28 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
* For input it takes the ibuffer which is wrapped in 'buffer' and 'src'
|
||||
* from doFinal.
|
||||
*/
|
||||
int doLastBlock(GCM op, ByteBuffer buffer, ByteBuffer src, ByteBuffer dst) {
|
||||
int resultLen = 0;
|
||||
int doLastBlock(GCMOperation op, ByteBuffer buffer, ByteBuffer src,
|
||||
ByteBuffer dst) {
|
||||
int len = 0;
|
||||
int resultLen;
|
||||
|
||||
int bLen = (buffer != null ? buffer.remaining() : 0);
|
||||
if (bLen > 0) {
|
||||
// en/decrypt on how much buffer there is in AES_BLOCK_SIZE
|
||||
// en/decrypt any PARALLEL_LEN sized data in the buffer
|
||||
if (bLen >= PARALLEL_LEN) {
|
||||
len = implGCMCrypt(buffer, dst);
|
||||
bLen -= len;
|
||||
}
|
||||
|
||||
// en/decrypt any blocksize data in the buffer
|
||||
if (bLen >= blockSize) {
|
||||
resultLen += op.update(buffer, dst);
|
||||
resultLen = op.update(buffer, dst);
|
||||
bLen -= resultLen;
|
||||
len += resultLen;
|
||||
}
|
||||
|
||||
// Process the remainder in the buffer
|
||||
if (bLen - resultLen > 0) {
|
||||
if (bLen > 0) {
|
||||
// Copy the buffer remainder into an extra block
|
||||
byte[] block = new byte[blockSize];
|
||||
int over = buffer.remaining();
|
||||
@ -773,76 +856,26 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
if (slen > 0) {
|
||||
src.get(block, over, slen);
|
||||
}
|
||||
int len = slen + over;
|
||||
if (len == blockSize) {
|
||||
resultLen += op.update(block, 0, blockSize, dst);
|
||||
int l = slen + over;
|
||||
if (l == blockSize) {
|
||||
len += op.update(block, 0, blockSize, dst);
|
||||
} else {
|
||||
resultLen += op.doFinal(block, 0, len, block,
|
||||
0);
|
||||
len += op.doFinal(block, 0, l, block,0);
|
||||
if (dst != null) {
|
||||
dst.put(block, 0, len);
|
||||
dst.put(block, 0, l);
|
||||
}
|
||||
processed += resultLen;
|
||||
return resultLen;
|
||||
return len;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// en/decrypt whatever remains in src.
|
||||
// If src has been consumed, this will be a no-op
|
||||
if (src.remaining() > TRIGGERLEN) {
|
||||
resultLen += throttleData(op, src, dst);
|
||||
if (src.remaining() >= PARALLEL_LEN) {
|
||||
len += implGCMCrypt(src, dst);
|
||||
}
|
||||
|
||||
resultLen += op.doFinal(src, dst);
|
||||
processed += resultLen;
|
||||
return resultLen;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This segments large data into smaller chunks so hotspot will start
|
||||
* using GCTR and GHASH intrinsics sooner. This is a problem for app
|
||||
* and perf tests that only use large input sizes.
|
||||
*/
|
||||
int throttleData(GCM op, byte[] in, int inOfs, int inLen,
|
||||
byte[] out, int outOfs) {
|
||||
|
||||
int segments = (inLen / 6);
|
||||
segments -= segments % blockSize;
|
||||
int len = 0;
|
||||
int i = 0;
|
||||
do {
|
||||
len += op.update(in, inOfs + len, segments, out,outOfs + len);
|
||||
} while (++i < 5);
|
||||
|
||||
len += op.update(in, inOfs + len, inLen - len, out, outOfs + len);
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This segments large data into smaller chunks so hotspot will start
|
||||
* using GCTR and GHASH intrinsics sooner. This is a problem for app
|
||||
* and perf tests that only use large input sizes.
|
||||
*/
|
||||
int throttleData(GCM op, ByteBuffer src, ByteBuffer dst) {
|
||||
int inLen = src.limit();
|
||||
int segments = (src.remaining() / 6);
|
||||
segments -= segments % blockSize;
|
||||
int i = 0, resultLen = 0;
|
||||
do {
|
||||
src.limit(src.position() + segments);
|
||||
resultLen += op.update(src, dst);
|
||||
} while (++i < 5);
|
||||
|
||||
src.limit(inLen);
|
||||
// If there is still at least a blockSize left
|
||||
if (src.remaining() > blockSize) {
|
||||
resultLen += op.update(src, dst);
|
||||
}
|
||||
|
||||
return resultLen;
|
||||
return len + op.doFinal(src, dst);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -900,7 +933,11 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
|
||||
// Position plus arrayOffset() will give us the true offset
|
||||
// from the underlying byte[] address.
|
||||
if (src.position() + src.arrayOffset() >=
|
||||
// If during encryption and the input offset is behind or
|
||||
// the same as the output offset, the same buffer can be
|
||||
// used. But during decryption always create a new
|
||||
// buffer in case of a bad auth tag.
|
||||
if (encryption && src.position() + src.arrayOffset() >=
|
||||
dst.position() + dst.arrayOffset()) {
|
||||
return dst;
|
||||
}
|
||||
@ -923,12 +960,15 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
|
||||
/**
|
||||
* Overlap detection for data using byte array.
|
||||
* This is used for both overlap detection for the data or decryption
|
||||
* during in-place crypto, so to not overwrite the input if the auth tag
|
||||
* is invalid.
|
||||
*
|
||||
* If an intermediate array is needed, the original out array length is
|
||||
* allocated for code simplicity.
|
||||
*/
|
||||
byte[] overlapDetection(byte[] in, int inOfs, byte[] out, int outOfs) {
|
||||
if (in == out && inOfs < outOfs) {
|
||||
if (in == out && (!encryption || inOfs < outOfs)) {
|
||||
originalOut = out;
|
||||
originalOutOfs = outOfs;
|
||||
return new byte[out.length];
|
||||
@ -969,11 +1009,31 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
* Encryption Engine object
|
||||
*/
|
||||
class GCMEncrypt extends GCMEngine {
|
||||
GCTRGHASH gctrghash;
|
||||
GCMOperation op;
|
||||
|
||||
// data processed during encryption
|
||||
int processed = 0;
|
||||
|
||||
|
||||
GCMEncrypt(SymmetricCipher blockCipher) {
|
||||
super(blockCipher);
|
||||
gctrghash = new GCTRGHASH(gctrPAndC, ghashAllToS);
|
||||
op = new EncryptOp(gctr, ghash);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate if the given data lengths and the already processed data
|
||||
* exceeds the maximum allowed processed data by GCM.
|
||||
* @param lengths lengths of unprocessed data.
|
||||
*/
|
||||
private void checkDataLength(int ... lengths) {
|
||||
// 'max' is the remaining budget: it starts at MAX_BUF_SIZE and each
// supplied length is subtracted from it in turn.
int max = MAX_BUF_SIZE;
|
||||
for (int len : lengths) {
|
||||
// Math.subtractExact throws ArithmeticException on int overflow
// instead of silently wrapping.
max = Math.subtractExact(max, len);
|
||||
// Fail as soon as the bytes already processed exceed what is left
// of the budget after the new lengths have been deducted.
if (processed > max) {
|
||||
throw new ProviderException("SunJCE provider only " +
|
||||
"supports input size up to " + MAX_BUF_SIZE + " bytes");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -1034,7 +1094,7 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
System.arraycopy(buffer, 0, block, 0, bLen);
|
||||
System.arraycopy(in, inOfs, block, bLen, remainder);
|
||||
|
||||
len = gctrghash.update(block, 0, blockSize, out, outOfs);
|
||||
len = op.update(block, 0, blockSize, out, outOfs);
|
||||
inOfs += remainder;
|
||||
inLen -= remainder;
|
||||
outOfs += blockSize;
|
||||
@ -1043,8 +1103,20 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
|
||||
// Encrypt the remaining blocks inside of 'in'
|
||||
if (inLen >= PARALLEL_LEN) {
|
||||
int r = GaloisCounterMode.implGCMCrypt(in, inOfs, inLen, out,
|
||||
outOfs, out, outOfs, gctr, ghash);
|
||||
len += r;
|
||||
inOfs += r;
|
||||
inLen -= r;
|
||||
outOfs += r;
|
||||
}
|
||||
|
||||
if (inLen >= blockSize) {
|
||||
len += gctrghash.update(in, inOfs, inLen, out, outOfs);
|
||||
int r = op.update(in, inOfs, inLen, out, outOfs);
|
||||
len += r;
|
||||
inOfs += r;
|
||||
inLen -= r;
|
||||
}
|
||||
|
||||
// Write any remaining bytes less than a blockSize into ibuffer.
|
||||
@ -1089,21 +1161,32 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
ByteBuffer buffer = ByteBuffer.wrap(ibuffer.toByteArray());
|
||||
buffer.get(block, 0, bLen);
|
||||
src.get(block, bLen, remainder);
|
||||
len += cryptBlocks(
|
||||
ByteBuffer.wrap(block, 0, blockSize), dst);
|
||||
len += op.update(ByteBuffer.wrap(block, 0, blockSize),
|
||||
dst);
|
||||
ibuffer.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// encrypt any blocksized data in 'src'
|
||||
if (src.remaining() >= blockSize) {
|
||||
len += cryptBlocks(src, dst);
|
||||
int srcLen = src.remaining();
|
||||
int resultLen;
|
||||
// encrypt any PARALLEL_LEN sized data in 'src'
|
||||
if (srcLen >= PARALLEL_LEN) {
|
||||
resultLen = implGCMCrypt(src, dst);
|
||||
srcLen -= resultLen;
|
||||
len += resultLen;
|
||||
}
|
||||
|
||||
// encrypt any blocksize data in 'src'
|
||||
if (srcLen >= blockSize) {
|
||||
resultLen = op.update(src, dst);
|
||||
srcLen -= resultLen;
|
||||
len += resultLen;
|
||||
}
|
||||
|
||||
// Write the remaining bytes into the 'ibuffer'
|
||||
if (src.remaining() > 0) {
|
||||
initBuffer(src.remaining());
|
||||
byte[] b = new byte[src.remaining()];
|
||||
if (srcLen > 0) {
|
||||
initBuffer(srcLen);
|
||||
byte[] b = new byte[srcLen];
|
||||
src.get(b);
|
||||
// remainder offset is based on original buffer length
|
||||
try {
|
||||
@ -1114,6 +1197,7 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
|
||||
restoreDst(dst);
|
||||
processed += len;
|
||||
return len;
|
||||
}
|
||||
|
||||
@ -1127,7 +1211,7 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
try {
|
||||
ArrayUtil.nullAndBoundsCheck(out, outOfs, getOutputSize(inLen,
|
||||
true));
|
||||
} catch (ArrayIndexOutOfBoundsException aiobe) {
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
throw new ShortBufferException("Output buffer invalid");
|
||||
}
|
||||
|
||||
@ -1136,7 +1220,7 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
processAAD();
|
||||
out = overlapDetection(in, inOfs, out, outOfs);
|
||||
|
||||
int resultLen = 0;
|
||||
int len = 0;
|
||||
byte[] block;
|
||||
|
||||
// process what is in the ibuffer
|
||||
@ -1145,18 +1229,16 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
|
||||
// Make a block if the remaining ibuffer and 'in' can make one.
|
||||
if (bLen + inLen >= blockSize) {
|
||||
int r, bufOfs = 0;
|
||||
int r;
|
||||
block = new byte[blockSize];
|
||||
r = mergeBlock(buffer, bufOfs, in, inOfs, inLen, block);
|
||||
r = mergeBlock(buffer, 0, in, inOfs, inLen, block);
|
||||
inOfs += r;
|
||||
inLen -= r;
|
||||
r = gctrghash.update(block, 0, blockSize, out,
|
||||
outOfs);
|
||||
outOfs += r;
|
||||
resultLen += r;
|
||||
processed += r;
|
||||
op.update(block, 0, blockSize, out, outOfs);
|
||||
outOfs += blockSize;
|
||||
len += blockSize;
|
||||
} else {
|
||||
// Need to consume all the ibuffer here to prepare for doFinal()
|
||||
// Need to consume the ibuffer here to prepare for doFinal()
|
||||
block = new byte[bLen + inLen];
|
||||
System.arraycopy(buffer, 0, block, 0, bLen);
|
||||
System.arraycopy(in, inOfs, block, bLen, inLen);
|
||||
@ -1167,28 +1249,18 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
|
||||
// process what is left in the input buffer
|
||||
if (inLen > TRIGGERLEN) {
|
||||
int r = throttleData(gctrghash, in, inOfs, inLen, out, outOfs);
|
||||
inOfs += r;
|
||||
inLen -= r;
|
||||
outOfs += r;
|
||||
resultLen += r;
|
||||
processed += r;
|
||||
}
|
||||
|
||||
processed += gctrghash.doFinal(in, inOfs, inLen, out, outOfs);
|
||||
len += op.doFinal(in, inOfs, inLen, out, outOfs);
|
||||
outOfs += inLen;
|
||||
resultLen += inLen;
|
||||
|
||||
block = getLengthBlock(sizeOfAAD, processed);
|
||||
ghashAllToS.update(block);
|
||||
block = ghashAllToS.digest();
|
||||
block = getLengthBlock(sizeOfAAD, processed + len);
|
||||
ghash.update(block);
|
||||
block = ghash.digest();
|
||||
new GCTR(blockCipher, preCounterBlock).doFinal(block, 0,
|
||||
tagLenBytes, block, 0);
|
||||
|
||||
// copy the tag to the end of the buffer
|
||||
System.arraycopy(block, 0, out, outOfs, tagLenBytes);
|
||||
int len = resultLen + tagLenBytes;
|
||||
len += tagLenBytes;
|
||||
restoreOut(out, len);
|
||||
|
||||
reInit = true;
|
||||
@ -1214,7 +1286,7 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
|
||||
processAAD();
|
||||
if (len > 0) {
|
||||
processed += doLastBlock(gctrghash,
|
||||
processed += doLastBlock(op,
|
||||
(ibuffer == null || ibuffer.size() == 0) ? null :
|
||||
ByteBuffer.wrap(ibuffer.toByteArray()), src, dst);
|
||||
}
|
||||
@ -1225,8 +1297,8 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
|
||||
byte[] block = getLengthBlock(sizeOfAAD, processed);
|
||||
ghashAllToS.update(block);
|
||||
block = ghashAllToS.digest();
|
||||
ghash.update(block);
|
||||
block = ghash.digest();
|
||||
new GCTR(blockCipher, preCounterBlock).doFinal(block, 0,
|
||||
tagLenBytes, block, 0);
|
||||
dst.put(block, 0, tagLenBytes);
|
||||
@ -1235,18 +1307,6 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
reInit = true;
|
||||
return (len + tagLenBytes);
|
||||
}
|
||||
|
||||
// Handler method for encrypting blocks
|
||||
int cryptBlocks(ByteBuffer src, ByteBuffer dst) {
|
||||
int len;
|
||||
if (src.remaining() > TRIGGERLEN) {
|
||||
len = throttleData(gctrghash, src, dst);
|
||||
} else {
|
||||
len = gctrghash.update(src, dst);
|
||||
}
|
||||
processed += len;
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1262,6 +1322,22 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
super(blockCipher);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate if the given data lengths exceeds the maximum allowed
|
||||
* processed data by GCM.
|
||||
* @param lengths lengths of unprocessed data.
|
||||
*/
|
||||
private void checkDataLength(int ... lengths) {
|
||||
// 'max' is the remaining budget: it starts at MAX_BUF_SIZE and each
// supplied length is subtracted from it in turn.
int max = MAX_BUF_SIZE;
|
||||
for (int len : lengths) {
|
||||
// Math.subtractExact throws ArithmeticException on int overflow
// instead of silently wrapping.
max = Math.subtractExact(max, len);
|
||||
// Unlike the encryption-side check, no running 'processed' count
// is consulted here; only the supplied lengths are compared with
// MAX_BUF_SIZE.
if (max < 0) {
|
||||
throw new ProviderException("SunJCE provider only " +
|
||||
"supports input size up to " + MAX_BUF_SIZE + " bytes");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getOutputSize(int inLen, boolean isFinal) {
|
||||
if (!isFinal) {
|
||||
@ -1311,9 +1387,8 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
|
||||
processAAD();
|
||||
if (inLen > 0) {
|
||||
// store internally until decryptFinal is called because
|
||||
// spec mentioned that only return recovered data after tag
|
||||
// is successfully verified
|
||||
// store internally until doFinal. Per the spec, data is
|
||||
// returned after tag is successfully verified.
|
||||
initBuffer(inLen);
|
||||
ibuffer.write(in, inOfs, inLen);
|
||||
}
|
||||
@ -1350,38 +1425,43 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
|
||||
/**
|
||||
* Use any data from ibuffer and 'in' to first verify the auth tag. If
|
||||
* the tag is valid, decrypt the data.
|
||||
* Use available data from ibuffer and 'in' to verify and decrypt the
|
||||
* data. If the verification fails, 'out' is left with its original
|
||||
* values if crypto was in-place; otherwise 'out' is zeroed
|
||||
*/
|
||||
@Override
|
||||
public int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
|
||||
int outOfs) throws IllegalBlockSizeException, AEADBadTagException,
|
||||
ShortBufferException {
|
||||
GHASH save = null;
|
||||
|
||||
int len = inLen + getBufferedLength();
|
||||
if (len < tagLenBytes) {
|
||||
throw new AEADBadTagException("Input data too short to " +
|
||||
"contain an expected tag length of " + tagLenBytes +
|
||||
"bytes");
|
||||
}
|
||||
|
||||
try {
|
||||
ArrayUtil.nullAndBoundsCheck(out, outOfs, len - tagLenBytes);
|
||||
} catch (ArrayIndexOutOfBoundsException aiobe) {
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
throw new ShortBufferException("Output buffer invalid");
|
||||
}
|
||||
|
||||
if (len < tagLenBytes) {
|
||||
throw new AEADBadTagException("Input too short - need tag");
|
||||
}
|
||||
|
||||
if (len - tagLenBytes > out.length - outOfs) {
|
||||
save = ghashAllToS.clone();
|
||||
throw new ShortBufferException("Output buffer too small, must" +
|
||||
"be at least " + (len - tagLenBytes) + " bytes long");
|
||||
}
|
||||
|
||||
checkDataLength(len - tagLenBytes);
|
||||
processAAD();
|
||||
|
||||
findTag(in, inOfs, inLen);
|
||||
byte[] block = getLengthBlock(sizeOfAAD,
|
||||
decryptBlocks(ghashAllToS, in, inOfs, inLen, null, 0));
|
||||
ghashAllToS.update(block);
|
||||
block = ghashAllToS.digest();
|
||||
out = overlapDetection(in, inOfs, out, outOfs);
|
||||
|
||||
len = decryptBlocks(new DecryptOp(gctr, ghash), in, inOfs, inLen,
|
||||
out, outOfs);
|
||||
byte[] block = getLengthBlock(sizeOfAAD, len);
|
||||
ghash.update(block);
|
||||
block = ghash.digest();
|
||||
new GCTR(blockCipher, preCounterBlock).doFinal(block, 0,
|
||||
tagLenBytes, block, 0);
|
||||
|
||||
@ -1392,30 +1472,24 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
|
||||
if (mismatch != 0) {
|
||||
throw new AEADBadTagException("Tag mismatch!");
|
||||
// Clear output data
|
||||
Arrays.fill(out, outOfs, outOfs + len, (byte) 0);
|
||||
throw new AEADBadTagException("Tag mismatch");
|
||||
}
|
||||
|
||||
if (save != null) {
|
||||
ghashAllToS = save;
|
||||
throw new ShortBufferException("Output buffer too small, must" +
|
||||
"be at least " + (len - tagLenBytes) + " bytes long");
|
||||
}
|
||||
|
||||
out = overlapDetection(in, inOfs, out, outOfs);
|
||||
len = decryptBlocks(gctrPAndC, in, inOfs, inLen, out, outOfs);
|
||||
restoreOut(out, len);
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Use any data from ibuffer and 'src' to first verify the auth tag. If
|
||||
* the tag is valid, decrypt the data.
|
||||
* Use available data from ibuffer and 'src' to verify and decrypt the
|
||||
* data. If the verification fails, 'dst' is left with its original
|
||||
* values if crypto was in-place; otherwise 'dst' is zeroed
|
||||
*/
|
||||
@Override
|
||||
public int doFinal(ByteBuffer src, ByteBuffer dst)
|
||||
throws IllegalBlockSizeException, AEADBadTagException,
|
||||
ShortBufferException {
|
||||
GHASH save = null;
|
||||
|
||||
ByteBuffer tag;
|
||||
ByteBuffer ct = src.duplicate();
|
||||
@ -1432,11 +1506,10 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
|
||||
checkDataLength(len);
|
||||
|
||||
// Save GHASH context to allow the tag to be checked even though
|
||||
// the dst buffer is too short. Context will be restored so the
|
||||
// method can be called again with the proper sized dst buffer.
|
||||
// Verify dst is large enough
|
||||
if (len > dst.remaining()) {
|
||||
save = ghashAllToS.clone();
|
||||
throw new ShortBufferException("Output buffer too small, " +
|
||||
"must be at least " + len + " bytes long");
|
||||
}
|
||||
|
||||
// Create buffer 'tag' that contains only the auth tag
|
||||
@ -1459,20 +1532,19 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
tag.put(ct);
|
||||
tag.flip();
|
||||
} else {
|
||||
throw new AEADBadTagException("Input too short - need tag");
|
||||
throw new AEADBadTagException("Input data too short to " +
|
||||
"contain an expected tag length of " + tagLenBytes +
|
||||
"bytes");
|
||||
}
|
||||
|
||||
// Set the mark for a later reset. Either it will be zero, or the
|
||||
// tag buffer creation above will have consumed some or all of it.
|
||||
ct.mark();
|
||||
|
||||
dst = overlapDetection(src, dst);
|
||||
dst.mark();
|
||||
processAAD();
|
||||
// Perform GHASH check on data
|
||||
doLastBlock(ghashAllToS, buffer, ct, null);
|
||||
len = doLastBlock(new DecryptOp(gctr, ghash), buffer, ct, dst);
|
||||
|
||||
byte[] block = getLengthBlock(sizeOfAAD, len);
|
||||
ghashAllToS.update(block);
|
||||
block = ghashAllToS.digest();
|
||||
ghash.update(block);
|
||||
block = ghash.digest();
|
||||
new GCTR(blockCipher, preCounterBlock).doFinal(block, 0,
|
||||
tagLenBytes, block, 0);
|
||||
|
||||
@ -1483,32 +1555,22 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
|
||||
if (mismatch != 0) {
|
||||
throw new AEADBadTagException("Tag mismatch!");
|
||||
// Clear output data
|
||||
dst.reset();
|
||||
if (dst.hasArray()) {
|
||||
int ofs = dst.arrayOffset() + dst.position();
|
||||
Arrays.fill(dst.array(), ofs , ofs + len, (byte)0);
|
||||
} else {
|
||||
Unsafe.getUnsafe().setMemory(((DirectBuffer)dst).address(),
|
||||
len + dst.position(), (byte)0);
|
||||
}
|
||||
throw new AEADBadTagException("Tag mismatch");
|
||||
}
|
||||
|
||||
if (save != null) {
|
||||
ghashAllToS = save;
|
||||
throw new ShortBufferException("Output buffer too small, must" +
|
||||
" be at least " + len + " bytes long");
|
||||
}
|
||||
|
||||
// Prepare for decryption
|
||||
if (buffer != null) {
|
||||
buffer.flip();
|
||||
}
|
||||
ct.reset();
|
||||
processed = 0;
|
||||
// Check for overlap in the bytebuffers
|
||||
dst = overlapDetection(src, dst);
|
||||
|
||||
// Decrypt the all the input data and put it into dst
|
||||
doLastBlock(gctrPAndC, buffer, ct, dst);
|
||||
restoreDst(dst);
|
||||
src.position(src.limit());
|
||||
if (ibuffer != null) {
|
||||
ibuffer.reset();
|
||||
}
|
||||
return processed;
|
||||
engine = null;
|
||||
restoreDst(dst);
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1517,11 +1579,12 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
* When this method is used, all the data is either in the ibuffer
|
||||
* or in 'in'.
|
||||
*/
|
||||
int decryptBlocks(GCM op, byte[] in, int inOfs, int inLen,
|
||||
int decryptBlocks(GCMOperation op, byte[] in, int inOfs, int inLen,
|
||||
byte[] out, int outOfs) {
|
||||
byte[] buffer;
|
||||
byte[] block;
|
||||
int len = 0;
|
||||
int resultLen;
|
||||
|
||||
// Calculate the encrypted data length inside the ibuffer
|
||||
// considering the tag location
|
||||
@ -1538,15 +1601,24 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
if (bLen > 0) {
|
||||
buffer = ibuffer.toByteArray();
|
||||
|
||||
if (bLen >= blockSize) {
|
||||
len += op.update(buffer, 0, bLen, out, outOfs);
|
||||
outOfs += len; // noop for ghash
|
||||
if (bLen >= PARALLEL_LEN) {
|
||||
len = GaloisCounterMode.implGCMCrypt(buffer, 0, bLen,
|
||||
buffer, 0, out, outOfs, gctr, ghash);
|
||||
outOfs += len;
|
||||
// Use len as it becomes the ibuffer offset, if
|
||||
// needed, in the next op
|
||||
}
|
||||
|
||||
// merge the remaining ibuffer with the 'in'
|
||||
int bufRemainder = bLen - len;
|
||||
if (bufRemainder >= blockSize) {
|
||||
resultLen = op.update(buffer, len, bufRemainder, out,
|
||||
outOfs);
|
||||
len += resultLen;
|
||||
outOfs += resultLen;
|
||||
bufRemainder -= resultLen;
|
||||
}
|
||||
|
||||
// merge the remaining ibuffer with the 'in'
|
||||
if (bufRemainder > 0) {
|
||||
block = new byte[blockSize];
|
||||
int inUsed = mergeBlock(buffer, len, bufRemainder, in,
|
||||
@ -1557,9 +1629,9 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
// If there is more than a block between the merged data and 'in',
|
||||
// update(), otherwise setup for final
|
||||
if (inLen > 0) {
|
||||
int resultLen = op.update(block, 0, blockSize,
|
||||
resultLen = op.update(block, 0, blockSize,
|
||||
out, outOfs);
|
||||
outOfs += resultLen; // noop for ghash
|
||||
outOfs += resultLen;
|
||||
len += resultLen;
|
||||
} else {
|
||||
in = block;
|
||||
@ -1569,14 +1641,6 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
}
|
||||
|
||||
// Finish off the operation
|
||||
if (inLen > TRIGGERLEN) {
|
||||
int l = throttleData(op, in, inOfs, inLen, out, outOfs);
|
||||
inOfs += l;
|
||||
inLen -= l;
|
||||
outOfs += l; // noop for ghash
|
||||
len += l;
|
||||
}
|
||||
return len + op.doFinal(in, inOfs, inLen, out, outOfs);
|
||||
}
|
||||
}
|
||||
@ -1609,11 +1673,11 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
* This class is for encryption when both GCTR and GHASH
|
||||
* can operate in parallel.
|
||||
*/
|
||||
static final class GCTRGHASH implements GCM {
|
||||
static final class EncryptOp implements GCMOperation {
|
||||
GCTR gctr;
|
||||
GHASH ghash;
|
||||
|
||||
GCTRGHASH(GCTR c, GHASH g) {
|
||||
EncryptOp(GCTR c, GHASH g) {
|
||||
gctr = c;
|
||||
ghash = g;
|
||||
}
|
||||
@ -1645,19 +1709,96 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doFinal(byte[] in, int inOfs, int inLen, byte[] out, int outOfs) {
|
||||
int len = gctr.doFinal(in, inOfs, inLen, out, outOfs);
|
||||
ghash.doFinal(out, outOfs, len);
|
||||
return len;
|
||||
public int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
|
||||
int outOfs) {
|
||||
int len = 0;
|
||||
|
||||
if (inLen >= PARALLEL_LEN) {
|
||||
len = implGCMCrypt(in, inOfs, inLen, out, outOfs, out, outOfs,
|
||||
gctr, ghash);
|
||||
inLen -= len;
|
||||
outOfs += len;
|
||||
}
|
||||
|
||||
gctr.doFinal(in, inOfs + len, inLen, out, outOfs);
|
||||
return len + ghash.doFinal(out, outOfs, inLen);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doFinal(ByteBuffer src, ByteBuffer dst) {
|
||||
dst.mark();
|
||||
int l = gctr.doFinal(src, dst);
|
||||
int len = gctr.doFinal(src, dst);
|
||||
dst.reset();
|
||||
ghash.doFinal(dst, l);
|
||||
return l;
|
||||
ghash.doFinal(dst, len);
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This class is for decryption when both GCTR and GHASH
|
||||
* can operate in parallel.
|
||||
*/
|
||||
// GCMOperation used for decryption. Every method feeds the input to GHASH
// first and only then runs GCTR over the same input, so the hash is always
// taken over the data as it arrived, before any output is written.
static final class DecryptOp implements GCMOperation {
|
||||
GCTR gctr;
|
||||
GHASH ghash;
|
||||
|
||||
// Stores the supplied GCTR and GHASH instances; no copies are made.
DecryptOp(GCTR c, GHASH g) {
|
||||
gctr = c;
|
||||
ghash = g;
|
||||
}
|
||||
|
||||
// Hashes in[inOfs, inOfs + inLen), then passes the same range through
// GCTR into 'out' at outOfs. Returns whatever gctr.update returns.
@Override
|
||||
public int update(byte[] in, int inOfs, int inLen, byte[] out,
|
||||
int outOfs) {
|
||||
ghash.update(in, inOfs, inLen);
|
||||
return gctr.update(in, inOfs, inLen, out, outOfs);
|
||||
}
|
||||
|
||||
// Same as above, but the GCTR output goes to a ByteBuffer.
@Override
|
||||
public int update(byte[] in, int inOfs, int inLen, ByteBuffer dst) {
|
||||
ghash.update(in, inOfs, inLen);
|
||||
return gctr.update(in, inOfs, inLen, dst);
|
||||
}
|
||||
|
||||
// ByteBuffer-to-ByteBuffer variant.
@Override
|
||||
public int update(ByteBuffer src, ByteBuffer dst) {
|
||||
// mark()/reset() put src back at its starting position after the
// GHASH call (which presumably advances it), so GCTR reads the
// same bytes that were just hashed.
src.mark();
|
||||
ghash.update(src, src.remaining());
|
||||
src.reset();
|
||||
return gctr.update(src, dst);
|
||||
}
|
||||
|
||||
// Final byte[] operation. Returns the length reported by implGCMCrypt
// plus the return value of gctr.doFinal for the tail.
@Override
|
||||
public int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
|
||||
int outOfs) {
|
||||
int len = 0;
|
||||
// Inputs of at least PARALLEL_LEN bytes go through implGCMCrypt
// first. 'in' is passed twice: once as the input and once in the
// slot the ByteBuffer wrapper calls 'ct'.
if (inLen >= PARALLEL_LEN) {
|
||||
len += implGCMCrypt(in, inOfs, inLen, in, inOfs, out, outOfs,
|
||||
gctr, ghash);
|
||||
}
|
||||
// Whatever implGCMCrypt did not cover (everything when it was
// skipped) is hashed and then decrypted, with both the input and
// output offsets shifted by 'len'.
ghash.doFinal(in, inOfs + len, inLen - len);
|
||||
return len + gctr.doFinal(in, inOfs + len, inLen - len, out,
|
||||
outOfs + len);
|
||||
}
|
||||
|
||||
// Final ByteBuffer operation; same mark/reset rewind as
// update(ByteBuffer, ByteBuffer).
@Override
|
||||
public int doFinal(ByteBuffer src, ByteBuffer dst) {
|
||||
src.mark();
|
||||
ghash.doFinal(src, src.remaining());
|
||||
src.reset();
|
||||
return gctr.doFinal(src, dst);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Interface to organize encryption and decryption operations in the
|
||||
* proper order for GHASH and GCTR.
|
||||
*/
|
||||
public interface GCMOperation {
|
||||
// Intermediate step, byte[] input to byte[] output. The int result is
// presumably the number of bytes processed -- callers in this file add
// it to their running length and offsets.
int update(byte[] in, int inOfs, int inLen, byte[] out, int outOfs);
|
||||
// Intermediate step, byte[] input to ByteBuffer output.
int update(byte[] in, int inOfs, int inLen, ByteBuffer dst);
|
||||
// Intermediate step, ByteBuffer input to ByteBuffer output.
int update(ByteBuffer src, ByteBuffer dst);
|
||||
// Final step, byte[] input to byte[] output.
int doFinal(byte[] in, int inOfs, int inLen, byte[] out, int outOfs);
|
||||
// Final step, ByteBuffer input to ByteBuffer output.
int doFinal(ByteBuffer src, ByteBuffer dst);
|
||||
}
|
||||
}
|
||||
|
@ -98,6 +98,27 @@
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DmsgSize=2054
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DmsgSize=2054
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencOutputOffset=1 -DmsgSize=2054
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DdecOutputOffset=1 -DmsgSize=2054
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DmsgSize=2054
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DmsgSize=2054
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=2048
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
* compiler.codegen.aes.TestAESMain
|
||||
*
|
||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR
|
||||
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
|
||||
|
Loading…
x
Reference in New Issue
Block a user