8267125: AES Galois CounterMode (GCM) interleaved implementation using AVX512 + VAES instructions

Co-authored-by: Smita Kamath <svkamath@openjdk.org>
Co-authored-by: Tomasz Kantecki <tomasz.kantecki@intel.com>
Co-authored-by: Anthony Scarpino <ascarpino@openjdk.org>
Reviewed-by: kvn, valeriep
This commit is contained in:
Smita Kamath 2021-08-24 18:48:31 +00:00 committed by Anthony Scarpino
parent 6ace805f8c
commit 0e7288ffbf
21 changed files with 1318 additions and 246 deletions

@ -945,12 +945,19 @@ private:
void roundDec(XMMRegister key, int rnum);
void lastroundDec(XMMRegister key, int rnum);
void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask);
// Carry-less multiply of four 128-bit lanes: ghash = (ghash * hkey) reduced
// modulo the GHASH polynomial (see definition for register clobbers).
void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
// Precompute powers of the hash subkey H into htbl for the interleaved GCM code.
void generateHtbl_48_block_zmm(Register htbl);
// Interleaved 16-block AES encryption with 16-block GHASH.
// NOTE(review): parameter names here differ from the definition
// ("reduction" vs "first_time_reduction", and "no_ghash_input" vs
// "ghash_input" -- the latter pair also reads as logically inverted).
// Declaration names are not binding in C++, but aligning them with the
// definition would avoid confusion.
void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
bool final_reduction, int index, XMMRegister counter_inc_mask);
public:
void aesecb_encrypt(Register source_addr, Register dest_addr, Register key, Register len);
void aesecb_decrypt(Register source_addr, Register dest_addr, Register key, Register len);
void aesctr_encrypt(Register src_addr, Register dest_addr, Register key, Register counter,
Register len_reg, Register used, Register used_addr, Register saved_encCounter_start);
// AES-GCM encryption; the number of processed bytes is left in rax by the
// implementation (see movq(rax, pos) at the end of the definition).
void aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
Register state, Register subkeyHtbl, Register counter);
#endif

@ -1267,4 +1267,627 @@ void MacroAssembler::aesctr_encrypt(Register src_addr, Register dest_addr, Regis
bind(EXIT);
}
#endif // _LP64
// Galois-field multiply for GHASH, vectorized over four independent 128-bit
// lanes of a ZMM register: GH = (GH * HK) mod P, where P is the reduction
// polynomial loaded from StubRoutines::x86::ghash_polynomial512_addr().
// Clobbers xmm0-xmm2; r15 is used as scratch for the external-address load.
void MacroAssembler::gfmul_avx512(XMMRegister GH, XMMRegister HK) {
const XMMRegister TMP1 = xmm0;
const XMMRegister TMP2 = xmm1;
const XMMRegister TMP3 = xmm2;
// 128x128 -> 256-bit carry-less multiply per lane:
// TMP1 = hi*hi, TMP2 = lo*lo, TMP3 and GH get the two cross products.
evpclmulqdq(TMP1, GH, HK, 0x11, Assembler::AVX_512bit);
evpclmulqdq(TMP2, GH, HK, 0x00, Assembler::AVX_512bit);
evpclmulqdq(TMP3, GH, HK, 0x01, Assembler::AVX_512bit);
evpclmulqdq(GH, GH, HK, 0x10, Assembler::AVX_512bit);
// Combine the two middle products and split them across the high (TMP1)
// and low (GH) 128-bit halves of the 256-bit product.
evpxorq(GH, GH, TMP3, Assembler::AVX_512bit);
vpsrldq(TMP3, GH, 8, Assembler::AVX_512bit);
vpslldq(GH, GH, 8, Assembler::AVX_512bit);
evpxorq(TMP1, TMP1, TMP3, Assembler::AVX_512bit);
evpxorq(GH, GH, TMP2, Assembler::AVX_512bit);
// Two-phase reduction of the 256-bit product modulo the GHASH polynomial.
evmovdquq(TMP3, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), Assembler::AVX_512bit, r15);
evpclmulqdq(TMP2, TMP3, GH, 0x01, Assembler::AVX_512bit);
vpslldq(TMP2, TMP2, 8, Assembler::AVX_512bit);
evpxorq(GH, GH, TMP2, Assembler::AVX_512bit);
evpclmulqdq(TMP2, TMP3, GH, 0x00, Assembler::AVX_512bit);
vpsrldq(TMP2, TMP2, 4, Assembler::AVX_512bit);
evpclmulqdq(GH, TMP3, GH, 0x10, Assembler::AVX_512bit);
vpslldq(GH, GH, 4, Assembler::AVX_512bit);
// vpternlogq imm 0x96 is a three-way XOR: GH = GH ^ TMP1 ^ TMP2.
vpternlogq(GH, 0x96, TMP1, TMP2, Assembler::AVX_512bit);
}
// Precomputes powers of the GHASH subkey H and stores them into htbl for the
// interleaved GCM code. The first few powers are produced in XMM registers,
// then four at a time in ZMM registers via gfmul_avx512. Entries are written
// at descending 16-byte offsets (16*56 down to 16*9); the layout must match
// the table reads in ghash16_encrypt16_parallel / aesgcm_encrypt.
// Ends with ret(0): this body is entered via call() from aesgcm_encrypt
// (label GENERATE_HTBL_48_BLKS), so control returns to the call site.
void MacroAssembler::generateHtbl_48_block_zmm(Register htbl) {
const XMMRegister HK = xmm6;
const XMMRegister ZT5 = xmm4;
const XMMRegister ZT7 = xmm7;
const XMMRegister ZT8 = xmm8;
Label GFMUL_AVX512;
// Load the input subkey H and byte-reverse it into GHASH bit order.
movdqu(HK, Address(htbl, 0));
movdqu(xmm10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
vpshufb(HK, HK, xmm10, Assembler::AVX_128bit);
movdqu(xmm11, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr() + 64)); // Poly
movdqu(xmm12, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr() + 80)); // Twoone
// Compute H ^ 2 from the input subkeyH: shift left by one bit with
// carry across the 64-bit halves, then conditionally XOR the polynomial
// when the top bit was set (constant-time via compare + mask).
movdqu(xmm2, xmm6);
vpsllq(xmm6, xmm6, 1, Assembler::AVX_128bit);
vpsrlq(xmm2, xmm2, 63, Assembler::AVX_128bit);
movdqu(xmm1, xmm2);
vpslldq(xmm2, xmm2, 8, Assembler::AVX_128bit);
vpsrldq(xmm1, xmm1, 8, Assembler::AVX_128bit);
vpor(xmm6, xmm6, xmm2, Assembler::AVX_128bit);
vpshufd(xmm2, xmm1, 0x24, Assembler::AVX_128bit);
vpcmpeqd(xmm2, xmm2, xmm12, AVX_128bit);
vpand(xmm2, xmm2, xmm11, Assembler::AVX_128bit);
vpxor(xmm6, xmm6, xmm2, Assembler::AVX_128bit);
movdqu(Address(htbl, 16 * 56), xmm6); // H ^ 2
// Compute the remaining three powers of H using XMM registers and all following powers using ZMM
// ZT7 accumulates four consecutive powers, one per 128-bit lane.
movdqu(ZT5, HK);
vinserti32x4(ZT7, ZT7, HK, 3);
gfmul_avx512(ZT5, HK);
movdqu(Address(htbl, 16 * 55), ZT5); // H ^ 2 * 2
vinserti32x4(ZT7, ZT7, ZT5, 2);
gfmul_avx512(ZT5, HK);
movdqu(Address(htbl, 16 * 54), ZT5); // H ^ 2 * 3
vinserti32x4(ZT7, ZT7, ZT5, 1);
gfmul_avx512(ZT5, HK);
movdqu(Address(htbl, 16 * 53), ZT5); // H ^ 2 * 4
vinserti32x4(ZT7, ZT7, ZT5, 0);
// Broadcast the highest power computed so far to all lanes, then step the
// table four powers at a time, alternating ZT7/ZT8 as the running product.
evshufi64x2(ZT5, ZT5, ZT5, 0x00, Assembler::AVX_512bit);
evmovdquq(ZT8, ZT7, Assembler::AVX_512bit);
gfmul_avx512(ZT7, ZT5);
evmovdquq(Address(htbl, 16 * 49), ZT7, Assembler::AVX_512bit);
evshufi64x2(ZT5, ZT7, ZT7, 0x00, Assembler::AVX_512bit);
gfmul_avx512(ZT8, ZT5);
evmovdquq(Address(htbl, 16 * 45), ZT8, Assembler::AVX_512bit);
gfmul_avx512(ZT7, ZT5);
evmovdquq(Address(htbl, 16 * 41), ZT7, Assembler::AVX_512bit);
gfmul_avx512(ZT8, ZT5);
evmovdquq(Address(htbl, 16 * 37), ZT8, Assembler::AVX_512bit);
gfmul_avx512(ZT7, ZT5);
evmovdquq(Address(htbl, 16 * 33), ZT7, Assembler::AVX_512bit);
gfmul_avx512(ZT8, ZT5);
evmovdquq(Address(htbl, 16 * 29), ZT8, Assembler::AVX_512bit);
gfmul_avx512(ZT7, ZT5);
evmovdquq(Address(htbl, 16 * 25), ZT7, Assembler::AVX_512bit);
gfmul_avx512(ZT8, ZT5);
evmovdquq(Address(htbl, 16 * 21), ZT8, Assembler::AVX_512bit);
gfmul_avx512(ZT7, ZT5);
evmovdquq(Address(htbl, 16 * 17), ZT7, Assembler::AVX_512bit);
gfmul_avx512(ZT8, ZT5);
evmovdquq(Address(htbl, 16 * 13), ZT8, Assembler::AVX_512bit);
gfmul_avx512(ZT7, ZT5);
evmovdquq(Address(htbl, 16 * 9), ZT7, Assembler::AVX_512bit);
// Return to the call() site in aesgcm_encrypt.
ret(0);
}
/*
 * vclmul_reduce(out, poly, hi128, lo128, tmp0, tmp1)
 * Reduces the 256-bit carry-less product hi128:lo128 modulo the GHASH
 * polynomial 'poly' (two-phase clmul/shift reduction); the closing
 * vpternlogq(0x96) is a three-way XOR producing the 128-bit result per lane.
 */
#define vclmul_reduce(out, poly, hi128, lo128, tmp0, tmp1) \
evpclmulqdq(tmp0, poly, lo128, 0x01, Assembler::AVX_512bit); \
vpslldq(tmp0, tmp0, 8, Assembler::AVX_512bit); \
evpxorq(tmp0, lo128, tmp0, Assembler::AVX_512bit); \
evpclmulqdq(tmp1, poly, tmp0, 0x00, Assembler::AVX_512bit); \
vpsrldq(tmp1, tmp1, 4, Assembler::AVX_512bit); \
evpclmulqdq(out, poly, tmp0, 0x10, Assembler::AVX_512bit); \
vpslldq(out, out, 4, Assembler::AVX_512bit); \
vpternlogq(out, 0x96, tmp1, hi128, Assembler::AVX_512bit); \
/*
 * vhpxori4x128(reg, tmp)
 * Horizontal XOR of the four 128-bit lanes of 'reg' down into lane 0.
 * 'tmp' is clobbered.
 */
#define vhpxori4x128(reg, tmp) \
vextracti64x4(tmp, reg, 1); \
evpxorq(reg, reg, tmp, Assembler::AVX_256bit); \
vextracti32x4(tmp, reg, 1); \
evpxorq(reg, reg, tmp, Assembler::AVX_128bit); \
/* One AES encryption round (vaesenc) applied to four ZMM blocks. */
#define roundEncode(key, dst1, dst2, dst3, dst4) \
vaesenc(dst1, dst1, key, Assembler::AVX_512bit); \
vaesenc(dst2, dst2, key, Assembler::AVX_512bit); \
vaesenc(dst3, dst3, key, Assembler::AVX_512bit); \
vaesenc(dst4, dst4, key, Assembler::AVX_512bit); \
/* Final AES encryption round (vaesenclast) applied to four ZMM blocks. */
#define lastroundEncode(key, dst1, dst2, dst3, dst4) \
vaesenclast(dst1, dst1, key, Assembler::AVX_512bit); \
vaesenclast(dst2, dst2, key, Assembler::AVX_512bit); \
vaesenclast(dst3, dst3, key, Assembler::AVX_512bit); \
vaesenclast(dst4, dst4, key, Assembler::AVX_512bit); \
/* Store 256 bytes (4 x 64) from src1..src4 at dst+position. */
#define storeData(dst, position, src1, src2, src3, src4) \
evmovdquq(Address(dst, position, Address::times_1, 0 * 64), src1, Assembler::AVX_512bit); \
evmovdquq(Address(dst, position, Address::times_1, 1 * 64), src2, Assembler::AVX_512bit); \
evmovdquq(Address(dst, position, Address::times_1, 2 * 64), src3, Assembler::AVX_512bit); \
evmovdquq(Address(dst, position, Address::times_1, 3 * 64), src4, Assembler::AVX_512bit); \
/* Load 256 bytes (4 x 64) from src+position into dst1..dst4. */
#define loadData(src, position, dst1, dst2, dst3, dst4) \
evmovdquq(dst1, Address(src, position, Address::times_1, 0 * 64), Assembler::AVX_512bit); \
evmovdquq(dst2, Address(src, position, Address::times_1, 1 * 64), Assembler::AVX_512bit); \
evmovdquq(dst3, Address(src, position, Address::times_1, 2 * 64), Assembler::AVX_512bit); \
evmovdquq(dst4, Address(src, position, Address::times_1, 3 * 64), Assembler::AVX_512bit); \
/*
 * carrylessMultiply(dst00, dst01, dst10, dst11, ghdata, hkey)
 * Four carry-less partial products of ghdata x hkey, one per imm8 selector
 * (0x00 lo*lo, 0x01/0x10 cross terms, 0x11 hi*hi).
 */
#define carrylessMultiply(dst00, dst01, dst10, dst11, ghdata, hkey) \
evpclmulqdq(dst00, ghdata, hkey, 0x00, Assembler::AVX_512bit); \
evpclmulqdq(dst01, ghdata, hkey, 0x01, Assembler::AVX_512bit); \
evpclmulqdq(dst10, ghdata, hkey, 0x10, Assembler::AVX_512bit); \
evpclmulqdq(dst11, ghdata, hkey, 0x11, Assembler::AVX_512bit); \
/* Byte-shuffle four counter blocks, then XOR in round key 0 (AES round 0). */
#define shuffleExorRnd1Key(dst0, dst1, dst2, dst3, shufmask, rndkey) \
vpshufb(dst0, dst0, shufmask, Assembler::AVX_512bit); \
evpxorq(dst0, dst0, rndkey, Assembler::AVX_512bit); \
vpshufb(dst1, dst1, shufmask, Assembler::AVX_512bit); \
evpxorq(dst1, dst1, rndkey, Assembler::AVX_512bit); \
vpshufb(dst2, dst2, shufmask, Assembler::AVX_512bit); \
evpxorq(dst2, dst2, rndkey, Assembler::AVX_512bit); \
vpshufb(dst3, dst3, shufmask, Assembler::AVX_512bit); \
evpxorq(dst3, dst3, rndkey, Assembler::AVX_512bit); \
/* XOR the AES keystream with the plain/cipher text, pairwise. */
#define xorBeforeStore(dst0, dst1, dst2, dst3, src0, src1, src2, src3) \
evpxorq(dst0, dst0, src0, Assembler::AVX_512bit); \
evpxorq(dst1, dst1, src1, Assembler::AVX_512bit); \
evpxorq(dst2, dst2, src2, Assembler::AVX_512bit); \
evpxorq(dst3, dst3, src3, Assembler::AVX_512bit); \
/* Three-way XOR (vpternlogq 0x96) into each of four GHASH accumulators. */
#define xorGHASH(dst0, dst1, dst2, dst3, src02, src03, src12, src13, src22, src23, src32, src33) \
vpternlogq(dst0, 0x96, src02, src03, Assembler::AVX_512bit); \
vpternlogq(dst1, 0x96, src12, src13, Assembler::AVX_512bit); \
vpternlogq(dst2, 0x96, src22, src23, Assembler::AVX_512bit); \
vpternlogq(dst3, 0x96, src32, src33, Assembler::AVX_512bit); \
// One pipeline stage that encrypts 16 AES counter blocks (four ZMM registers)
// while computing GHASH over 16 previously produced cipher blocks, interleaving
// AES rounds with carry-less multiplies to hide instruction latency.
//   first_time_reduction: seed the GHASH accumulators xmm25/xmm26/xmm27.
//   final_reduction:      fold the accumulators and reduce; new hash in ZTMP5.
//   ghash_input:          XOR the incoming hash (aad_hashx) into ghash block 0.
//   i:                    starting index into subkeyHtbl (advanced locally).
// Expects xmm24 = byte-swap shuffle mask and xmm29 = key shuffle mask, both
// set up by the caller (aesgcm_encrypt). Uses rbx as scratch for the
// polynomial load in the final-reduction path.
void MacroAssembler::ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx, XMMRegister aad_hashx,
Register in, Register out, Register data, Register pos, bool first_time_reduction, XMMRegister addmask, bool ghash_input, Register rounds,
Register ghash_pos, bool final_reduction, int i, XMMRegister counter_inc_mask) {
Label AES_192, AES_256, LAST_AES_RND;
const XMMRegister ZTMP0 = xmm0;
const XMMRegister ZTMP1 = xmm3;
const XMMRegister ZTMP2 = xmm4;
const XMMRegister ZTMP3 = xmm5;
const XMMRegister ZTMP5 = xmm7;
const XMMRegister ZTMP6 = xmm10;
const XMMRegister ZTMP7 = xmm11;
const XMMRegister ZTMP8 = xmm12;
const XMMRegister ZTMP9 = xmm13;
const XMMRegister ZTMP10 = xmm15;
const XMMRegister ZTMP11 = xmm16;
const XMMRegister ZTMP12 = xmm17;
const XMMRegister ZTMP13 = xmm19;
const XMMRegister ZTMP14 = xmm20;
const XMMRegister ZTMP15 = xmm21;
const XMMRegister ZTMP16 = xmm30;
const XMMRegister ZTMP17 = xmm31;
const XMMRegister ZTMP18 = xmm1;
const XMMRegister ZTMP19 = xmm2;
const XMMRegister ZTMP20 = xmm8;
const XMMRegister ZTMP21 = xmm22;
const XMMRegister ZTMP22 = xmm23;
// Pre increment counters: four ZMMs of four counter blocks each
vpaddd(ZTMP0, ctr_blockx, counter_inc_mask, Assembler::AVX_512bit);
vpaddd(ZTMP1, ZTMP0, counter_inc_mask, Assembler::AVX_512bit);
vpaddd(ZTMP2, ZTMP1, counter_inc_mask, Assembler::AVX_512bit);
vpaddd(ZTMP3, ZTMP2, counter_inc_mask, Assembler::AVX_512bit);
// Save counter value for the next invocation
evmovdquq(ctr_blockx, ZTMP3, Assembler::AVX_512bit);
// Reuse ZTMP17 / ZTMP18 for loading AES Keys
// Pre-load AES round keys
ev_load_key(ZTMP17, key, 0, xmm29);
ev_load_key(ZTMP18, key, 1 * 16, xmm29);
// ZTMP19 & ZTMP20 used for loading hash key
// Pre-load hash key (144 = offset of the htbl region inside subkeyHtbl)
evmovdquq(ZTMP19, Address(subkeyHtbl, i * 64 + 144), Assembler::AVX_512bit);
evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64 + 144), Assembler::AVX_512bit);
// Load data for computing ghash
evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit);
// Xor cipher block 0 with input ghash, if available
if (ghash_input) {
evpxorq(ZTMP21, ZTMP21, aad_hashx, Assembler::AVX_512bit);
}
// Load data for computing ghash
evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit);
// stitch AES rounds with GHASH
// AES round 0, xmm24 has shuffle mask
shuffleExorRnd1Key(ZTMP0, ZTMP1, ZTMP2, ZTMP3, xmm24, ZTMP17);
// Reuse ZTMP17 / ZTMP18 for loading remaining AES Keys
ev_load_key(ZTMP17, key, 2 * 16, xmm29);
// GHASH 4 blocks
carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP21, ZTMP19);
// Load the next hkey and Ghash data
evmovdquq(ZTMP19, Address(subkeyHtbl, ++i * 64 + 144), Assembler::AVX_512bit);
evmovdquq(ZTMP21, Address(data, ghash_pos, Address::times_1, 2 * 64), Assembler::AVX_512bit);
vpshufb(ZTMP21, ZTMP21, xmm24, Assembler::AVX_512bit);
// AES round 1
roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP18, key, 3 * 16, xmm29);
// GHASH 4 blocks(11 to 8)
carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20);
// Load the next hkey and GDATA
evmovdquq(ZTMP20, Address(subkeyHtbl, ++i * 64 + 144), Assembler::AVX_512bit);
evmovdquq(ZTMP22, Address(data, ghash_pos, Address::times_1, 3 * 64), Assembler::AVX_512bit);
vpshufb(ZTMP22, ZTMP22, xmm24, Assembler::AVX_512bit);
// AES round 2
roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP17, key, 4 * 16, xmm29);
// GHASH 4 blocks(7 to 4)
carrylessMultiply(ZTMP14, ZTMP16, ZTMP15, ZTMP13, ZTMP21, ZTMP19);
// AES rounds 3
roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP18, key, 5 * 16, xmm29);
// Gather(XOR) GHASH for 12 blocks
xorGHASH(ZTMP5, ZTMP6, ZTMP8, ZTMP7, ZTMP9, ZTMP13, ZTMP10, ZTMP14, ZTMP12, ZTMP16, ZTMP11, ZTMP15);
// AES rounds 4
roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP17, key, 6 * 16, xmm29);
// load plain / cipher text(recycle registers)
loadData(in, pos, ZTMP13, ZTMP14, ZTMP15, ZTMP16);
// AES rounds 5
roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP18, key, 7 * 16, xmm29);
// GHASH 4 blocks(3 to 0)
carrylessMultiply(ZTMP10, ZTMP12, ZTMP11, ZTMP9, ZTMP22, ZTMP20);
// AES round 6
roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP17, key, 8 * 16, xmm29);
// gather GHASH in ZTMP6(low) and ZTMP5(high); the xmm25/xmm26/xmm27
// accumulators carry partial products across invocations until the
// final-reduction call folds them.
if (first_time_reduction) {
vpternlogq(ZTMP7, 0x96, ZTMP8, ZTMP12, Assembler::AVX_512bit);
evpxorq(xmm25, ZTMP7, ZTMP11, Assembler::AVX_512bit);
evpxorq(xmm27, ZTMP5, ZTMP9, Assembler::AVX_512bit);
evpxorq(xmm26, ZTMP6, ZTMP10, Assembler::AVX_512bit);
}
else if (!first_time_reduction && !final_reduction) {
xorGHASH(ZTMP7, xmm25, xmm27, xmm26, ZTMP8, ZTMP12, ZTMP7, ZTMP11, ZTMP5, ZTMP9, ZTMP6, ZTMP10);
}
if (final_reduction) {
// Phase one: Add mid products together
// Also load polynomial constant for reduction
vpternlogq(ZTMP7, 0x96, ZTMP8, ZTMP12, Assembler::AVX_512bit);
vpternlogq(ZTMP7, 0x96, xmm25, ZTMP11, Assembler::AVX_512bit);
vpsrldq(ZTMP11, ZTMP7, 8, Assembler::AVX_512bit);
vpslldq(ZTMP7, ZTMP7, 8, Assembler::AVX_512bit);
evmovdquq(ZTMP12, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), Assembler::AVX_512bit, rbx);
}
// AES round 7
roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP18, key, 9 * 16, xmm29);
if (final_reduction) {
vpternlogq(ZTMP5, 0x96, ZTMP9, ZTMP11, Assembler::AVX_512bit);
evpxorq(ZTMP5, ZTMP5, xmm27, Assembler::AVX_512bit);
vpternlogq(ZTMP6, 0x96, ZTMP10, ZTMP7, Assembler::AVX_512bit);
evpxorq(ZTMP6, ZTMP6, xmm26, Assembler::AVX_512bit);
}
// AES round 8
roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP17, key, 10 * 16, xmm29);
// Horizontal xor of low and high 4*128
if (final_reduction) {
vhpxori4x128(ZTMP5, ZTMP9);
vhpxori4x128(ZTMP6, ZTMP10);
}
// AES round 9
roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
// First phase of reduction
if (final_reduction) {
evpclmulqdq(ZTMP10, ZTMP12, ZTMP6, 0x01, Assembler::AVX_128bit);
vpslldq(ZTMP10, ZTMP10, 8, Assembler::AVX_128bit);
evpxorq(ZTMP10, ZTMP6, ZTMP10, Assembler::AVX_128bit);
}
// rounds >= 52 means a 192-bit (or larger) key schedule
cmpl(rounds, 52);
jcc(Assembler::greaterEqual, AES_192);
jmp(LAST_AES_RND);
// AES rounds up to 11 (AES192) or 13 (AES256)
bind(AES_192);
roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP18, key, 11 * 16, xmm29);
roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP17, key, 12 * 16, xmm29);
cmpl(rounds, 60);
jcc(Assembler::aboveEqual, AES_256);
jmp(LAST_AES_RND);
bind(AES_256);
roundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP18, key, 13 * 16, xmm29);
roundEncode(ZTMP18, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
ev_load_key(ZTMP17, key, 14 * 16, xmm29);
bind(LAST_AES_RND);
// Second phase of reduction
if (final_reduction) {
evpclmulqdq(ZTMP9, ZTMP12, ZTMP10, 0x00, Assembler::AVX_128bit);
vpsrldq(ZTMP9, ZTMP9, 4, Assembler::AVX_128bit); // Shift-R 1-DW to obtain 2-DWs shift-R
evpclmulqdq(ZTMP11, ZTMP12, ZTMP10, 0x10, Assembler::AVX_128bit);
vpslldq(ZTMP11, ZTMP11, 4, Assembler::AVX_128bit); // Shift-L 1-DW for result
// ZTMP5 = ZTMP5 X ZTMP11 X ZTMP9 (three-way XOR); final GHASH value
vpternlogq(ZTMP5, 0x96, ZTMP11, ZTMP9, Assembler::AVX_128bit);
}
// Last AES round
lastroundEncode(ZTMP17, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
// XOR against plain / cipher text
xorBeforeStore(ZTMP0, ZTMP1, ZTMP2, ZTMP3, ZTMP13, ZTMP14, ZTMP15, ZTMP16);
// store cipher / plain text
storeData(out, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
}
// Vectorized AES-GCM encryption using AVX512 + VAES.
// Processes 'len' bytes from 'in' to 'out' with the AES key schedule in
// 'key', updating the GHASH state at 'state' and the counter at 'counter'.
// 'ct' points at the cipher text consumed by GHASH; 'subkeyHtbl' holds the
// precomputed hash-key powers (filled here via GENERATE_HTBL_48_BLKS).
// The number of processed bytes is returned in rax; anything below the
// 768-byte minimum is left for the caller's fallback path.
// Phases: (1) bulk AES-CTR for 32 blocks to build up cipher text,
// (2) interleaved ghash16_encrypt16_parallel over 48-block groups,
// (3) GHASH of the remaining 32 cipher blocks, final reduction and
// counter/state write-back.
void MacroAssembler::aesgcm_encrypt(Register in, Register len, Register ct, Register out, Register key,
Register state, Register subkeyHtbl, Register counter) {
Label ENC_DEC_DONE, GENERATE_HTBL_48_BLKS, AES_192, AES_256, STORE_CT, GHASH_LAST_32,
AES_32_BLOCKS, GHASH_AES_PARALLEL, LOOP, ACCUMULATE, GHASH_16_AES_16;
const XMMRegister CTR_BLOCKx = xmm9;
const XMMRegister AAD_HASHx = xmm14;
const Register pos = rax;
const Register rounds = r15;
// r11 is a Windows scratch register; r14 is callee-saved there
Register ghash_pos;
#ifndef _WIN64
ghash_pos = r14;
#else
ghash_pos = r11;
#endif // !_WIN64
const XMMRegister ZTMP0 = xmm0;
const XMMRegister ZTMP1 = xmm3;
const XMMRegister ZTMP2 = xmm4;
const XMMRegister ZTMP3 = xmm5;
const XMMRegister ZTMP4 = xmm6;
const XMMRegister ZTMP5 = xmm7;
const XMMRegister ZTMP6 = xmm10;
const XMMRegister ZTMP7 = xmm11;
const XMMRegister ZTMP8 = xmm12;
const XMMRegister ZTMP9 = xmm13;
const XMMRegister ZTMP10 = xmm15;
const XMMRegister ZTMP11 = xmm16;
const XMMRegister ZTMP12 = xmm17;
const XMMRegister ZTMP13 = xmm19;
const XMMRegister ZTMP14 = xmm20;
const XMMRegister ZTMP15 = xmm21;
const XMMRegister ZTMP16 = xmm30;
const XMMRegister COUNTER_INC_MASK = xmm18;
movl(pos, 0); // Total length processed
// Min data size processed = 768 bytes
cmpl(len, 768);
jcc(Assembler::less, ENC_DEC_DONE);
// Generate 48 constants for htbl (out-of-line subroutine below; returns via ret(0))
call(GENERATE_HTBL_48_BLKS, relocInfo::none);
int index = 0; // Index for choosing subkeyHtbl entry
movl(ghash_pos, 0); // Pointer for ghash read and store operations
// Move initial counter value and STATE value into variables
movdqu(CTR_BLOCKx, Address(counter, 0));
movdqu(AAD_HASHx, Address(state, 0));
// Load lswap mask for ghash
movdqu(xmm24, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()), rbx);
// Shuffle input state using lswap mask
vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit);
// Compute #rounds for AES based on the length of the key array
movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
// Broadcast counter value to 512 bit register
evshufi64x2(CTR_BLOCKx, CTR_BLOCKx, CTR_BLOCKx, 0, Assembler::AVX_512bit);
// Load counter shuffle mask (xmm24 is repurposed from here on)
evmovdquq(xmm24, ExternalAddress(StubRoutines::x86::counter_mask_addr()), Assembler::AVX_512bit, rbx);
// Shuffle counter
vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit);
// Load mask for incrementing counter
evmovdquq(COUNTER_INC_MASK, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 128), Assembler::AVX_512bit, rbx);
// Pre-increment counter: first ZMM uses the linc0 mask at offset 64
vpaddd(ZTMP5, CTR_BLOCKx, ExternalAddress(StubRoutines::x86::counter_mask_addr() + 64), Assembler::AVX_512bit, rbx);
vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit);
vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit);
vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit);
// Begin 32 blocks of AES processing
bind(AES_32_BLOCKS);
// Save incremented counter before overwriting it with AES data
evmovdquq(CTR_BLOCKx, ZTMP8, Assembler::AVX_512bit);
// Move 256 bytes of data
loadData(in, pos, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
// Load key shuffle mask
movdqu(xmm29, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx);
// Load 0th AES round key
ev_load_key(ZTMP4, key, 0, xmm29);
// AES-ROUND0, xmm24 has the shuffle mask
shuffleExorRnd1Key(ZTMP5, ZTMP6, ZTMP7, ZTMP8, xmm24, ZTMP4);
// Rounds 1-9 are common to all key sizes
for (int j = 1; j < 10; j++) {
ev_load_key(ZTMP4, key, j * 16, xmm29);
roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
}
ev_load_key(ZTMP4, key, 10 * 16, xmm29);
// AES rounds up to 11 (AES192) or 13 (AES256)
cmpl(rounds, 52);
jcc(Assembler::greaterEqual, AES_192);
lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
jmp(STORE_CT);
bind(AES_192);
roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
ev_load_key(ZTMP4, key, 11 * 16, xmm29);
roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
cmpl(rounds, 60);
jcc(Assembler::aboveEqual, AES_256);
ev_load_key(ZTMP4, key, 12 * 16, xmm29);
lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
jmp(STORE_CT);
bind(AES_256);
ev_load_key(ZTMP4, key, 12 * 16, xmm29);
roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
ev_load_key(ZTMP4, key, 13 * 16, xmm29);
roundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
ev_load_key(ZTMP4, key, 14 * 16, xmm29);
// Last AES round
lastroundEncode(ZTMP4, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
bind(STORE_CT);
// Xor the encrypted key with PT to obtain CT
xorBeforeStore(ZTMP5, ZTMP6, ZTMP7, ZTMP8, ZTMP0, ZTMP1, ZTMP2, ZTMP3);
storeData(out, pos, ZTMP5, ZTMP6, ZTMP7, ZTMP8);
// 16 blocks encryption completed
addl(pos, 256);
cmpl(pos, 512);
jcc(Assembler::aboveEqual, GHASH_AES_PARALLEL);
// Not yet 512 bytes of cipher text: increment counters and loop
vpaddd(ZTMP5, CTR_BLOCKx, COUNTER_INC_MASK, Assembler::AVX_512bit);
vpaddd(ZTMP6, ZTMP5, COUNTER_INC_MASK, Assembler::AVX_512bit);
vpaddd(ZTMP7, ZTMP6, COUNTER_INC_MASK, Assembler::AVX_512bit);
vpaddd(ZTMP8, ZTMP7, COUNTER_INC_MASK, Assembler::AVX_512bit);
jmp(AES_32_BLOCKS);
bind(GHASH_AES_PARALLEL);
// Ghash16_encrypt16_parallel takes place in the order with three reduction values:
// 1) First time -> cipher xor input ghash
// 2) No reduction -> accumulate multiplication values
// 3) Final reduction post 48 blocks -> new ghash value is computed for the next round
// Reduction value = first time
ghash16_encrypt16_parallel(key, subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
addl(pos, 256);
addl(ghash_pos, 256);
index += 4;
// At this point we have processed 768 bytes of AES and 256 bytes of GHASH.
// If the remaining length is less than 768, process remaining 512 bytes of ghash in GHASH_LAST_32 code
subl(len, 768);
cmpl(len, 768);
jcc(Assembler::less, GHASH_LAST_32);
// AES 16 blocks and GHASH 16 blocks in parallel
// For multiples of 48 blocks we will do ghash16_encrypt16 interleaved multiple times
// Reduction value = no reduction means that the carryless multiplication values are accumulated for further calculations
// Each call uses 4 subkeyHtbl values, so increment the index by 4.
bind(GHASH_16_AES_16);
// Reduction value = no reduction
ghash16_encrypt16_parallel(key, subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
addl(pos, 256);
addl(ghash_pos, 256);
index += 4;
// Reduction value = final reduction means that the accumulated values have to be reduced as we have completed 48 blocks of ghash
ghash16_encrypt16_parallel(key, subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, false, xmm24, false, rounds, ghash_pos, true, index, COUNTER_INC_MASK);
addl(pos, 256);
addl(ghash_pos, 256);
// Calculated ghash value needs to be moved to AAD_HASHX (ZTMP5 = xmm7 holds
// the reduced hash after the final-reduction call) so that we can restart the ghash16-aes16 pipeline
movdqu(AAD_HASHx, ZTMP5);
index = 0; // Reset subkeyHtbl index
// Restart the pipeline
// Reduction value = first time
ghash16_encrypt16_parallel(key, subkeyHtbl, CTR_BLOCKx, AAD_HASHx, in, out, ct, pos, true, xmm24, true, rounds, ghash_pos, false, index, COUNTER_INC_MASK);
addl(pos, 256);
addl(ghash_pos, 256);
index += 4;
subl(len, 768);
cmpl(len, 768);
jcc(Assembler::greaterEqual, GHASH_16_AES_16);
// GHASH last 32 blocks processed here
// GHASH products accumulated in ZMM27, ZMM25 and ZMM26 during GHASH16-AES16 operation is used
bind(GHASH_LAST_32);
// Use rbx as a pointer to the htbl; For last 32 blocks of GHASH, use key# 4-11 entry in subkeyHtbl
movl(rbx, 256);
// Load cipher blocks
evmovdquq(ZTMP13, Address(ct, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit);
vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit);
// Load ghash keys
evmovdquq(ZTMP15, Address(subkeyHtbl, rbx, Address::times_1, 0 * 64 + 144), Assembler::AVX_512bit);
evmovdquq(ZTMP16, Address(subkeyHtbl, rbx, Address::times_1, 1 * 64 + 144), Assembler::AVX_512bit);
// Ghash blocks 0 - 3
carrylessMultiply(ZTMP2, ZTMP3, ZTMP4, ZTMP1, ZTMP13, ZTMP15);
// Ghash blocks 4 - 7
carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP14, ZTMP16);
vpternlogq(ZTMP1, 0x96, ZTMP5, xmm27, Assembler::AVX_512bit); // ZTMP1 = ZTMP1 + ZTMP5 + zmm27
vpternlogq(ZTMP2, 0x96, ZTMP6, xmm26, Assembler::AVX_512bit); // ZTMP2 = ZTMP2 + ZTMP6 + zmm26
vpternlogq(ZTMP3, 0x96, ZTMP7, xmm25, Assembler::AVX_512bit); // ZTMP3 = ZTMP3 + ZTMP7 + zmm25
evpxorq(ZTMP4, ZTMP4, ZTMP8, Assembler::AVX_512bit); // ZTMP4 = ZTMP4 + ZTMP8
addl(ghash_pos, 128);
addl(rbx, 128);
// Ghash remaining blocks
bind(LOOP);
cmpl(ghash_pos, pos);
jcc(Assembler::aboveEqual, ACCUMULATE);
// Load next cipher blocks and corresponding ghash keys
evmovdquq(ZTMP13, Address(ct, ghash_pos, Address::times_1, 0 * 64), Assembler::AVX_512bit);
evmovdquq(ZTMP14, Address(ct, ghash_pos, Address::times_1, 1 * 64), Assembler::AVX_512bit);
vpshufb(ZTMP13, ZTMP13, xmm24, Assembler::AVX_512bit);
vpshufb(ZTMP14, ZTMP14, xmm24, Assembler::AVX_512bit);
evmovdquq(ZTMP15, Address(subkeyHtbl, rbx, Address::times_1, 0 * 64 + 144), Assembler::AVX_512bit);
evmovdquq(ZTMP16, Address(subkeyHtbl, rbx, Address::times_1, 1 * 64 + 144), Assembler::AVX_512bit);
// ghash blocks 0 - 3
carrylessMultiply(ZTMP6, ZTMP7, ZTMP8, ZTMP5, ZTMP13, ZTMP15);
// ghash blocks 4 - 7
carrylessMultiply(ZTMP10, ZTMP11, ZTMP12, ZTMP9, ZTMP14, ZTMP16);
// update sums
// ZTMP1 = ZTMP1 + ZTMP5 + ZTMP9
// ZTMP2 = ZTMP2 + ZTMP6 + ZTMP10
// ZTMP3 = ZTMP3 + ZTMP7 xor ZTMP11
// ZTMP4 = ZTMP4 + ZTMP8 xor ZTMP12
xorGHASH(ZTMP1, ZTMP2, ZTMP3, ZTMP4, ZTMP5, ZTMP9, ZTMP6, ZTMP10, ZTMP7, ZTMP11, ZTMP8, ZTMP12);
addl(ghash_pos, 128);
addl(rbx, 128);
jmp(LOOP);
// Integrate ZTMP3/ZTMP4 into ZTMP1 and ZTMP2
bind(ACCUMULATE);
evpxorq(ZTMP3, ZTMP3, ZTMP4, Assembler::AVX_512bit);
vpsrldq(ZTMP7, ZTMP3, 8, Assembler::AVX_512bit);
vpslldq(ZTMP8, ZTMP3, 8, Assembler::AVX_512bit);
evpxorq(ZTMP1, ZTMP1, ZTMP7, Assembler::AVX_512bit);
evpxorq(ZTMP2, ZTMP2, ZTMP8, Assembler::AVX_512bit);
// Add ZTMP1 and ZTMP2 128 - bit words horizontally
vhpxori4x128(ZTMP1, ZTMP11);
vhpxori4x128(ZTMP2, ZTMP12);
// Load reduction polynomial and compute final reduction
evmovdquq(ZTMP15, ExternalAddress(StubRoutines::x86::ghash_polynomial512_addr()), Assembler::AVX_512bit, rbx);
vclmul_reduce(AAD_HASHx, ZTMP15, ZTMP1, ZTMP2, ZTMP3, ZTMP4);
// Pre-increment counter for next operation (xmm18 == COUNTER_INC_MASK)
vpaddd(CTR_BLOCKx, CTR_BLOCKx, xmm18, Assembler::AVX_128bit);
// Shuffle counter and save the updated value
vpshufb(CTR_BLOCKx, CTR_BLOCKx, xmm24, Assembler::AVX_512bit);
movdqu(Address(counter, 0), CTR_BLOCKx);
// Load ghash lswap mask
movdqu(xmm24, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
// Shuffle ghash using lbswap_mask and store it
vpshufb(AAD_HASHx, AAD_HASHx, xmm24, Assembler::AVX_128bit);
movdqu(Address(state, 0), AAD_HASHx);
jmp(ENC_DEC_DONE);
// Out-of-line subroutine body reached only via the call() above; the
// generated code ends in ret(0), so execution never falls through here.
bind(GENERATE_HTBL_48_BLKS);
generateHtbl_48_block_zmm(subkeyHtbl);
bind(ENC_DEC_DONE);
// Return the number of processed bytes in rax (pos aliases rax).
movq(rax, pos);
}
#endif // _LP64

@ -4368,6 +4368,95 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Emits the constant table used by the AVX512 GCM code:
//   bytes   0-63 : the GHASH reduction polynomial replicated into all four
//                  128-bit lanes of a ZMM register,
//   bytes  64-79 : single-lane POLY constant,
//   bytes  80-95 : single-lane TWOONE constant.
address ghash_polynomial512_addr() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "_ghash_poly512_addr");
  address start = __ pc();
  // One copy of the reduction polynomial per 128-bit lane.
  for (int lane = 0; lane < 4; lane++) {
    __ emit_data64(0x00000001C2000000, relocInfo::none); // POLY for reduction
    __ emit_data64(0xC200000000000000, relocInfo::none);
  }
  __ emit_data64(0x0000000000000001, relocInfo::none); // POLY
  __ emit_data64(0xC200000000000000, relocInfo::none);
  __ emit_data64(0x0000000000000001, relocInfo::none); // TWOONE
  __ emit_data64(0x0000000100000000, relocInfo::none);
  return start;
}
// Vector AES Galois Counter Mode implementation. Parameters:
// Windows regs | Linux regs
// in = c_rarg0 (rcx) | c_rarg0 (rsi)
// len = c_rarg1 (rdx) | c_rarg1 (rdi)
// ct = c_rarg2 (r8) | c_rarg2 (rdx)
// out = c_rarg3 (r9) | c_rarg3 (rcx)
// key = r10 | c_rarg4 (r8)
// state = r13 | c_rarg5 (r9)
// subkeyHtbl = r14 | r11
// counter = rsi | r12
// return - number of processed bytes
// Generates the AES-GCM stub. The first four arguments arrive in the usual
// C calling-convention registers; the remainder come from registers or the
// caller's stack frame depending on platform (see the table above). All
// callee-saved registers that aesgcm_encrypt clobbers are saved/restored
// around the call. Returns (per the stub contract) the number of processed
// bytes, which aesgcm_encrypt leaves in rax.
address generate_galoisCounterMode_AESCrypt() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "galoisCounterMode_AESCrypt");
address start = __ pc();
const Register in = c_rarg0;
const Register len = c_rarg1;
const Register ct = c_rarg2;
const Register out = c_rarg3;
// counter is read from memory below and updated with the incremented counter in the end
#ifndef _WIN64
const Register key = c_rarg4;
const Register state = c_rarg5;
const Address subkeyH_mem(rbp, 2 * wordSize);
const Register subkeyHtbl = r11;
const Address counter_mem(rbp, 3 * wordSize);
const Register counter = r12;
#else
// Windows x64 passes only four args in registers; key/state/subkeyH/counter
// are fetched from the caller's stack frame.
const Address key_mem(rbp, 6 * wordSize);
const Register key = r10;
const Address state_mem(rbp, 7 * wordSize);
const Register state = r13;
const Address subkeyH_mem(rbp, 8 * wordSize);
const Register subkeyHtbl = r14;
const Address counter_mem(rbp, 9 * wordSize);
const Register counter = rsi;
#endif
__ enter();
// Save state before entering routine
__ push(r12);
__ push(r13);
__ push(r14);
__ push(r15);
__ push(rbx);
#ifdef _WIN64
// on win64, rsi is callee-saved (used above for 'counter'), so preserve it;
// fill key/state from their stack positions
__ push(rsi);
__ movptr(key, key_mem);
__ movptr(state, state_mem);
#endif
__ movptr(subkeyHtbl, subkeyH_mem);
__ movptr(counter, counter_mem);
__ aesgcm_encrypt(in, len, ct, out, key, state, subkeyHtbl, counter);
// Restore state before leaving routine
#ifdef _WIN64
__ pop(rsi);
#endif
__ pop(rbx);
__ pop(r15);
__ pop(r14);
__ pop(r13);
__ pop(r12);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
// This mask is used for incrementing counter value(linc0, linc4, etc.)
address counter_mask_addr() {
__ align(64);
@ -7618,13 +7707,20 @@ address generate_avx_ghash_processBlocks() {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptVectorAESCrypt();
StubRoutines::_electronicCodeBook_encryptAESCrypt = generate_electronicCodeBook_encryptAESCrypt();
StubRoutines::_electronicCodeBook_decryptAESCrypt = generate_electronicCodeBook_decryptAESCrypt();
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
StubRoutines::x86::_ghash_poly512_addr = ghash_polynomial512_addr();
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
StubRoutines::_galoisCounterMode_AESCrypt = generate_galoisCounterMode_AESCrypt();
} else {
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
}
}
if (UseAESCTRIntrinsics) {
if (VM_Version::supports_avx512_vaes() && VM_Version::supports_avx512bw() && VM_Version::supports_avx512vl()) {
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
if (StubRoutines::x86::_counter_mask_addr == NULL) {
StubRoutines::x86::_counter_mask_addr = counter_mask_addr();
}
StubRoutines::_counterMode_AESCrypt = generate_counterMode_VectorAESCrypt();
} else {
StubRoutines::x86::_counter_shuffle_mask_addr = generate_counter_shuffle_mask();
@ -7664,7 +7760,9 @@ address generate_avx_ghash_processBlocks() {
// Generate GHASH intrinsics code
if (UseGHASHIntrinsics) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
if (StubRoutines::x86::_ghash_long_swap_mask_addr == NULL) {
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
}
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
if (VM_Version::supports_avx()) {
StubRoutines::x86::_ghash_shuffmask_addr = ghash_shufflemask_addr();

@ -80,6 +80,7 @@ address StubRoutines::x86::_join_0_1_base64 = NULL;
address StubRoutines::x86::_join_1_2_base64 = NULL;
address StubRoutines::x86::_join_2_3_base64 = NULL;
address StubRoutines::x86::_decoding_table_base64 = NULL;
address StubRoutines::x86::_ghash_poly512_addr = NULL;
#endif
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;

@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
enum platform_dependent_constants {
code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small)
code_size2 = 35300 LP64_ONLY(+25000) // simply increase if too small (assembler will crash if too small)
code_size2 = 35300 LP64_ONLY(+32000) // simply increase if too small (assembler will crash if too small)
};
class x86 {
@ -198,6 +198,7 @@ class x86 {
static address _join_1_2_base64;
static address _join_2_3_base64;
static address _decoding_table_base64;
static address _ghash_poly512_addr;
#endif
// byte flip mask for sha256
static address _pshuffle_byte_flip_mask_addr;
@ -254,6 +255,7 @@ class x86 {
static address crc_by128_masks_avx512_addr() { return (address)_crc_by128_masks_avx512; }
static address shuf_table_crc32_avx512_addr() { return (address)_shuf_table_crc32_avx512; }
static address crc_table_avx512_addr() { return (address)_crc_table_avx512; }
static address ghash_polynomial512_addr() { return _ghash_poly512_addr; }
#endif // _LP64
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }

@ -182,6 +182,7 @@ int vmIntrinsics::predicates_needed(vmIntrinsics::ID id) {
case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
case vmIntrinsics::_counterMode_AESCrypt:
case vmIntrinsics::_galoisCounterMode_AESCrypt:
return 1;
case vmIntrinsics::_digestBase_implCompressMB:
return 5;
@ -429,6 +430,9 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
case vmIntrinsics::_counterMode_AESCrypt:
if (!UseAESCTRIntrinsics) return true;
break;
case vmIntrinsics::_galoisCounterMode_AESCrypt:
if (!UseAESIntrinsics) return true;
break;
case vmIntrinsics::_md5_implCompress:
if (!UseMD5Intrinsics) return true;
break;

@ -415,6 +415,11 @@ class methodHandle;
do_intrinsic(_counterMode_AESCrypt, com_sun_crypto_provider_counterMode, crypt_name, byteArray_int_int_byteArray_int_signature, F_R) \
do_name( crypt_name, "implCrypt") \
\
do_class(com_sun_crypto_provider_galoisCounterMode, "com/sun/crypto/provider/GaloisCounterMode") \
do_intrinsic(_galoisCounterMode_AESCrypt, com_sun_crypto_provider_galoisCounterMode, gcm_crypt_name, aes_gcm_signature, F_S) \
do_name(gcm_crypt_name, "implGCMCrypt") \
do_signature(aes_gcm_signature, "([BII[BI[BILcom/sun/crypto/provider/GCTR;Lcom/sun/crypto/provider/GHASH;)I") \
\
/* support for sun.security.provider.MD5 */ \
do_class(sun_security_provider_md5, "sun/security/provider/MD5") \
do_intrinsic(_md5_implCompress, sun_security_provider_md5, implCompress_name, implCompress_signature, F_R) \

@ -308,6 +308,7 @@
static_field(StubRoutines, _electronicCodeBook_encryptAESCrypt, address) \
static_field(StubRoutines, _electronicCodeBook_decryptAESCrypt, address) \
static_field(StubRoutines, _counterMode_AESCrypt, address) \
static_field(StubRoutines, _galoisCounterMode_AESCrypt, address) \
static_field(StubRoutines, _base64_encodeBlock, address) \
static_field(StubRoutines, _base64_decodeBlock, address) \
static_field(StubRoutines, _ghash_processBlocks, address) \

@ -640,6 +640,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_electronicCodeBook_encryptAESCrypt:
case vmIntrinsics::_electronicCodeBook_decryptAESCrypt:
case vmIntrinsics::_counterMode_AESCrypt:
case vmIntrinsics::_galoisCounterMode_AESCrypt:
case vmIntrinsics::_md5_implCompress:
case vmIntrinsics::_sha_implCompress:
case vmIntrinsics::_sha2_implCompress:

@ -1087,6 +1087,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_encryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_decryptAESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "galoisCounterMode_AESCrypt") == 0 ||
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
strcmp(call->as_CallLeaf()->_name, "encodeBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "decodeBlock") == 0 ||

@ -2535,7 +2535,7 @@ Node* GraphKit::make_runtime_call(int flags,
if (parm5 != NULL) { call->init_req(TypeFunc::Parms+5, parm5);
if (parm6 != NULL) { call->init_req(TypeFunc::Parms+6, parm6);
if (parm7 != NULL) { call->init_req(TypeFunc::Parms+7, parm7);
/* close each nested if ===> */ } } } } } } } }
/* close each nested if ===> */ } } } } } } } }
assert(call->in(call->req()-1) != NULL, "must initialize all parms");
if (!is_leaf) {

@ -547,6 +547,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_counterMode_AESCrypt:
return inline_counterMode_AESCrypt(intrinsic_id());
case vmIntrinsics::_galoisCounterMode_AESCrypt:
return inline_galoisCounterMode_AESCrypt();
case vmIntrinsics::_md5_implCompress:
case vmIntrinsics::_sha_implCompress:
case vmIntrinsics::_sha2_implCompress:
@ -713,6 +716,8 @@ Node* LibraryCallKit::try_to_predicate(int predicate) {
return inline_counterMode_AESCrypt_predicate();
case vmIntrinsics::_digestBase_implCompressMB:
return inline_digestBase_implCompressMB_predicate(predicate);
case vmIntrinsics::_galoisCounterMode_AESCrypt:
return inline_galoisCounterMode_AESCrypt_predicate();
default:
// If you get here, it may be that someone has added a new intrinsic
@ -6679,6 +6684,134 @@ bool LibraryCallKit::inline_digestBase_implCompressMB(Node* digestBase_obj, ciIn
return true;
}
//------------------------------inline_galoisCounterMode_AESCrypt-----------------------
// Inline GaloisCounterMode.implGCMCrypt() as a call to the
// galoisCounterMode_AESCrypt stub. Returns false (i.e. fall back to the
// Java implementation) when the stub or any required object field is
// unavailable; returns true after wiring the stub call and its int result.
bool LibraryCallKit::inline_galoisCounterMode_AESCrypt() {
  assert(UseAES, "need AES instruction support");
  address stubAddr = NULL;
  const char *stubName = NULL;
  stubAddr = StubRoutines::galoisCounterMode_AESCrypt();
  stubName = "galoisCounterMode_AESCrypt";

  if (stubAddr == NULL) return false;
  // Arguments of implGCMCrypt(in, inOfs, inLen, ct, ctOfs, out, outOfs,
  // gctr, ghash).
  Node* in = argument(0);
  Node* inOfs = argument(1);
  Node* len = argument(2);
  Node* ct = argument(3);
  Node* ctOfs = argument(4);
  Node* out = argument(5);
  Node* outOfs = argument(6);
  Node* gctr_object = argument(7);
  Node* ghash_object = argument(8);

  // (1) in, ct and out are arrays.
  const Type* in_type = in->Value(&_gvn);
  const Type* ct_type = ct->Value(&_gvn);
  const Type* out_type = out->Value(&_gvn);
  const TypeAryPtr* top_in = in_type->isa_aryptr();
  const TypeAryPtr* top_ct = ct_type->isa_aryptr();
  const TypeAryPtr* top_out = out_type->isa_aryptr();
  assert(top_in != NULL && top_in->klass() != NULL &&
         top_ct != NULL && top_ct->klass() != NULL &&
         top_out != NULL && top_out->klass() != NULL, "args are strange");

  // checks are the responsibility of the caller
  Node* in_start = in;
  Node* ct_start = ct;
  Node* out_start = out;
  if (inOfs != NULL || ctOfs != NULL || outOfs != NULL) {
    assert(inOfs != NULL && ctOfs != NULL && outOfs != NULL, "");
    // Fold each array base + element offset into a raw start address.
    in_start = array_element_address(in, inOfs, T_BYTE);
    ct_start = array_element_address(ct, ctOfs, T_BYTE);
    out_start = array_element_address(out, outOfs, T_BYTE);
  }

  // if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object
  // (because of the predicated logic executed earlier).
  // so we cast it here safely.
  // The GHASH fields are expected as long[] ("[J"); if any required field
  // cannot be loaded (e.g. an older class file), we revert to java.
  Node* embeddedCipherObj = load_field_from_object(gctr_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;");
  Node* counter = load_field_from_object(gctr_object, "counter", "[B");
  Node* subkeyHtbl = load_field_from_object(ghash_object, "subkeyHtbl", "[J");
  Node* state = load_field_from_object(ghash_object, "state", "[J");

  if (embeddedCipherObj == NULL || counter == NULL || subkeyHtbl == NULL || state == NULL) {
    return false;
  }
  // cast it to what we know it will be at runtime
  const TypeInstPtr* tinst = _gvn.type(gctr_object)->isa_instptr();
  assert(tinst != NULL, "GCTR obj is null");
  assert(tinst->klass()->is_loaded(), "GCTR obj is not loaded");
  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
  assert(klass_AESCrypt->is_loaded(), "predicate checks that this class is loaded");
  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
  const TypeOopPtr* xtype = aklass->as_instance_type();
  Node* aescrypt_object = new CheckCastPPNode(control(), embeddedCipherObj, xtype);
  aescrypt_object = _gvn.transform(aescrypt_object);
  // we need to get the start of the aescrypt_object's expanded key array
  Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
  if (k_start == NULL) return false;
  // similarly, get the start addresses of the counter, ghash state and
  // subkeyHtbl arrays
  Node* cnt_start = array_element_address(counter, intcon(0), T_BYTE);
  Node* state_start = array_element_address(state, intcon(0), T_LONG);
  Node* subkeyHtbl_start = array_element_address(subkeyHtbl, intcon(0), T_LONG);

  // Call the stub, passing params
  Node* gcmCrypt = make_runtime_call(RC_LEAF|RC_NO_FP,
                                     OptoRuntime::galoisCounterMode_aescrypt_Type(),
                                     stubAddr, stubName, TypePtr::BOTTOM,
                                     in_start, len, ct_start, out_start, k_start, state_start, subkeyHtbl_start, cnt_start);

  // return cipher length (int)
  Node* retvalue = _gvn.transform(new ProjNode(gcmCrypt, TypeFunc::Parms));
  set_result(retvalue);
  return true;
}
//----------------------------inline_galoisCounterMode_AESCrypt_predicate----------------------------
// Return node representing slow path of predicate check.
// the pseudo code we want to emulate with this predicate is:
//   if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath
// NOTE(review): only the instanceof test is generated here; in-place
// decryption safety is handled on the Java side (overlap detection in
// GaloisCounterMode), not by this predicate.
//
Node* LibraryCallKit::inline_galoisCounterMode_AESCrypt_predicate() {
  // The receiver was checked for NULL already.
  Node* objGCTR = argument(7);
  // Load embeddedCipher field of GCTR object.
  Node* embeddedCipherObj = load_field_from_object(objGCTR, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;");
  assert(embeddedCipherObj != NULL, "embeddedCipherObj is null");

  // get AESCrypt klass for instanceOf check
  // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point
  // will have same classloader as GCTR object
  const TypeInstPtr* tinst = _gvn.type(objGCTR)->isa_instptr();
  assert(tinst != NULL, "GCTR obj is null");
  assert(tinst->klass()->is_loaded(), "GCTR obj is not loaded");

  // we want to do an instanceof comparison against the AESCrypt class
  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
  if (!klass_AESCrypt->is_loaded()) {
    // if AESCrypt is not even loaded, we never take the intrinsic fast path
    Node* ctrl = control();
    set_control(top()); // no regular fast path
    return ctrl;
  }
  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
  Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt)));
  Node* cmp_instof = _gvn.transform(new CmpINode(instof, intcon(1)));
  Node* bool_instof = _gvn.transform(new BoolNode(cmp_instof, BoolTest::ne));
  Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
  return instof_false; // even if it is NULL
}
//------------------------------get_state_from_digest_object-----------------------
Node * LibraryCallKit::get_state_from_digest_object(Node *digest_object, const char *state_type) {
Node* digest_state = load_field_from_object(digest_object, "state", state_type);

@ -305,6 +305,8 @@ class LibraryCallKit : public GraphKit {
bool inline_fma(vmIntrinsics::ID id);
bool inline_character_compare(vmIntrinsics::ID id);
bool inline_fp_min_max(vmIntrinsics::ID id);
bool inline_galoisCounterMode_AESCrypt();
Node* inline_galoisCounterMode_AESCrypt_predicate();
bool inline_profileBoolean();
bool inline_isCompileConstant();

@ -955,6 +955,31 @@ const TypeFunc* OptoRuntime::counterMode_aescrypt_Type() {
return TypeFunc::make(domain, range);
}
// for galoisCounterMode calls of aescrypt encrypt/decrypt: seven pointers
// and a length, returning the number of processed bytes (int)
const TypeFunc* OptoRuntime::galoisCounterMode_aescrypt_Type() {
  // create input type (domain)
  int num_args = 8;
  int argcnt = num_args;
  const Type** fields = TypeTuple::fields(argcnt);
  int argp = TypeFunc::Parms;
  fields[argp++] = TypePtr::NOTNULL; // byte[] in + inOfs
  fields[argp++] = TypeInt::INT;     // int len
  fields[argp++] = TypePtr::NOTNULL; // byte[] ct + ctOfs
  fields[argp++] = TypePtr::NOTNULL; // byte[] out + outOfs
  fields[argp++] = TypePtr::NOTNULL; // byte[] key from AESCrypt obj
  fields[argp++] = TypePtr::NOTNULL; // long[] state from GHASH obj
  fields[argp++] = TypePtr::NOTNULL; // long[] subkeyHtbl from GHASH obj
  fields[argp++] = TypePtr::NOTNULL; // byte[] counter from GCTR obj
  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
  // returning cipher len (int)
  fields = TypeTuple::fields(1);
  fields[TypeFunc::Parms + 0] = TypeInt::INT;
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields);
  return TypeFunc::make(domain, range);
}
/*
* void implCompress(byte[] buf, int ofs)
*/

@ -257,6 +257,7 @@ private:
static const TypeFunc* cipherBlockChaining_aescrypt_Type();
static const TypeFunc* electronicCodeBook_aescrypt_Type();
static const TypeFunc* counterMode_aescrypt_Type();
static const TypeFunc* galoisCounterMode_aescrypt_Type();
static const TypeFunc* digestBase_implCompress_Type(bool is_sha3);
static const TypeFunc* digestBase_implCompressMB_Type(bool is_sha3);

@ -124,6 +124,7 @@ address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
address StubRoutines::_electronicCodeBook_encryptAESCrypt = NULL;
address StubRoutines::_electronicCodeBook_decryptAESCrypt = NULL;
address StubRoutines::_counterMode_AESCrypt = NULL;
address StubRoutines::_galoisCounterMode_AESCrypt = NULL;
address StubRoutines::_ghash_processBlocks = NULL;
address StubRoutines::_base64_encodeBlock = NULL;
address StubRoutines::_base64_decodeBlock = NULL;

@ -206,6 +206,7 @@ class StubRoutines: AllStatic {
static address _electronicCodeBook_encryptAESCrypt;
static address _electronicCodeBook_decryptAESCrypt;
static address _counterMode_AESCrypt;
static address _galoisCounterMode_AESCrypt;
static address _ghash_processBlocks;
static address _base64_encodeBlock;
static address _base64_decodeBlock;
@ -410,6 +411,7 @@ class StubRoutines: AllStatic {
static address montgomerySquare() { return _montgomerySquare; }
static address bigIntegerRightShift() { return _bigIntegerRightShiftWorker; }
static address bigIntegerLeftShift() { return _bigIntegerLeftShiftWorker; }
static address galoisCounterMode_AESCrypt() { return _galoisCounterMode_AESCrypt; }
static address vectorizedMismatch() { return _vectorizedMismatch; }

@ -540,6 +540,7 @@
static_field(StubRoutines, _electronicCodeBook_encryptAESCrypt, address) \
static_field(StubRoutines, _electronicCodeBook_decryptAESCrypt, address) \
static_field(StubRoutines, _counterMode_AESCrypt, address) \
static_field(StubRoutines, _galoisCounterMode_AESCrypt, address) \
static_field(StubRoutines, _ghash_processBlocks, address) \
static_field(StubRoutines, _base64_encodeBlock, address) \
static_field(StubRoutines, _base64_decodeBlock, address) \

@ -122,7 +122,7 @@ final class GHASH implements Cloneable, GCM {
/* subkeyHtbl and state are stored in long[] for GHASH intrinsic use */
// hashtable subkeyHtbl holds 2*9 powers of subkeyH computed using
// hashtable subkeyHtbl holds 2*57 powers of subkeyH computed using
// carry-less multiplication
private long[] subkeyHtbl;
@ -143,7 +143,9 @@ final class GHASH implements Cloneable, GCM {
throw new ProviderException("Internal error");
}
state = new long[2];
subkeyHtbl = new long[2*9];
// 48 keys for the interleaved implementation,
// 8 for avx-ghash implementation and 1 for the original key
subkeyHtbl = new long[2*57];
subkeyHtbl[0] = (long)asLongView.get(subkeyH, 0);
subkeyHtbl[1] = (long)asLongView.get(subkeyH, 8);
}
@ -264,7 +266,7 @@ final class GHASH implements Cloneable, GCM {
throw new RuntimeException("internal state has invalid length: " +
st.length);
}
if (subH.length != 18) {
if (subH.length != 114) {
throw new RuntimeException("internal subkeyHtbl has invalid length: " +
subH.length);
}

@ -25,6 +25,7 @@
package com.sun.crypto.provider;
import jdk.internal.misc.Unsafe;
import sun.nio.ch.DirectBuffer;
import sun.security.jca.JCAUtil;
import sun.security.util.ArrayUtil;
@ -55,6 +56,8 @@ import java.security.spec.AlgorithmParameterSpec;
import java.security.spec.InvalidParameterSpecException;
import java.util.Arrays;
import jdk.internal.vm.annotation.IntrinsicCandidate;
/**
* This class represents ciphers in GaloisCounter (GCM) mode.
*
@ -82,6 +85,8 @@ abstract class GaloisCounterMode extends CipherSpi {
private static final int MAX_BUF_SIZE = Integer.MAX_VALUE;
// data size when buffer is divided up to aid in intrinsics
private static final int TRIGGERLEN = 65536; // 64k
// x86-64 parallel intrinsic data size
private static final int PARALLEL_LEN = 768;
static final byte[] EMPTY_BUF = new byte[0];
@ -566,35 +571,64 @@ abstract class GaloisCounterMode extends CipherSpi {
}
/**
* Calculate if the given data lengths and the already processed data
* exceeds the maximum allowed processed data by GCM.
* @param lengths lengths of unprocessed data.
* Intrinsic for Vector AES Galois Counter Mode implementation.
* AES and GHASH operations are interleaved in the intrinsic implementation.
* return - number of processed bytes
*
* Requires 768 bytes (48 AES blocks) to efficiently use the intrinsic.
* inLen that is less than 768 size block sizes, before or after this
* intrinsic is used, will be done by the calling method
* @param in input buffer
* @param inOfs input offset
* @param inLen input length
* @param ct buffer that ghash will read (in for encrypt, out for decrypt)
* @param ctOfs offset for ct buffer
* @param out output buffer
* @param outOfs output offset
* @param gctr object for the GCTR operation
* @param ghash object for the ghash operation
* @return number of processed bytes
*/
private void checkDataLength(int ... lengths) {
int max = MAX_BUF_SIZE;
for (int len : lengths) {
max = Math.subtractExact(max, len);
}
if (engine.processed > max) {
throw new ProviderException("SunJCE provider only supports " +
"input size up to " + MAX_BUF_SIZE + " bytes");
// Intrinsic candidate: on supporting x86-64 CPUs this body is replaced by
// the galoisCounterMode_AESCrypt stub; the Java code below is the fallback
// and processes only whole PARALLEL_LEN (768-byte) units — the caller
// handles any remainder.
@IntrinsicCandidate
private static int implGCMCrypt(byte[] in, int inOfs, int inLen,
    byte[] ct, int ctOfs, byte[] out, int outOfs,
    GCTR gctr, GHASH ghash) {

    // Round the length down to a whole number of PARALLEL_LEN units.
    inLen -= (inLen % PARALLEL_LEN);

    int len = 0;
    int cOfs = ctOfs;
    if (inLen >= TRIGGERLEN) {
        // Large inputs are fed in five ~1/6-sized, block-aligned chunks
        // (TRIGGERLEN throttling) to aid intrinsic use on smaller pieces.
        int i = 0;
        int segments = (inLen / 6);
        segments -= segments % gctr.blockSize; // keep chunks block-aligned
        do {
            len += gctr.update(in, inOfs + len, segments, out,
                outOfs + len);
            // GHASH consumes 'ct' (the ciphertext side); cOfs trails 'len'
            // so each pass hashes the segment just produced/consumed.
            ghash.update(ct, cOfs, segments);
            cOfs = ctOfs + len;
        } while (++i < 5);
        inLen -= len;
    }

    // Process whatever remains of the rounded-down length in one shot.
    len += gctr.update(in, inOfs + len, inLen, out, outOfs + len);
    ghash.update(ct, cOfs, inLen);
    return len;
}
/**
* Abstract class for GCMEncrypt and GCMDecrypt internal context objects
*/
abstract class GCMEngine {
byte[] preCounterBlock;
GCTR gctrPAndC;
GHASH ghashAllToS;
GCTR gctr;
GHASH ghash;
// Block size of the algorithm
final int blockSize;
// length of total data, i.e. len(C)
int processed = 0;
// buffer for AAD data; if null, meaning update has been called
ByteArrayOutputStream aadBuffer = null;
int sizeOfAAD = 0;
@ -608,7 +642,6 @@ abstract class GaloisCounterMode extends CipherSpi {
byte[] originalOut = null;
int originalOutOfs = 0;
GCMEngine(SymmetricCipher blockCipher) {
blockSize = blockCipher.getBlockSize();
byte[] subkeyH = new byte[blockSize];
@ -616,8 +649,8 @@ abstract class GaloisCounterMode extends CipherSpi {
preCounterBlock = getJ0(iv, subkeyH, blockSize);
byte[] j0Plus1 = preCounterBlock.clone();
increment32(j0Plus1);
gctrPAndC = new GCTR(blockCipher, j0Plus1);
ghashAllToS = new GHASH(subkeyH);
gctr = new GCTR(blockCipher, j0Plus1);
ghash = new GHASH(subkeyH);
}
/**
@ -631,15 +664,15 @@ abstract class GaloisCounterMode extends CipherSpi {
abstract int getOutputSize(int inLen, boolean isFinal);
// Update operations
abstract byte[] doUpdate(byte[] in, int inOff, int inLen);
abstract int doUpdate(byte[] in, int inOff, int inLen, byte[] out,
int outOff) throws ShortBufferException;
abstract byte[] doUpdate(byte[] in, int inOfs, int inLen);
abstract int doUpdate(byte[] in, int inOfs, int inLen, byte[] out,
int outOfs) throws ShortBufferException;
abstract int doUpdate(ByteBuffer src, ByteBuffer dst)
throws ShortBufferException;
// Final operations
abstract int doFinal(byte[] in, int inOff, int inLen, byte[] out,
int outOff) throws IllegalBlockSizeException, AEADBadTagException,
abstract int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
int outOfs) throws IllegalBlockSizeException, AEADBadTagException,
ShortBufferException;
abstract int doFinal(ByteBuffer src, ByteBuffer dst)
throws IllegalBlockSizeException, AEADBadTagException,
@ -657,6 +690,48 @@ abstract class GaloisCounterMode extends CipherSpi {
return (ibuffer == null ? 0 : ibuffer.size());
}
/**
 * ByteBuffer wrapper for intrinsic implGCMCrypt. It will operate
 * on 768 byte blocks and let the calling method operate on smaller
 * sizes.
 *
 * Advances the positions of {@code src} and {@code dst} by the number
 * of bytes processed and returns that count (0 if fewer than
 * PARALLEL_LEN bytes remain).
 */
int implGCMCrypt(ByteBuffer src, ByteBuffer dst) {
    // Only whole PARALLEL_LEN (768-byte) units are handled here.
    int srcLen = src.remaining() - (src.remaining() % PARALLEL_LEN);

    if (srcLen < PARALLEL_LEN) {
        return 0;
    }
    int len;
    if (src.hasArray() && dst.hasArray()) {
        // Heap buffers: pass the backing arrays straight to the intrinsic.
        // GHASH reads the ciphertext, so 'ct' aliases dst when encrypting
        // and src when decrypting.
        ByteBuffer ct = (encryption ? dst : src);
        len = GaloisCounterMode.implGCMCrypt(src.array(),
            src.arrayOffset() + src.position(), srcLen,
            ct.array(), ct.arrayOffset() + ct.position(),
            dst.array(), dst.arrayOffset() + dst.position(),
            gctr, ghash);
        src.position(src.position() + len);
        dst.position(dst.position() + len);
        return len;
    } else {
        // Buffers without an accessible backing array (e.g. direct
        // buffers): stage through temporary arrays, one unit at a time.
        byte[] bin = new byte[PARALLEL_LEN];
        byte[] bout = new byte[PARALLEL_LEN];
        byte[] ct = (encryption ? bout : bin);
        len = srcLen;
        do {
            src.get(bin, 0, PARALLEL_LEN);
            len -= GaloisCounterMode.implGCMCrypt(bin, 0, PARALLEL_LEN,
                ct, 0, bout, 0, gctr, ghash);
            dst.put(bout, 0, PARALLEL_LEN);
        } while (len >= PARALLEL_LEN);
        // 'len' holds the unprocessed remainder (always 0 here since srcLen
        // is a multiple of PARALLEL_LEN).
        return srcLen - len;
    }
}
/**
* The method takes two buffers to create one block of data. The
* difference with the other mergeBlock is this will calculate
@ -704,8 +779,6 @@ abstract class GaloisCounterMode extends CipherSpi {
* (e.g., has not been initialized) or does not accept AAD, and one of
* the {@code update} methods has already been called for the active
* encryption/decryption operation
* @throws UnsupportedOperationException if this method
* has not been overridden by an implementation
*/
void updateAAD(byte[] src, int offset, int len) {
if (encryption) {
@ -733,12 +806,12 @@ abstract class GaloisCounterMode extends CipherSpi {
int lastLen = aad.length % blockSize;
if (lastLen != 0) {
ghashAllToS.update(aad, 0, aad.length - lastLen);
ghash.update(aad, 0, aad.length - lastLen);
byte[] padded = expandToOneBlock(aad,
aad.length - lastLen, lastLen, blockSize);
ghashAllToS.update(padded);
ghash.update(padded);
} else {
ghashAllToS.update(aad);
ghash.update(aad);
}
}
aadBuffer = null;
@ -751,18 +824,28 @@ abstract class GaloisCounterMode extends CipherSpi {
* For input it takes the ibuffer which is wrapped in 'buffer' and 'src'
* from doFinal.
*/
int doLastBlock(GCM op, ByteBuffer buffer, ByteBuffer src, ByteBuffer dst) {
int resultLen = 0;
int doLastBlock(GCMOperation op, ByteBuffer buffer, ByteBuffer src,
ByteBuffer dst) {
int len = 0;
int resultLen;
int bLen = (buffer != null ? buffer.remaining() : 0);
if (bLen > 0) {
// en/decrypt on how much buffer there is in AES_BLOCK_SIZE
// en/decrypt any PARALLEL_LEN sized data in the buffer
if (bLen >= PARALLEL_LEN) {
len = implGCMCrypt(buffer, dst);
bLen -= len;
}
// en/decrypt any blocksize data in the buffer
if (bLen >= blockSize) {
resultLen += op.update(buffer, dst);
resultLen = op.update(buffer, dst);
bLen -= resultLen;
len += resultLen;
}
// Process the remainder in the buffer
if (bLen - resultLen > 0) {
if (bLen > 0) {
// Copy the buffer remainder into an extra block
byte[] block = new byte[blockSize];
int over = buffer.remaining();
@ -773,76 +856,26 @@ abstract class GaloisCounterMode extends CipherSpi {
if (slen > 0) {
src.get(block, over, slen);
}
int len = slen + over;
if (len == blockSize) {
resultLen += op.update(block, 0, blockSize, dst);
int l = slen + over;
if (l == blockSize) {
len += op.update(block, 0, blockSize, dst);
} else {
resultLen += op.doFinal(block, 0, len, block,
0);
len += op.doFinal(block, 0, l, block,0);
if (dst != null) {
dst.put(block, 0, len);
dst.put(block, 0, l);
}
processed += resultLen;
return resultLen;
return len;
}
}
}
// en/decrypt whatever remains in src.
// If src has been consumed, this will be a no-op
if (src.remaining() > TRIGGERLEN) {
resultLen += throttleData(op, src, dst);
if (src.remaining() >= PARALLEL_LEN) {
len += implGCMCrypt(src, dst);
}
resultLen += op.doFinal(src, dst);
processed += resultLen;
return resultLen;
}
/**
* This segments large data into smaller chunks so hotspot will start
* using GCTR and GHASH intrinsics sooner. This is a problem for app
* and perf tests that only use large input sizes.
*/
int throttleData(GCM op, byte[] in, int inOfs, int inLen,
byte[] out, int outOfs) {
int segments = (inLen / 6);
segments -= segments % blockSize;
int len = 0;
int i = 0;
do {
len += op.update(in, inOfs + len, segments, out,outOfs + len);
} while (++i < 5);
len += op.update(in, inOfs + len, inLen - len, out, outOfs + len);
return len;
}
/**
* This segments large data into smaller chunks so hotspot will start
* using GCTR and GHASH intrinsics sooner. This is a problem for app
* and perf tests that only use large input sizes.
*/
int throttleData(GCM op, ByteBuffer src, ByteBuffer dst) {
int inLen = src.limit();
int segments = (src.remaining() / 6);
segments -= segments % blockSize;
int i = 0, resultLen = 0;
do {
src.limit(src.position() + segments);
resultLen += op.update(src, dst);
} while (++i < 5);
src.limit(inLen);
// If there is still at least a blockSize left
if (src.remaining() > blockSize) {
resultLen += op.update(src, dst);
}
return resultLen;
return len + op.doFinal(src, dst);
}
/**
@ -900,7 +933,11 @@ abstract class GaloisCounterMode extends CipherSpi {
// Position plus arrayOffset() will give us the true offset
// from the underlying byte[] address.
if (src.position() + src.arrayOffset() >=
// If during encryption and the input offset is behind or
// the same as the output offset, the same buffer can be
// used. But during decryption always create a new
// buffer in case of a bad auth tag.
if (encryption && src.position() + src.arrayOffset() >=
dst.position() + dst.arrayOffset()) {
return dst;
}
@ -923,12 +960,15 @@ abstract class GaloisCounterMode extends CipherSpi {
}
/**
* Overlap detection for data using byte array.
* This is used for both overlap detection for the data or decryption
* during in-place crypto, so to not overwrite the input if the auth tag
* is invalid.
*
* If an intermediate array is needed, the original out array length is
* allocated because for code simplicity.
*/
byte[] overlapDetection(byte[] in, int inOfs, byte[] out, int outOfs) {
if (in == out && inOfs < outOfs) {
if (in == out && (!encryption || inOfs < outOfs)) {
originalOut = out;
originalOutOfs = outOfs;
return new byte[out.length];
@ -969,11 +1009,31 @@ abstract class GaloisCounterMode extends CipherSpi {
* Encryption Engine object
*/
class GCMEncrypt extends GCMEngine {
GCTRGHASH gctrghash;
GCMOperation op;
// data processed during encryption
int processed = 0;
GCMEncrypt(SymmetricCipher blockCipher) {
super(blockCipher);
gctrghash = new GCTRGHASH(gctrPAndC, ghashAllToS);
op = new EncryptOp(gctr, ghash);
}
/**
 * Verify that processing the given additional data lengths on top of the
 * bytes already processed stays within the maximum GCM input size
 * (MAX_BUF_SIZE).
 *
 * @param lengths lengths of data not yet processed
 * @throws ProviderException if the combined size would exceed MAX_BUF_SIZE
 */
private void checkDataLength(int ... lengths) {
    int headroom = MAX_BUF_SIZE;
    for (int pending : lengths) {
        // subtractExact throws ArithmeticException on underflow, so
        // oversized inputs cannot wrap around the comparison below.
        headroom = Math.subtractExact(headroom, pending);
        if (processed > headroom) {
            throw new ProviderException("SunJCE provider only " +
                "supports input size up to " + MAX_BUF_SIZE + " bytes");
        }
    }
}
@Override
@ -1034,7 +1094,7 @@ abstract class GaloisCounterMode extends CipherSpi {
System.arraycopy(buffer, 0, block, 0, bLen);
System.arraycopy(in, inOfs, block, bLen, remainder);
len = gctrghash.update(block, 0, blockSize, out, outOfs);
len = op.update(block, 0, blockSize, out, outOfs);
inOfs += remainder;
inLen -= remainder;
outOfs += blockSize;
@ -1043,8 +1103,20 @@ abstract class GaloisCounterMode extends CipherSpi {
}
// Encrypt the remaining blocks inside of 'in'
if (inLen >= PARALLEL_LEN) {
int r = GaloisCounterMode.implGCMCrypt(in, inOfs, inLen, out,
outOfs, out, outOfs, gctr, ghash);
len += r;
inOfs += r;
inLen -= r;
outOfs += r;
}
if (inLen >= blockSize) {
len += gctrghash.update(in, inOfs, inLen, out, outOfs);
int r = op.update(in, inOfs, inLen, out, outOfs);
len += r;
inOfs += r;
inLen -= r;
}
// Write any remaining bytes less than a blockSize into ibuffer.
@ -1089,21 +1161,32 @@ abstract class GaloisCounterMode extends CipherSpi {
ByteBuffer buffer = ByteBuffer.wrap(ibuffer.toByteArray());
buffer.get(block, 0, bLen);
src.get(block, bLen, remainder);
len += cryptBlocks(
ByteBuffer.wrap(block, 0, blockSize), dst);
len += op.update(ByteBuffer.wrap(block, 0, blockSize),
dst);
ibuffer.reset();
}
}
// encrypt any blocksized data in 'src'
if (src.remaining() >= blockSize) {
len += cryptBlocks(src, dst);
int srcLen = src.remaining();
int resultLen;
// encrypt any PARALLEL_LEN sized data in 'src'
if (srcLen >= PARALLEL_LEN) {
resultLen = implGCMCrypt(src, dst);
srcLen -= resultLen;
len += resultLen;
}
// encrypt any blocksize data in 'src'
if (srcLen >= blockSize) {
resultLen = op.update(src, dst);
srcLen -= resultLen;
len += resultLen;
}
// Write the remaining bytes into the 'ibuffer'
if (src.remaining() > 0) {
initBuffer(src.remaining());
byte[] b = new byte[src.remaining()];
if (srcLen > 0) {
initBuffer(srcLen);
byte[] b = new byte[srcLen];
src.get(b);
// remainder offset is based on original buffer length
try {
@ -1114,6 +1197,7 @@ abstract class GaloisCounterMode extends CipherSpi {
}
restoreDst(dst);
processed += len;
return len;
}
@ -1127,7 +1211,7 @@ abstract class GaloisCounterMode extends CipherSpi {
try {
ArrayUtil.nullAndBoundsCheck(out, outOfs, getOutputSize(inLen,
true));
} catch (ArrayIndexOutOfBoundsException aiobe) {
} catch (ArrayIndexOutOfBoundsException e) {
throw new ShortBufferException("Output buffer invalid");
}
@ -1136,7 +1220,7 @@ abstract class GaloisCounterMode extends CipherSpi {
processAAD();
out = overlapDetection(in, inOfs, out, outOfs);
int resultLen = 0;
int len = 0;
byte[] block;
// process what is in the ibuffer
@ -1145,18 +1229,16 @@ abstract class GaloisCounterMode extends CipherSpi {
// Make a block if the remaining ibuffer and 'in' can make one.
if (bLen + inLen >= blockSize) {
int r, bufOfs = 0;
int r;
block = new byte[blockSize];
r = mergeBlock(buffer, bufOfs, in, inOfs, inLen, block);
r = mergeBlock(buffer, 0, in, inOfs, inLen, block);
inOfs += r;
inLen -= r;
r = gctrghash.update(block, 0, blockSize, out,
outOfs);
outOfs += r;
resultLen += r;
processed += r;
op.update(block, 0, blockSize, out, outOfs);
outOfs += blockSize;
len += blockSize;
} else {
// Need to consume all the ibuffer here to prepare for doFinal()
// Need to consume the ibuffer here to prepare for doFinal()
block = new byte[bLen + inLen];
System.arraycopy(buffer, 0, block, 0, bLen);
System.arraycopy(in, inOfs, block, bLen, inLen);
@ -1167,28 +1249,18 @@ abstract class GaloisCounterMode extends CipherSpi {
}
// process what is left in the input buffer
if (inLen > TRIGGERLEN) {
int r = throttleData(gctrghash, in, inOfs, inLen, out, outOfs);
inOfs += r;
inLen -= r;
outOfs += r;
resultLen += r;
processed += r;
}
processed += gctrghash.doFinal(in, inOfs, inLen, out, outOfs);
len += op.doFinal(in, inOfs, inLen, out, outOfs);
outOfs += inLen;
resultLen += inLen;
block = getLengthBlock(sizeOfAAD, processed);
ghashAllToS.update(block);
block = ghashAllToS.digest();
block = getLengthBlock(sizeOfAAD, processed + len);
ghash.update(block);
block = ghash.digest();
new GCTR(blockCipher, preCounterBlock).doFinal(block, 0,
tagLenBytes, block, 0);
// copy the tag to the end of the buffer
System.arraycopy(block, 0, out, outOfs, tagLenBytes);
int len = resultLen + tagLenBytes;
len += tagLenBytes;
restoreOut(out, len);
reInit = true;
@ -1214,7 +1286,7 @@ abstract class GaloisCounterMode extends CipherSpi {
processAAD();
if (len > 0) {
processed += doLastBlock(gctrghash,
processed += doLastBlock(op,
(ibuffer == null || ibuffer.size() == 0) ? null :
ByteBuffer.wrap(ibuffer.toByteArray()), src, dst);
}
@ -1225,8 +1297,8 @@ abstract class GaloisCounterMode extends CipherSpi {
}
byte[] block = getLengthBlock(sizeOfAAD, processed);
ghashAllToS.update(block);
block = ghashAllToS.digest();
ghash.update(block);
block = ghash.digest();
new GCTR(blockCipher, preCounterBlock).doFinal(block, 0,
tagLenBytes, block, 0);
dst.put(block, 0, tagLenBytes);
@ -1235,18 +1307,6 @@ abstract class GaloisCounterMode extends CipherSpi {
reInit = true;
return (len + tagLenBytes);
}
// Handler method for encrypting blocks
int cryptBlocks(ByteBuffer src, ByteBuffer dst) {
    // Encrypt everything remaining in 'src' into 'dst'.  Inputs larger
    // than TRIGGERLEN are split up via throttleData(); anything smaller
    // is handled with a single update call.
    int consumed = (src.remaining() > TRIGGERLEN)
            ? throttleData(gctrghash, src, dst)
            : gctrghash.update(src, dst);
    // Keep the running total of processed bytes for the final tag.
    processed += consumed;
    return consumed;
}
}
/**
@ -1262,6 +1322,22 @@ abstract class GaloisCounterMode extends CipherSpi {
super(blockCipher);
}
/**
* Calculate if the given data lengths exceeds the maximum allowed
* processed data by GCM.
* @param lengths lengths of unprocessed data.
*/
private void checkDataLength(int ... lengths) {
    // Deduct each length from the remaining GCM budget.  subtractExact
    // also traps arithmetic overflow from hostile length combinations.
    int budget = MAX_BUF_SIZE;
    for (int i = 0; i < lengths.length; i++) {
        budget = Math.subtractExact(budget, lengths[i]);
        if (budget < 0) {
            throw new ProviderException("SunJCE provider only " +
                "supports input size up to " + MAX_BUF_SIZE + " bytes");
        }
    }
}
@Override
public int getOutputSize(int inLen, boolean isFinal) {
if (!isFinal) {
@ -1311,9 +1387,8 @@ abstract class GaloisCounterMode extends CipherSpi {
processAAD();
if (inLen > 0) {
// store internally until decryptFinal is called because
// spec mentioned that only return recovered data after tag
// is successfully verified
// store internally until doFinal. Per the spec, data is
// returned after tag is successfully verified.
initBuffer(inLen);
ibuffer.write(in, inOfs, inLen);
}
@ -1350,38 +1425,43 @@ abstract class GaloisCounterMode extends CipherSpi {
}
/**
* Use any data from ibuffer and 'in' to first verify the auth tag. If
* the tag is valid, decrypt the data.
* Use available data from ibuffer and 'in' to verify and decrypt the
* data. If the verification fails, 'out' is left at its original
* values if crypto was in-place; otherwise 'out' is zeroed.
*/
@Override
public int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
int outOfs) throws IllegalBlockSizeException, AEADBadTagException,
ShortBufferException {
GHASH save = null;
int len = inLen + getBufferedLength();
if (len < tagLenBytes) {
throw new AEADBadTagException("Input data too short to " +
"contain an expected tag length of " + tagLenBytes +
"bytes");
}
try {
ArrayUtil.nullAndBoundsCheck(out, outOfs, len - tagLenBytes);
} catch (ArrayIndexOutOfBoundsException aiobe) {
} catch (ArrayIndexOutOfBoundsException e) {
throw new ShortBufferException("Output buffer invalid");
}
if (len < tagLenBytes) {
throw new AEADBadTagException("Input too short - need tag");
}
if (len - tagLenBytes > out.length - outOfs) {
save = ghashAllToS.clone();
throw new ShortBufferException("Output buffer too small, must" +
"be at least " + (len - tagLenBytes) + " bytes long");
}
checkDataLength(len - tagLenBytes);
processAAD();
findTag(in, inOfs, inLen);
byte[] block = getLengthBlock(sizeOfAAD,
decryptBlocks(ghashAllToS, in, inOfs, inLen, null, 0));
ghashAllToS.update(block);
block = ghashAllToS.digest();
out = overlapDetection(in, inOfs, out, outOfs);
len = decryptBlocks(new DecryptOp(gctr, ghash), in, inOfs, inLen,
out, outOfs);
byte[] block = getLengthBlock(sizeOfAAD, len);
ghash.update(block);
block = ghash.digest();
new GCTR(blockCipher, preCounterBlock).doFinal(block, 0,
tagLenBytes, block, 0);
@ -1392,30 +1472,24 @@ abstract class GaloisCounterMode extends CipherSpi {
}
if (mismatch != 0) {
throw new AEADBadTagException("Tag mismatch!");
// Clear output data
Arrays.fill(out, outOfs, outOfs + len, (byte) 0);
throw new AEADBadTagException("Tag mismatch");
}
if (save != null) {
ghashAllToS = save;
throw new ShortBufferException("Output buffer too small, must" +
"be at least " + (len - tagLenBytes) + " bytes long");
}
out = overlapDetection(in, inOfs, out, outOfs);
len = decryptBlocks(gctrPAndC, in, inOfs, inLen, out, outOfs);
restoreOut(out, len);
return len;
}
/**
* Use any data from ibuffer and 'src' to first verify the auth tag. If
* the tag is valid, decrypt the data.
* Use available data from ibuffer and 'src' to verify and decrypt the
* data. If the verification fails, 'dst' is left at its original
* values if crypto was in-place; otherwise 'dst' is zeroed.
*/
@Override
public int doFinal(ByteBuffer src, ByteBuffer dst)
throws IllegalBlockSizeException, AEADBadTagException,
ShortBufferException {
GHASH save = null;
ByteBuffer tag;
ByteBuffer ct = src.duplicate();
@ -1432,11 +1506,10 @@ abstract class GaloisCounterMode extends CipherSpi {
checkDataLength(len);
// Save GHASH context to allow the tag to be checked even though
// the dst buffer is too short. Context will be restored so the
// method can be called again with the proper sized dst buffer.
// Verify dst is large enough
if (len > dst.remaining()) {
save = ghashAllToS.clone();
throw new ShortBufferException("Output buffer too small, " +
"must be at least " + len + " bytes long");
}
// Create buffer 'tag' that contains only the auth tag
@ -1459,20 +1532,19 @@ abstract class GaloisCounterMode extends CipherSpi {
tag.put(ct);
tag.flip();
} else {
throw new AEADBadTagException("Input too short - need tag");
throw new AEADBadTagException("Input data too short to " +
"contain an expected tag length of " + tagLenBytes +
"bytes");
}
// Set the mark for a later reset. Either it will be zero, or the
// tag buffer creation above will have consumed some or all of it.
ct.mark();
dst = overlapDetection(src, dst);
dst.mark();
processAAD();
// Perform GHASH check on data
doLastBlock(ghashAllToS, buffer, ct, null);
len = doLastBlock(new DecryptOp(gctr, ghash), buffer, ct, dst);
byte[] block = getLengthBlock(sizeOfAAD, len);
ghashAllToS.update(block);
block = ghashAllToS.digest();
ghash.update(block);
block = ghash.digest();
new GCTR(blockCipher, preCounterBlock).doFinal(block, 0,
tagLenBytes, block, 0);
@ -1483,32 +1555,22 @@ abstract class GaloisCounterMode extends CipherSpi {
}
if (mismatch != 0) {
throw new AEADBadTagException("Tag mismatch!");
// Clear output data
dst.reset();
if (dst.hasArray()) {
int ofs = dst.arrayOffset() + dst.position();
Arrays.fill(dst.array(), ofs , ofs + len, (byte)0);
} else {
Unsafe.getUnsafe().setMemory(((DirectBuffer)dst).address(),
len + dst.position(), (byte)0);
}
throw new AEADBadTagException("Tag mismatch");
}
if (save != null) {
ghashAllToS = save;
throw new ShortBufferException("Output buffer too small, must" +
" be at least " + len + " bytes long");
}
// Prepare for decryption
if (buffer != null) {
buffer.flip();
}
ct.reset();
processed = 0;
// Check for overlap in the bytebuffers
dst = overlapDetection(src, dst);
// Decrypt the all the input data and put it into dst
doLastBlock(gctrPAndC, buffer, ct, dst);
restoreDst(dst);
src.position(src.limit());
if (ibuffer != null) {
ibuffer.reset();
}
return processed;
engine = null;
restoreDst(dst);
return len;
}
/**
@ -1517,11 +1579,12 @@ abstract class GaloisCounterMode extends CipherSpi {
* When this method is used, all the data is either in the ibuffer
* or in 'in'.
*/
int decryptBlocks(GCM op, byte[] in, int inOfs, int inLen,
int decryptBlocks(GCMOperation op, byte[] in, int inOfs, int inLen,
byte[] out, int outOfs) {
byte[] buffer;
byte[] block;
int len = 0;
int resultLen;
// Calculate the encrypted data length inside the ibuffer
// considering the tag location
@ -1538,15 +1601,24 @@ abstract class GaloisCounterMode extends CipherSpi {
if (bLen > 0) {
buffer = ibuffer.toByteArray();
if (bLen >= blockSize) {
len += op.update(buffer, 0, bLen, out, outOfs);
outOfs += len; // noop for ghash
if (bLen >= PARALLEL_LEN) {
len = GaloisCounterMode.implGCMCrypt(buffer, 0, bLen,
buffer, 0, out, outOfs, gctr, ghash);
outOfs += len;
// Use len as it becomes the ibuffer offset, if
// needed, in the next op
}
// merge the remaining ibuffer with the 'in'
int bufRemainder = bLen - len;
if (bufRemainder >= blockSize) {
resultLen = op.update(buffer, len, bufRemainder, out,
outOfs);
len += resultLen;
outOfs += resultLen;
bufRemainder -= resultLen;
}
// merge the remaining ibuffer with the 'in'
if (bufRemainder > 0) {
block = new byte[blockSize];
int inUsed = mergeBlock(buffer, len, bufRemainder, in,
@ -1557,9 +1629,9 @@ abstract class GaloisCounterMode extends CipherSpi {
// If is more than block between the merged data and 'in',
// update(), otherwise setup for final
if (inLen > 0) {
int resultLen = op.update(block, 0, blockSize,
resultLen = op.update(block, 0, blockSize,
out, outOfs);
outOfs += resultLen; // noop for ghash
outOfs += resultLen;
len += resultLen;
} else {
in = block;
@ -1569,14 +1641,6 @@ abstract class GaloisCounterMode extends CipherSpi {
}
}
// Finish off the operation
if (inLen > TRIGGERLEN) {
int l = throttleData(op, in, inOfs, inLen, out, outOfs);
inOfs += l;
inLen -= l;
outOfs += l; // noop for ghash
len += l;
}
return len + op.doFinal(in, inOfs, inLen, out, outOfs);
}
}
@ -1609,11 +1673,11 @@ abstract class GaloisCounterMode extends CipherSpi {
* This class is for encryption when both GCTR and GHASH
* can operation in parallel.
*/
static final class GCTRGHASH implements GCM {
static final class EncryptOp implements GCMOperation {
GCTR gctr;
GHASH ghash;
GCTRGHASH(GCTR c, GHASH g) {
EncryptOp(GCTR c, GHASH g) {
gctr = c;
ghash = g;
}
@ -1645,19 +1709,96 @@ abstract class GaloisCounterMode extends CipherSpi {
}
@Override
public int doFinal(byte[] in, int inOfs, int inLen, byte[] out, int outOfs) {
int len = gctr.doFinal(in, inOfs, inLen, out, outOfs);
ghash.doFinal(out, outOfs, len);
return len;
public int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
int outOfs) {
int len = 0;
if (inLen >= PARALLEL_LEN) {
len = implGCMCrypt(in, inOfs, inLen, out, outOfs, out, outOfs,
gctr, ghash);
inLen -= len;
outOfs += len;
}
gctr.doFinal(in, inOfs + len, inLen, out, outOfs);
return len + ghash.doFinal(out, outOfs, inLen);
}
@Override
public int doFinal(ByteBuffer src, ByteBuffer dst) {
dst.mark();
int l = gctr.doFinal(src, dst);
int len = gctr.doFinal(src, dst);
dst.reset();
ghash.doFinal(dst, l);
return l;
ghash.doFinal(dst, len);
return len;
}
}
/**
* This class is for decryption when both GCTR and GHASH
* can operation in parallel.
*/
static final class DecryptOp implements GCMOperation {
    GCTR gctr;   // counter-mode cipher that produces the plaintext
    GHASH ghash; // authentication hash, fed the ciphertext

    DecryptOp(GCTR c, GHASH g) {
        gctr = c;
        ghash = g;
    }

    // In every method below GHASH consumes the ciphertext before GCTR
    // produces output from it.  NOTE(review): this ordering also looks
    // intended to keep in-place operation correct when 'out'/'dst'
    // aliases the input — confirm against the callers.
    @Override
    public int update(byte[] in, int inOfs, int inLen, byte[] out,
        int outOfs) {
        ghash.update(in, inOfs, inLen);
        return gctr.update(in, inOfs, inLen, out, outOfs);
    }

    @Override
    public int update(byte[] in, int inOfs, int inLen, ByteBuffer dst) {
        ghash.update(in, inOfs, inLen);
        return gctr.update(in, inOfs, inLen, dst);
    }

    @Override
    public int update(ByteBuffer src, ByteBuffer dst) {
        // GHASH reads 'src' first; mark/reset rewinds the buffer so
        // GCTR can consume the same bytes again.
        src.mark();
        ghash.update(src, src.remaining());
        src.reset();
        return gctr.update(src, dst);
    }

    @Override
    public int doFinal(byte[] in, int inOfs, int inLen, byte[] out,
        int outOfs) {
        int len = 0;
        // Inputs of at least PARALLEL_LEN take the combined path that
        // runs GCTR and GHASH over the data together (implGCMCrypt).
        if (inLen >= PARALLEL_LEN) {
            len += implGCMCrypt(in, inOfs, inLen, in, inOfs, out, outOfs,
                gctr, ghash);
        }
        // Hash the remaining ciphertext, then decrypt it.
        ghash.doFinal(in, inOfs + len, inLen - len);
        return len + gctr.doFinal(in, inOfs + len, inLen - len, out,
            outOfs + len);
    }

    @Override
    public int doFinal(ByteBuffer src, ByteBuffer dst) {
        // Hash all remaining ciphertext, rewind 'src', then decrypt.
        src.mark();
        ghash.doFinal(src, src.remaining());
        src.reset();
        return gctr.doFinal(src, dst);
    }
}
/**
* Interface to organize encryption and decryption operations in the
* proper order for GHASH and GCTR.
*/
public interface GCMOperation {
    // Process inLen bytes of 'in' starting at 'inOfs' into 'out' at
    // 'outOfs'; returns the number of bytes written to 'out'.
    int update(byte[] in, int inOfs, int inLen, byte[] out, int outOfs);

    // Same as above, but output is written to the ByteBuffer 'dst'.
    int update(byte[] in, int inOfs, int inLen, ByteBuffer dst);

    // Process the remaining bytes of 'src' into 'dst'; returns the
    // number of bytes written.
    int update(ByteBuffer src, ByteBuffer dst);

    // Complete the operation over the final input bytes; returns the
    // number of bytes written to 'out'.
    int doFinal(byte[] in, int inOfs, int inLen, byte[] out, int outOfs);

    // Complete the operation, consuming what remains of 'src' into
    // 'dst'; returns the number of bytes written.
    int doFinal(ByteBuffer src, ByteBuffer dst);
}
}

@ -98,6 +98,27 @@
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
* compiler.codegen.aes.TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DmsgSize=2054
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
* compiler.codegen.aes.TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DmsgSize=2054
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
* compiler.codegen.aes.TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencOutputOffset=1 -DmsgSize=2054
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
* compiler.codegen.aes.TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DdecOutputOffset=1 -DmsgSize=2054
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
* compiler.codegen.aes.TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DmsgSize=2054
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
* compiler.codegen.aes.TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DmsgSize=2054
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
* compiler.codegen.aes.TestAESMain
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=2048
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
* compiler.codegen.aes.TestAESMain
*
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=CTR
* -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.