8337632: AES-GCM Algorithm optimization for x86_64
Reviewed-by: jbhateja, sviswanathan
This commit is contained in:
parent
5586f83e34
commit
a6b318863f
@ -1919,6 +1919,11 @@ void Assembler::cmpb(Address dst, int imm8) {
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
// Register/immediate overload of cmpb: emits any prefix required for dst and
// then the byte-sized arithmetic encoding produced by emit_arith_b for
// (dst, imm8).
//   dst  - register operand
//   imm8 - immediate operand, forwarded unchanged to emit_arith_b
void Assembler::cmpb(Register dst, int imm8) {
|
||||
prefix(dst);
|
||||
// NOTE(review): 0x80 is presumably the "group 1, r/m8, imm8" opcode and 0xF8 the
// ModRM base (mod=11, reg=/7 => CMP) -- confirm against the Intel SDM.
emit_arith_b(0x80, 0xF8, dst, imm8);
|
||||
}
|
||||
|
||||
void Assembler::cmpl(Address dst, int32_t imm32) {
|
||||
InstructionMark im(this);
|
||||
prefix(dst);
|
||||
@ -9667,6 +9672,15 @@ void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||
emit_int24(0x3A, (0xC0 | encode), imm8 & 0x01);
|
||||
}
|
||||
|
||||
// Emits the register-register, EVEX-only encoding with opcode 0x38 in the
// 0F 3A map (66 prefix, W=1), followed by a one-byte immediate.
//   dst, nds, src - register operands passed to vex_prefix_and_encode in that order
//   imm8          - selector; only its low two bits are emitted
//   vector_len    - vector length; anything other than AVX_512bit needs AVX512VL
// NOTE(review): presumably this is VINSERTI64X2 (insert 128 bits of src into a copy
// of nds at the lane chosen by imm8, result in dst) -- confirm against the Intel SDM.
void Assembler::evinserti64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8, int vector_len) {
|
||||
// Requires AVX512DQ; the asserts carry no message text.
assert(VM_Version::supports_avx512dq(), "");
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
// Force the EVEX form of the prefix.
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
// Three bytes: opcode, ModRM in register-direct form (0xC0 | encode), masked immediate.
emit_int24(0x38, (0xC0 | encode), imm8 & 0x03);
|
||||
}
|
||||
|
||||
|
||||
// vinsertf forms
|
||||
|
||||
@ -11731,6 +11745,21 @@ void Assembler::vbroadcastf128(XMMRegister dst, Address src, int vector_len) {
|
||||
emit_operand(dst, src, 0);
|
||||
}
|
||||
|
||||
// Emits the memory-source, EVEX-only encoding with opcode 0x1A in the
// 0F 38 map (66 prefix, W=1).
//   dst        - destination vector register; must not be xnoreg
//   src        - memory operand
//   vector_len - vector length; anything other than AVX_512bit needs AVX512VL
// NOTE(review): presumably this is VBROADCASTF64X2 (replicate a 128-bit memory
// operand across dst) -- confirm against the Intel SDM.
void Assembler::evbroadcastf64x2(XMMRegister dst, Address src, int vector_len) {
|
||||
// Requires AVX512DQ; the first two asserts carry no message text.
assert(VM_Version::supports_avx512dq(), "");
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
assert(dst != xnoreg, "sanity");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
// Describe the memory operand as tuple type T2 with 64-bit input size.
// NOTE(review): presumably consumed for EVEX disp8 compression -- confirm.
attributes.set_address_attributes(/* tuple_type */ EVEX_T2, /* input_size_in_bits */ EVEX_64bit);
|
||||
// Force the EVEX form of the prefix.
attributes.set_is_evex_instruction();
|
||||
// swap src<->dst for encoding
|
||||
// No second source register: 0 is passed in the nds position.
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x1A);
|
||||
// ModRM/SIB/displacement for (dst, src); the trailing 0 is passed through to emit_operand.
emit_operand(dst, src, 0);
|
||||
}
|
||||
|
||||
|
||||
// gpr source broadcast forms
|
||||
|
||||
// duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
|
||||
|
@ -1239,6 +1239,7 @@ private:
|
||||
void cmpb(Address dst, int imm8);
|
||||
void cmpb(Address dst, Register reg);
|
||||
void cmpb(Register reg, Address dst);
|
||||
void cmpb(Register reg, int imm8);
|
||||
|
||||
void cmpl(Address dst, int32_t imm32);
|
||||
void cmpl(Register dst, int32_t imm32);
|
||||
@ -2986,6 +2987,7 @@ private:
|
||||
void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||
void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
|
||||
void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||
void evinserti64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8, int vector_len);
|
||||
|
||||
// vinsertf forms
|
||||
void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
|
||||
@ -3035,6 +3037,7 @@ private:
|
||||
void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
|
||||
void vbroadcastf128(XMMRegister dst, Address src, int vector_len);
|
||||
void evbroadcastf64x2(XMMRegister dst, Address src, int vector_len);
|
||||
|
||||
// gpr sourced byte/word/dword/qword replicate
|
||||
void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
|
||||
|
@ -376,11 +376,22 @@ class StubGenerator: public StubCodeGenerator {
|
||||
void roundDec(XMMRegister key, int rnum);
|
||||
void lastroundDec(XMMRegister key, int rnum);
|
||||
void gfmul_avx512(XMMRegister ghash, XMMRegister hkey);
|
||||
void generateHtbl_48_block_zmm(Register htbl, Register avx512_subkeyHtbl, Register rscratch);
|
||||
void ghash16_encrypt16_parallel(Register key, Register subkeyHtbl, XMMRegister ctr_blockx,
|
||||
XMMRegister aad_hashx, Register in, Register out, Register data, Register pos, bool reduction,
|
||||
XMMRegister addmask, bool no_ghash_input, Register rounds, Register ghash_pos,
|
||||
bool final_reduction, int index, XMMRegister counter_inc_mask);
|
||||
void ghash16_encrypt_parallel16_avx512(Register in, Register out, Register ct, Register pos, Register avx512_subkeyHtbl,
|
||||
Register CTR_CHECK, Register NROUNDS, Register key, XMMRegister CTR, XMMRegister GHASH,
|
||||
XMMRegister ADDBE_4x4, XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK,
|
||||
bool hk_broadcast, bool is_hash_start, bool do_hash_reduction, bool do_hash_hxor,
|
||||
bool no_ghash_in, int ghashin_offset, int aesout_offset, int hashkey_offset);
|
||||
void generateHtbl_32_blocks_avx512(Register htbl, Register avx512_htbl);
|
||||
void initial_blocks_16_avx512(Register in, Register out, Register ct, Register pos, Register key, Register avx512_subkeyHtbl,
|
||||
Register CTR_CHECK, Register rounds, XMMRegister CTR, XMMRegister GHASH, XMMRegister ADDBE_4x4,
|
||||
XMMRegister ADDBE_1234, XMMRegister ADD_1234, XMMRegister SHUF_MASK, int stack_offset);
|
||||
void gcm_enc_dec_last_avx512(Register len, Register in, Register pos, XMMRegister HASH, XMMRegister SHUFM, Register subkeyHtbl,
|
||||
int ghashin_offset, int hashkey_offset, bool start_ghash, bool do_reduction);
|
||||
void ghash16_avx512(bool start_ghash, bool do_reduction, bool uload_shuffle, bool hk_broadcast, bool do_hxor,
|
||||
Register in, Register pos, Register subkeyHtbl, XMMRegister HASH, XMMRegister SHUFM, int in_offset,
|
||||
int in_disp, int displacement, int hashkey_offset);
|
||||
void aesgcm_avx512(Register in, Register len, Register ct, Register out, Register key,
|
||||
Register state, Register subkeyHtbl, Register avx512_subkeyHtbl, Register counter);
|
||||
// AVX2 AES-GCM related functions
|
||||
void initial_blocks_avx2(XMMRegister ctr, Register rounds, Register key, Register len,
|
||||
Register in, Register out, Register ct, XMMRegister aad_hashx, Register pos);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2021, Intel Corporation. All rights reserved.
|
||||
* Copyright (c) 2019, 2024, Intel Corporation. All rights reserved.
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -57,7 +57,10 @@ address StubGenerator::ghash_byte_swap_mask_addr() {
|
||||
|
||||
// Polynomial x^128+x^127+x^126+x^121+1
|
||||
ATTRIBUTE_ALIGNED(16) static const uint64_t GHASH_POLYNOMIAL[] = {
|
||||
0x0000000000000001UL, 0xC200000000000000UL,
|
||||
0x0000000000000001ULL, 0xC200000000000000ULL,
|
||||
0x0000000000000001ULL, 0xC200000000000000ULL,
|
||||
0x0000000000000001ULL, 0xC200000000000000ULL,
|
||||
0x0000000000000001ULL, 0xC200000000000000ULL
|
||||
};
|
||||
address StubGenerator::ghash_polynomial_addr() {
|
||||
return (address)GHASH_POLYNOMIAL;
|
||||
|
@ -72,7 +72,7 @@ abstract class GaloisCounterMode extends CipherSpi {
|
||||
// data size when buffer is divided up to aid in intrinsics
|
||||
private static final int TRIGGERLEN = 65536; // 64k
|
||||
// x86-64 parallel intrinsic data size
|
||||
private static final int PARALLEL_LEN = 7680;
|
||||
private static final int PARALLEL_LEN = 512;
|
||||
// max data size for x86-64 intrinsic
|
||||
private static final int SPLIT_LEN = 1048576; // 1MB
|
||||
|
||||
|
@ -35,7 +35,7 @@ import javax.crypto.spec.GCMParameterSpec;
|
||||
|
||||
public class AESGCMBench extends BenchBase {
|
||||
|
||||
@Param({"128"})
|
||||
@Param({"128", "192", "256"})
|
||||
int keyLength;
|
||||
|
||||
public static final int IV_MODULO = 16;
|
||||
|
@ -45,7 +45,7 @@ public abstract class BenchBase extends CryptoBase {
|
||||
int keyLength = 256;
|
||||
|
||||
// Default data sizes for full tests
|
||||
@Param({"1024", "1500", "4096", "16384"})
|
||||
@Param({"128", "256", "512", "1024", "1500", "4096", "16384"})
|
||||
int dataSize;
|
||||
|
||||
static final int IV_BUFFER_SIZE = 36;
|
||||
|
Loading…
Reference in New Issue
Block a user