From e5bbad059d5dc293288ac415c9f7d0fa89c64b74 Mon Sep 17 00:00:00 2001
From: Arseny Bochkarev
Date: Thu, 31 Oct 2024 12:19:48 +0000
Subject: [PATCH] 8334999: RISC-V: implement AES single block encryption/decryption intrinsics

Reviewed-by: fyang, rehn, yzhu
---
 src/hotspot/cpu/riscv/assembler_riscv.hpp     |   7 +
 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp | 173 ++++++++++++++++++
 src/hotspot/cpu/riscv/vm_version_riscv.cpp    |  28 +--
 src/hotspot/share/opto/library_call.cpp       |   4 +-
 4 files changed, 199 insertions(+), 13 deletions(-)

diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
index 23046419460..7334ec675e3 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -1962,6 +1962,13 @@ enum Nf {
 
   INSN(vbrev8_v, 0b1010111, 0b010, 0b01000, 0b010010); // reverse bits in every byte of element
   INSN(vrev8_v,  0b1010111, 0b010, 0b01001, 0b010010); // reverse bytes in every elememt
 
+  // Vector AES instructions (Zvkned extension)
+  INSN(vaesem_vv, 0b1110111, 0b010, 0b00010, 0b101000);
+  INSN(vaesef_vv, 0b1110111, 0b010, 0b00011, 0b101000);
+
+  INSN(vaesdm_vv, 0b1110111, 0b010, 0b00000, 0b101000);
+  INSN(vaesdf_vv, 0b1110111, 0b010, 0b00001, 0b101000);
+
   INSN(vclz_v, 0b1010111, 0b010, 0b01100, 0b010010); // count leading zeros
   INSN(vctz_v, 0b1010111, 0b010, 0b01101, 0b010010); // count trailing zeros
 
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 77a2b794a7e..bce0c8f1f3d 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -2276,6 +2276,174 @@ class StubGenerator: public StubCodeGenerator {
     StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
   }
 
+  void generate_aes_loadkeys(const Register &key, VectorRegister *working_vregs, int rounds) {
+    const int step = 16;
+    for (int i = 0; i < rounds; i++) {
+      __ vle32_v(working_vregs[i], key);
+      // The round keys are stored as a little-endian int array, while the
+      // vector AES instructions operate on big-endian data, so we perform
+      // an endian swap here with the vrev8.v instruction.
+      __ vrev8_v(working_vregs[i], working_vregs[i]);
+      __ addi(key, key, step);
+    }
+  }
+
+  void generate_aes_encrypt(const VectorRegister &res, VectorRegister *working_vregs, int rounds) {
+    assert(rounds <= 15, "rounds should be less than or equal to working_vregs size");
+
+    __ vxor_vv(res, res, working_vregs[0]);
+    for (int i = 1; i < rounds - 1; i++) {
+      __ vaesem_vv(res, working_vregs[i]);
+    }
+    __ vaesef_vv(res, working_vregs[rounds - 1]);
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_encryptBlock() {
+    assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+
+    Label L_aes128, L_aes192;
+
+    const Register from   = c_rarg0;  // source array address
+    const Register to     = c_rarg1;  // destination array address
+    const Register key    = c_rarg2;  // key array address
+    const Register keylen = c_rarg3;
+
+    VectorRegister working_vregs[] = {
+      v4,  v5,  v6,  v7,  v8,  v9,  v10, v11,
+      v12, v13, v14, v15, v16, v17, v18
+    };
+    const VectorRegister res = v19;
+
+    address start = __ pc();
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
+    __ vle32_v(res, from);
+
+    __ mv(t2, 52);
+    __ blt(keylen, t2, L_aes128);
+    __ beq(keylen, t2, L_aes192);
+    // Otherwise we fall through to the largest case (256-bit key size)
+
+    // Note: the following function performs key += 15*16
+    generate_aes_loadkeys(key, working_vregs, 15);
+    generate_aes_encrypt(res, working_vregs, 15);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    __ bind(L_aes192);
+    // Note: the following function performs key += 13*16
+    generate_aes_loadkeys(key, working_vregs, 13);
+    generate_aes_encrypt(res, working_vregs, 13);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    __ bind(L_aes128);
+    // Note: the following function performs key += 11*16
+    generate_aes_loadkeys(key, working_vregs, 11);
+    generate_aes_encrypt(res, working_vregs, 11);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    return start;
+  }
+
+  void generate_aes_decrypt(const VectorRegister &res, VectorRegister *working_vregs, int rounds) {
+    assert(rounds <= 15, "rounds should be less than or equal to working_vregs size");
+
+    __ vxor_vv(res, res, working_vregs[rounds - 1]);
+    for (int i = rounds - 2; i > 0; i--) {
+      __ vaesdm_vv(res, working_vregs[i]);
+    }
+    __ vaesdf_vv(res, working_vregs[0]);
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_decryptBlock() {
+    assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+
+    Label L_aes128, L_aes192;
+
+    const Register from   = c_rarg0;  // source array address
+    const Register to     = c_rarg1;  // destination array address
+    const Register key    = c_rarg2;  // key array address
+    const Register keylen = c_rarg3;
+
+    VectorRegister working_vregs[] = {
+      v4,  v5,  v6,  v7,  v8,  v9,  v10, v11,
+      v12, v13, v14, v15, v16, v17, v18
+    };
+    const VectorRegister res = v19;
+
+    address start = __ pc();
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
+    __ vle32_v(res, from);
+
+    __ mv(t2, 52);
+    __ blt(keylen, t2, L_aes128);
+    __ beq(keylen, t2, L_aes192);
+    // Otherwise we fall through to the largest case (256-bit key size)
+
+    // Note: the following function performs key += 15*16
+    generate_aes_loadkeys(key, working_vregs, 15);
+    generate_aes_decrypt(res, working_vregs, 15);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    __ bind(L_aes192);
+    // Note: the following function performs key += 13*16
+    generate_aes_loadkeys(key, working_vregs, 13);
+    generate_aes_decrypt(res, working_vregs, 13);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    __ bind(L_aes128);
+    // Note: the following function performs key += 11*16
+    generate_aes_loadkeys(key, working_vregs, 11);
+    generate_aes_decrypt(res, working_vregs, 11);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    return start;
+  }
+
   // code for comparing 16 bytes of strings with same encoding
   void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
     const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31;
@@ -6294,6 +6462,11 @@ static const int64_t right_3_bits = right_n_bits(3);
       StubRoutines::_montgomerySquare = g.generate_square();
     }
 
+    if (UseAESIntrinsics) {
+      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+    }
+
     if (UsePoly1305Intrinsics) {
       StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks();
     }
diff --git a/src/hotspot/cpu/riscv/vm_version_riscv.cpp b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
index e9c6226f446..c32d2af9939 100644
--- a/src/hotspot/cpu/riscv/vm_version_riscv.cpp
+++ b/src/hotspot/cpu/riscv/vm_version_riscv.cpp
@@ -122,17 +122,6 @@ void VM_Version::common_initialize() {
     FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0);
   }
 
-  if (UseAES || UseAESIntrinsics) {
-    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
-      warning("AES instructions are not available on this CPU");
-      FLAG_SET_DEFAULT(UseAES, false);
-    }
-    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
-      warning("AES intrinsics are not available on this CPU");
-      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
-    }
-  }
-
   if (UseAESCTRIntrinsics) {
     warning("AES/CTR intrinsics are not available on this CPU");
     FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
@@ -429,6 +418,23 @@ void VM_Version::c2_initialize() {
   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) {
     FLAG_SET_DEFAULT(UseSHA, false);
   }
+
+  // AES
+  if (UseZvkn) {
+    UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
+    UseAESIntrinsics =
+        UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics));
+    if (UseAESIntrinsics && !UseAES) {
+      warning("UseAESIntrinsics enabled, but UseAES not, enabling");
+      UseAES = true;
+    }
+  } else if (UseAESIntrinsics || UseAES) {
+    if (!FLAG_IS_DEFAULT(UseAESIntrinsics) || !FLAG_IS_DEFAULT(UseAES)) {
+      warning("AES intrinsics require Zvkn extension (not available on this CPU).");
+    }
+    FLAG_SET_DEFAULT(UseAES, false);
+    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+  }
 }
 
 #endif // COMPILER2
diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp
index 6b4a6524fd5..523b1efd0c2 100644
--- a/src/hotspot/share/opto/library_call.cpp
+++ b/src/hotspot/share/opto/library_call.cpp
@@ -7374,11 +7374,11 @@ bool LibraryCallKit::inline_counterMode_AESCrypt(vmIntrinsics::ID id) {
 
 //------------------------------get_key_start_from_aescrypt_object-----------------------
 Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
-#if defined(PPC64) || defined(S390)
+#if defined(PPC64) || defined(S390) || defined(RISCV64)
   // MixColumns for decryption can be reduced by preprocessing MixColumns with round keys.
   // Intel's extension is based on this optimization and AESCrypt generates round keys by preprocessing MixColumns.
   // However, ppc64 vncipher processes MixColumns and requires the same round keys with encryption.
-  // The ppc64 stubs of encryption and decryption use the same round keys (sessionK[0]).
+  // The ppc64 and riscv64 stubs of encryption and decryption use the same round keys (sessionK[0]).
   Node* objSessionK = load_field_from_object(aescrypt_object, "sessionK", "[[I");
   assert (objSessionK != nullptr, "wrong version of com.sun.crypto.provider.AESCrypt");
   if (objSessionK == nullptr) {
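
Note (not part of the patch): the stubs above pick the number of 128-bit round keys from the expanded-key length that AESCrypt passes down, measured in 32-bit ints: 44 ints for AES-128 (11 round keys), 52 for AES-192 (13), and 60 for AES-256 (15), which is why keylen is compared against 52. The following is a minimal standalone C++ sketch that restates only that dispatch; the round_keys_for helper is hypothetical and exists purely for illustration, it is not HotSpot code.

    // Hedged sketch: keylen -> number of 128-bit round keys, mirroring the
    // blt/beq dispatch in generate_aescrypt_encryptBlock/decryptBlock.
    #include <cstdio>
    #include <initializer_list>

    // keylen_in_ints is the expanded key length in 32-bit ints, i.e. 4 * (Nr + 1).
    static int round_keys_for(int keylen_in_ints) {
      if (keylen_in_ints < 52)  return 11;  // AES-128: 44 ints, 10 rounds + 1
      if (keylen_in_ints == 52) return 13;  // AES-192: 52 ints, 12 rounds + 1
      return 15;                            // AES-256: 60 ints, 14 rounds + 1
    }

    int main() {
      for (int len : {44, 52, 60}) {
        std::printf("keylen = %d ints -> %d round keys\n", len, round_keys_for(len));
      }
      return 0;
    }

On Zvkn-capable hardware the new path should be reachable with -XX:+UseZvkn (the flag checked in the vm_version_riscv.cpp hunk above) and can presumably be confirmed with -XX:+UnlockDiagnosticVMOptions -XX:+PrintIntrinsics; treat that verification recipe as an assumption to check rather than something specified by this patch.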