8334999: RISC-V: implement AES single block encryption/decryption intrinsics

Reviewed-by: fyang, rehn, yzhu
This commit is contained in:
Arseny Bochkarev 2024-10-31 12:19:48 +00:00
parent c40bb7621c
commit e5bbad059d
4 changed files with 199 additions and 13 deletions

View File

@ -1962,6 +1962,13 @@ enum Nf {
INSN(vbrev8_v, 0b1010111, 0b010, 0b01000, 0b010010); // reverse bits in every byte of element INSN(vbrev8_v, 0b1010111, 0b010, 0b01000, 0b010010); // reverse bits in every byte of element
INSN(vrev8_v, 0b1010111, 0b010, 0b01001, 0b010010); // reverse bytes in every elememt INSN(vrev8_v, 0b1010111, 0b010, 0b01001, 0b010010); // reverse bytes in every elememt
// Vector AES instructions (Zvkned extension)
INSN(vaesem_vv, 0b1110111, 0b010, 0b00010, 0b101000); // AES middle-round encryption
INSN(vaesef_vv, 0b1110111, 0b010, 0b00011, 0b101000); // AES final-round encryption
INSN(vaesdm_vv, 0b1110111, 0b010, 0b00000, 0b101000); // AES middle-round decryption
INSN(vaesdf_vv, 0b1110111, 0b010, 0b00001, 0b101000); // AES final-round decryption
INSN(vclz_v, 0b1010111, 0b010, 0b01100, 0b010010); // count leading zeros INSN(vclz_v, 0b1010111, 0b010, 0b01100, 0b010010); // count leading zeros
INSN(vctz_v, 0b1010111, 0b010, 0b01101, 0b010010); // count trailing zeros INSN(vctz_v, 0b1010111, 0b010, 0b01101, 0b010010); // count trailing zeros

View File

@ -2276,6 +2276,174 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
} }
void generate_aes_loadkeys(const Register &key, VectorRegister *working_vregs, int rounds) {
const int step = 16;
for (int i = 0; i < rounds; i++) {
__ vle32_v(working_vregs[i], key);
// The keys are stored in little-endian array, while we need
// to operate in big-endian.
// So performing an endian-swap here with vrev8.v instruction
__ vrev8_v(working_vregs[i], working_vregs[i]);
__ addi(key, key, step);
}
}
// Emits the encryption rounds for the block held in `res`, using the round
// keys previously loaded into working_vregs[0 .. rounds - 1]:
//   - XOR with the first round key,
//   - one vaesem.vv per intermediate round key,
//   - one vaesef.vv with the last round key.
// The result is left in `res`.
void generate_aes_encrypt(const VectorRegister &res, VectorRegister *working_vregs, int rounds) {
  assert(rounds <= 15, "rounds should be less than or equal to working_vregs size");

  const int last = rounds - 1;

  // Initial round key addition.
  __ vxor_vv(res, res, working_vregs[0]);

  // Intermediate rounds: keys 1 .. last - 1, in ascending order.
  int idx = 1;
  while (idx < last) {
    __ vaesem_vv(res, working_vregs[idx]);
    idx++;
  }

  // Final round uses the last key.
  __ vaesef_vv(res, working_vregs[last]);
}
// Generates the stub that encrypts one 16-byte block (4 x 32-bit elements)
// with the vector AES instructions.
//
// Arguments:
//
// Inputs:
//   c_rarg0   - source byte array address
//   c_rarg1   - destination byte array address
//   c_rarg2   - K (key) in little endian int array
//
// Clobbered by the generated code:
//   c_rarg2   - advanced past the round keys while they are loaded
//   c_rarg3   - holds the key array length
//   t2        - scratch for the key length comparison
//   v4 - v19  - round keys (v4 - v18) and the block being processed (v19)
//
// The generated code sets c_rarg0 to 0 before returning.
//
address generate_aescrypt_encryptBlock() {
  assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support");

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");

  Label L_aes128, L_aes192;

  const Register from        = c_rarg0;  // source array address
  const Register to          = c_rarg1;  // destination array address
  const Register key         = c_rarg2;  // key array address
  const Register keylen      = c_rarg3;  // length of the key array, in ints

  VectorRegister working_vregs[] = {
    v4, v5, v6, v7, v8, v9, v10, v11,
    v12, v13, v14, v15, v16, v17, v18
  };
  const VectorRegister res   = v19;

  // Key array length (in ints) that selects the 13-round-key variant:
  // 13 round keys * 4 ints each. Shorter arrays take the 11-key path,
  // longer ones the 15-key path.
  const int aes192_keylen = 52;

  // One entry per key size, in the order the code is laid out. The first
  // entry has no label because it is reached by falling through the
  // dispatch branches.
  struct KeySizeCase {
    Label* entry;
    int    round_keys;
  };
  const KeySizeCase cases[] = {
    { nullptr,   15 },  // 256-bit key
    { &L_aes192, 13 },  // 192-bit key
    { &L_aes128, 11 },  // 128-bit key
  };

  address start = __ pc();
  __ enter(); // required for proper stackwalking of RuntimeStub frame

  // key points at the first array element, so the length field lives at a
  // negative offset from it.
  __ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

  // Process exactly four 32-bit elements, i.e. a single block.
  __ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
  __ vle32_v(res, from);

  __ mv(t2, aes192_keylen);
  __ blt(keylen, t2, L_aes128);
  __ beq(keylen, t2, L_aes192);
  // Else we fallthrough to the biggest case (256-bit key size)

  for (const KeySizeCase& c : cases) {
    if (c.entry != nullptr) {
      __ bind(*c.entry);
    }
    // Note: the following function performs key += round_keys*16
    generate_aes_loadkeys(key, working_vregs, c.round_keys);
    generate_aes_encrypt(res, working_vregs, c.round_keys);
    __ vse32_v(res, to);
    __ mv(c_rarg0, 0);
    __ leave();
    __ ret();
  }

  return start;
}
// Emits the decryption rounds for the block held in `res`, walking the round
// keys in working_vregs[0 .. rounds - 1] in reverse order:
//   - XOR with the last round key,
//   - one vaesdm.vv per intermediate round key (descending),
//   - one vaesdf.vv with the first round key.
// The result is left in `res`.
void generate_aes_decrypt(const VectorRegister &res, VectorRegister *working_vregs, int rounds) {
  assert(rounds <= 15, "rounds should be less than or equal to working_vregs size");

  const int last = rounds - 1;

  // Start from the last round key.
  __ vxor_vv(res, res, working_vregs[last]);

  // Intermediate rounds: keys last - 1 .. 1, in descending order.
  int idx = last;
  while (--idx > 0) {
    __ vaesdm_vv(res, working_vregs[idx]);
  }

  // Final round uses the first key.
  __ vaesdf_vv(res, working_vregs[0]);
}
// Generates the stub that decrypts one 16-byte block (4 x 32-bit elements)
// with the vector AES instructions. The round keys are loaded in array
// order and then applied in reverse by generate_aes_decrypt.
//
// Arguments:
//
// Inputs:
//   c_rarg0   - source byte array address
//   c_rarg1   - destination byte array address
//   c_rarg2   - K (key) in little endian int array
//
// Clobbered by the generated code:
//   c_rarg2   - advanced past the round keys while they are loaded
//   c_rarg3   - holds the key array length
//   t2        - scratch for the key length comparison
//   v4 - v19  - round keys (v4 - v18) and the block being processed (v19)
//
// The generated code sets c_rarg0 to 0 before returning.
//
address generate_aescrypt_decryptBlock() {
  assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support");

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");

  Label L_aes128, L_aes192;

  const Register from        = c_rarg0;  // source array address
  const Register to          = c_rarg1;  // destination array address
  const Register key         = c_rarg2;  // key array address
  const Register keylen      = c_rarg3;  // length of the key array, in ints

  VectorRegister working_vregs[] = {
    v4, v5, v6, v7, v8, v9, v10, v11,
    v12, v13, v14, v15, v16, v17, v18
  };
  const VectorRegister res   = v19;

  // Key array length (in ints) that selects the 13-round-key variant:
  // 13 round keys * 4 ints each. Shorter arrays take the 11-key path,
  // longer ones the 15-key path.
  const int aes192_keylen = 52;

  // One entry per key size, in the order the code is laid out. The first
  // entry has no label because it is reached by falling through the
  // dispatch branches.
  struct KeySizeCase {
    Label* entry;
    int    round_keys;
  };
  const KeySizeCase cases[] = {
    { nullptr,   15 },  // 256-bit key
    { &L_aes192, 13 },  // 192-bit key
    { &L_aes128, 11 },  // 128-bit key
  };

  address start = __ pc();
  __ enter(); // required for proper stackwalking of RuntimeStub frame

  // key points at the first array element, so the length field lives at a
  // negative offset from it.
  __ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));

  // Process exactly four 32-bit elements, i.e. a single block.
  __ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
  __ vle32_v(res, from);

  __ mv(t2, aes192_keylen);
  __ blt(keylen, t2, L_aes128);
  __ beq(keylen, t2, L_aes192);
  // Else we fallthrough to the biggest case (256-bit key size)

  for (const KeySizeCase& c : cases) {
    if (c.entry != nullptr) {
      __ bind(*c.entry);
    }
    // Note: the following function performs key += round_keys*16
    generate_aes_loadkeys(key, working_vregs, c.round_keys);
    generate_aes_decrypt(res, working_vregs, c.round_keys);
    __ vse32_v(res, to);
    __ mv(c_rarg0, 0);
    __ leave();
    __ ret();
  }

  return start;
}
// code for comparing 16 bytes of strings with same encoding // code for comparing 16 bytes of strings with same encoding
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) { void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31; const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31;
@ -6294,6 +6462,11 @@ static const int64_t right_3_bits = right_n_bits(3);
StubRoutines::_montgomerySquare = g.generate_square(); StubRoutines::_montgomerySquare = g.generate_square();
} }
// Generate and register the single-block AES encrypt/decrypt stubs only when
// AES intrinsics are enabled; both generators assert UseAESIntrinsics.
if (UseAESIntrinsics) {
StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
}
if (UsePoly1305Intrinsics) { if (UsePoly1305Intrinsics) {
StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks(); StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks();
} }

View File

@ -122,17 +122,6 @@ void VM_Version::common_initialize() {
FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0); FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0);
} }
// AES is treated as unavailable here: for each AES flag that is on and was
// not left at its default value, print a warning and switch it back off.
// A flag that is on only by default is left untouched.
if (UseAES || UseAESIntrinsics) {
if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
warning("AES instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseAES, false);
}
if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
warning("AES intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
}
if (UseAESCTRIntrinsics) { if (UseAESCTRIntrinsics) {
warning("AES/CTR intrinsics are not available on this CPU"); warning("AES/CTR intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
@ -429,6 +418,23 @@ void VM_Version::c2_initialize() {
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) { if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false); FLAG_SET_DEFAULT(UseSHA, false);
} }
// AES
// With UseZvkn set, AES support is switched on by default; without it, both
// AES flags are forced off.
if (UseZvkn) {
// Keep UseAES if it is already on, and turn it on when it was left at its
// default. A non-default "off" stays off at this point.
UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
// Intrinsics follow UseAES when UseAESIntrinsics was left at its default;
// a value that is already on is kept.
UseAESIntrinsics =
UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics));
// Intrinsics were requested while UseAES is off: the intrinsics request
// wins and UseAES is turned back on, with a warning.
if (UseAESIntrinsics && !UseAES) {
warning("UseAESIntrinsics enabled, but UseAES not, enabling");
UseAES = true;
}
} else if (UseAESIntrinsics || UseAES) {
// No Zvkn: warn only when at least one of the flags was changed from its
// default, then disable both unconditionally.
if (!FLAG_IS_DEFAULT(UseAESIntrinsics) || !FLAG_IS_DEFAULT(UseAES)) {
warning("AES intrinsics require Zvkn extension (not available on this CPU).");
}
FLAG_SET_DEFAULT(UseAES, false);
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
} }
#endif // COMPILER2 #endif // COMPILER2

View File

@ -7374,11 +7374,11 @@ bool LibraryCallKit::inline_counterMode_AESCrypt(vmIntrinsics::ID id) {
//------------------------------get_key_start_from_aescrypt_object----------------------- //------------------------------get_key_start_from_aescrypt_object-----------------------
Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) { Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
#if defined(PPC64) || defined(S390) #if defined(PPC64) || defined(S390) || defined(RISCV64)
// MixColumns for decryption can be reduced by preprocessing MixColumns with round keys. // MixColumns for decryption can be reduced by preprocessing MixColumns with round keys.
// Intel's extension is based on this optimization and AESCrypt generates round keys by preprocessing MixColumns. // Intel's extension is based on this optimization and AESCrypt generates round keys by preprocessing MixColumns.
// However, ppc64 vncipher processes MixColumns and requires the same round keys with encryption. // However, ppc64 vncipher processes MixColumns and requires the same round keys with encryption.
// The ppc64 stubs of encryption and decryption use the same round keys (sessionK[0]). // The ppc64 and riscv64 stubs of encryption and decryption use the same round keys (sessionK[0]).
Node* objSessionK = load_field_from_object(aescrypt_object, "sessionK", "[[I"); Node* objSessionK = load_field_from_object(aescrypt_object, "sessionK", "[[I");
assert (objSessionK != nullptr, "wrong version of com.sun.crypto.provider.AESCrypt"); assert (objSessionK != nullptr, "wrong version of com.sun.crypto.provider.AESCrypt");
if (objSessionK == nullptr) { if (objSessionK == nullptr) {