8334999: RISC-V: implement AES single block encryption/decryption intrinsics
Reviewed-by: fyang, rehn, yzhu
This commit is contained in: parent c40bb7621c, commit e5bbad059d
@@ -1962,6 +1962,13 @@ enum Nf {
   INSN(vbrev8_v,  0b1010111, 0b010, 0b01000, 0b010010); // reverse bits in every byte of element
   INSN(vrev8_v,   0b1010111, 0b010, 0b01001, 0b010010); // reverse bytes in every element
+
+  // Vector AES instructions (Zvkned extension)
+  INSN(vaesem_vv, 0b1110111, 0b010, 0b00010, 0b101000);
+  INSN(vaesef_vv, 0b1110111, 0b010, 0b00011, 0b101000);
+
+  INSN(vaesdm_vv, 0b1110111, 0b010, 0b00000, 0b101000);
+  INSN(vaesdf_vv, 0b1110111, 0b010, 0b00001, 0b101000);
+
   INSN(vclz_v,    0b1010111, 0b010, 0b01100, 0b010010); // count leading zeros
   INSN(vctz_v,    0b1010111, 0b010, 0b01101, 0b010010); // count trailing zeros
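The INSN macro arguments above are raw encoding fields. As a quick orientation, here is a minimal stand-alone sketch of how such an instruction word could be packed, assuming the standard RVV layout (funct6 in bits 31:26, vm in bit 25, vs2 in 24:20, a fixed selector in 19:15, funct3 in 14:12, vd in 11:7, major opcode in 6:0). The helper name and field packing are our assumption based on the published vector-crypto encodings, not HotSpot's actual macro body:

#include <cstdint>
#include <cstdio>

// Hypothetical encoder mirroring the INSN(vaesem_vv, ...) field order:
// (opcode, funct3, selector, funct6). For the Zvkned .vv forms, bits 19:15
// carry a fixed selector (0b00010 for vaesem.vv) rather than a vs1 register.
static uint32_t encode_vcrypto_vv(uint32_t opcode, uint32_t funct3,
                                  uint32_t selector, uint32_t funct6,
                                  uint32_t vd, uint32_t vs2) {
  const uint32_t vm = 1;  // these vector-crypto instructions are unmasked
  return (funct6 << 26) | (vm << 25) | (vs2 << 20) |
         (selector << 15) | (funct3 << 12) | (vd << 7) | opcode;
}

int main() {
  // vaesem.vv v19, v4 -- same fields as the INSN table above.
  std::printf("%08x\n",
              encode_vcrypto_vv(0b1110111, 0b010, 0b00010, 0b101000, 19, 4));
  return 0;
}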
@@ -2276,6 +2276,174 @@ class StubGenerator: public StubCodeGenerator {
     StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
   }
 
+  void generate_aes_loadkeys(const Register &key, VectorRegister *working_vregs, int rounds) {
+    const int step = 16;
+    for (int i = 0; i < rounds; i++) {
+      __ vle32_v(working_vregs[i], key);
+      // The keys are stored in a little-endian array, while we need
+      // to operate in big-endian, so perform an endian swap here
+      // with the vrev8.v instruction.
+      __ vrev8_v(working_vregs[i], working_vregs[i]);
+      __ addi(key, key, step);
+    }
+  }
+
+  void generate_aes_encrypt(const VectorRegister &res, VectorRegister *working_vregs, int rounds) {
+    assert(rounds <= 15, "rounds should be less than or equal to working_vregs size");
+
+    __ vxor_vv(res, res, working_vregs[0]);
+    for (int i = 1; i < rounds - 1; i++) {
+      __ vaesem_vv(res, working_vregs[i]);
+    }
+    __ vaesef_vv(res, working_vregs[rounds - 1]);
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0 - source byte array address
+  //   c_rarg1 - destination byte array address
+  //   c_rarg2 - K (key) in little endian int array
+  //
+  address generate_aescrypt_encryptBlock() {
+    assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+
+    Label L_aes128, L_aes192;
+
+    const Register from   = c_rarg0;  // source array address
+    const Register to     = c_rarg1;  // destination array address
+    const Register key    = c_rarg2;  // key array address
+    const Register keylen = c_rarg3;
+
+    VectorRegister working_vregs[] = {
+      v4,  v5,  v6,  v7,  v8,  v9,  v10, v11,
+      v12, v13, v14, v15, v16, v17, v18
+    };
+    const VectorRegister res = v19;
+
+    address start = __ pc();
+    __ enter();
+
+    __ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
+    __ vle32_v(res, from);
+
+    __ mv(t2, 52);
+    __ blt(keylen, t2, L_aes128);
+    __ beq(keylen, t2, L_aes192);
+    // Else we fall through to the biggest case (256-bit key size)
+
+    // Note: the following function performs key += 15*16
+    generate_aes_loadkeys(key, working_vregs, 15);
+    generate_aes_encrypt(res, working_vregs, 15);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    __ bind(L_aes192);
+    // Note: the following function performs key += 13*16
+    generate_aes_loadkeys(key, working_vregs, 13);
+    generate_aes_encrypt(res, working_vregs, 13);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    __ bind(L_aes128);
+    // Note: the following function performs key += 11*16
+    generate_aes_loadkeys(key, working_vregs, 11);
+    generate_aes_encrypt(res, working_vregs, 11);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    return start;
+  }
+
+  void generate_aes_decrypt(const VectorRegister &res, VectorRegister *working_vregs, int rounds) {
+    assert(rounds <= 15, "rounds should be less than or equal to working_vregs size");
+
+    __ vxor_vv(res, res, working_vregs[rounds - 1]);
+    for (int i = rounds - 2; i > 0; i--) {
+      __ vaesdm_vv(res, working_vregs[i]);
+    }
+    __ vaesdf_vv(res, working_vregs[0]);
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0 - source byte array address
+  //   c_rarg1 - destination byte array address
+  //   c_rarg2 - K (key) in little endian int array
+  //
+  address generate_aescrypt_decryptBlock() {
+    assert(UseAESIntrinsics, "need AES instructions (Zvkned extension) support");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+
+    Label L_aes128, L_aes192;
+
+    const Register from   = c_rarg0;  // source array address
+    const Register to     = c_rarg1;  // destination array address
+    const Register key    = c_rarg2;  // key array address
+    const Register keylen = c_rarg3;
+
+    VectorRegister working_vregs[] = {
+      v4,  v5,  v6,  v7,  v8,  v9,  v10, v11,
+      v12, v13, v14, v15, v16, v17, v18
+    };
+    const VectorRegister res = v19;
+
+    address start = __ pc();
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    __ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
+    __ vle32_v(res, from);
+
+    __ mv(t2, 52);
+    __ blt(keylen, t2, L_aes128);
+    __ beq(keylen, t2, L_aes192);
+    // Else we fall through to the biggest case (256-bit key size)
+
+    // Note: the following function performs key += 15*16
+    generate_aes_loadkeys(key, working_vregs, 15);
+    generate_aes_decrypt(res, working_vregs, 15);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    __ bind(L_aes192);
+    // Note: the following function performs key += 13*16
+    generate_aes_loadkeys(key, working_vregs, 13);
+    generate_aes_decrypt(res, working_vregs, 13);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    __ bind(L_aes128);
+    // Note: the following function performs key += 11*16
+    generate_aes_loadkeys(key, working_vregs, 11);
+    generate_aes_decrypt(res, working_vregs, 11);
+    __ vse32_v(res, to);
+    __ mv(c_rarg0, 0);
+    __ leave();
+    __ ret();
+
+    return start;
+  }
+
   // code for comparing 16 bytes of strings with same encoding
   void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
     const Register result = x10, str1 = x11, cnt1 = x12, str2 = x13, tmp1 = x28, tmp2 = x29, tmp4 = x7, tmp5 = x31;
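A note on the endian swap in generate_aes_loadkeys: as the source comment says, AESCrypt hands the stub its round keys as a little-endian int array while the round instructions operate big-endian, hence the vrev8.v after every vle32.v. A scalar model of what vrev8.v does to each element at SEW=32 (the width set by the stubs' vsetivli), purely illustrative:

#include <cstdint>

// Scalar model of vrev8.v at SEW=32: reverse the bytes inside every
// 32-bit element. The vector instruction applies this lane by lane.
static uint32_t rev8_e32(uint32_t w) {
  return (w >> 24) | ((w >> 8) & 0x0000FF00u) |
         ((w << 8) & 0x00FF0000u) | (w << 24);
}

// e.g. rev8_e32(0x04030201) == 0x01020304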
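The generate_aes_encrypt/generate_aes_decrypt helpers map AES's round schedule directly onto the Zvkned instructions: an initial AddRoundKey (vxor.vv), rounds-2 middle rounds (vaesem.vv / vaesdm.vv), and a final round without MixColumns (vaesef.vv / vaesdf.vv), with decryption walking the same encryption-order keys backwards. Note that rounds, as in the stubs' asserts, counts round keys (11/13/15). A schematic C++ model of that control flow; the round bodies are deliberately empty placeholders, since the real transformations happen inside the hardware instructions:

#include <cstdint>

using block_t = uint8_t[16];

// Placeholders for the hardware round primitives -- intentionally no-ops;
// only the key-schedule traversal is being modeled here.
static void aes_middle_round(block_t, const block_t)     {} // ~ vaesem.vv
static void aes_final_round(block_t, const block_t)      {} // ~ vaesef.vv
static void aes_inv_middle_round(block_t, const block_t) {} // ~ vaesdm.vv
static void aes_inv_final_round(block_t, const block_t)  {} // ~ vaesdf.vv

static void encrypt_block(block_t st, const block_t rk[], int rounds) {
  for (int i = 0; i < 16; i++) st[i] ^= rk[0][i];          // ~ vxor.vv
  for (int r = 1; r < rounds - 1; r++) aes_middle_round(st, rk[r]);
  aes_final_round(st, rk[rounds - 1]);                     // no MixColumns
}

static void decrypt_block(block_t st, const block_t rk[], int rounds) {
  for (int i = 0; i < 16; i++) st[i] ^= rk[rounds - 1][i]; // last key first
  for (int r = rounds - 2; r > 0; r--) aes_inv_middle_round(st, rk[r]);
  aes_inv_final_round(st, rk[0]);                          // first key last
}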
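The branch on 52 in both stubs decodes the key size from the Java array length: an expanded AES key is an int[] of 4*(Nr+1) words, i.e. 44/52/60 ints for AES-128/192/256, which the stubs map to 11/13/15 round keys. A hypothetical scalar equivalent of the `mv t2, 52; blt/beq` dispatch (helper name is ours):

static int round_keys_from_keylen(int keylen_ints) {
  if (keylen_ints < 52)  return 11;  // AES-128: 44 ints = 4*(10+1)
  if (keylen_ints == 52) return 13;  // AES-192: 52 ints = 4*(12+1)
  return 15;                         // AES-256: 60 ints = 4*(14+1)
}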
@@ -6294,6 +6462,11 @@ static const int64_t right_3_bits = right_n_bits(3);
     StubRoutines::_montgomerySquare = g.generate_square();
   }
 
+  if (UseAESIntrinsics) {
+    StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+    StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+  }
+
   if (UsePoly1305Intrinsics) {
     StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks();
   }
@@ -122,17 +122,6 @@ void VM_Version::common_initialize() {
     FLAG_SET_DEFAULT(AllocatePrefetchDistance, 0);
   }
 
-  if (UseAES || UseAESIntrinsics) {
-    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
-      warning("AES instructions are not available on this CPU");
-      FLAG_SET_DEFAULT(UseAES, false);
-    }
-    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
-      warning("AES intrinsics are not available on this CPU");
-      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
-    }
-  }
-
   if (UseAESCTRIntrinsics) {
     warning("AES/CTR intrinsics are not available on this CPU");
     FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);

@@ -429,6 +418,23 @@ void VM_Version::c2_initialize() {
   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) {
     FLAG_SET_DEFAULT(UseSHA, false);
   }
 
+  // AES
+  if (UseZvkn) {
+    UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
+    UseAESIntrinsics =
+        UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics));
+    if (UseAESIntrinsics && !UseAES) {
+      warning("UseAESIntrinsics enabled, but UseAES not, enabling");
+      UseAES = true;
+    }
+  } else if (UseAESIntrinsics || UseAES) {
+    if (!FLAG_IS_DEFAULT(UseAESIntrinsics) || !FLAG_IS_DEFAULT(UseAES)) {
+      warning("AES intrinsics require Zvkn extension (not available on this CPU).");
+    }
+    FLAG_SET_DEFAULT(UseAES, false);
+    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+  }
+
 }
 #endif // COMPILER2
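The new Zvkn-gated logic replaces the unconditional "not available" warnings removed from common_initialize() above: with Zvkn present, UseAES and UseAESIntrinsics default on; without it, they are forced off with a warning only if the user set them explicitly. A simplified stand-alone model of those outcomes (flag plumbing reduced to booleans; FLAG_IS_DEFAULT and warning() are stood in for):

#include <cstdio>

// Simplified model of the c2_initialize() AES gating above. `*_set`
// means the user passed the flag explicitly (i.e. !FLAG_IS_DEFAULT).
static void resolve_aes_flags(bool zvkn, bool &use_aes, bool aes_set,
                              bool &use_intrinsics, bool intrinsics_set) {
  if (zvkn) {
    use_aes        = use_aes || !aes_set;                  // defaults on
    use_intrinsics = use_intrinsics || (use_aes && !intrinsics_set);
    if (use_intrinsics && !use_aes) {
      std::puts("warning: UseAESIntrinsics enabled, but UseAES not, enabling");
      use_aes = true;
    }
  } else if (use_intrinsics || use_aes) {
    if (intrinsics_set || aes_set) {
      std::puts("warning: AES intrinsics require Zvkn extension");
    }
    use_aes = use_intrinsics = false;                      // forced off
  }
}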
@@ -7374,11 +7374,11 @@ bool LibraryCallKit::inline_counterMode_AESCrypt(vmIntrinsics::ID id) {
 
 //------------------------------get_key_start_from_aescrypt_object-----------------------
 Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
-#if defined(PPC64) || defined(S390)
+#if defined(PPC64) || defined(S390) || defined(RISCV64)
   // MixColumns for decryption can be reduced by preprocessing MixColumns with round keys.
   // Intel's extension is based on this optimization and AESCrypt generates round keys by preprocessing MixColumns.
   // However, ppc64 vncipher processes MixColumns and requires the same round keys with encryption.
-  // The ppc64 stubs of encryption and decryption use the same round keys (sessionK[0]).
+  // The ppc64 and riscv64 stubs of encryption and decryption use the same round keys (sessionK[0]).
   Node* objSessionK = load_field_from_object(aescrypt_object, "sessionK", "[[I");
   assert (objSessionK != nullptr, "wrong version of com.sun.crypto.provider.AESCrypt");
   if (objSessionK == nullptr) {