8185979: PPC64: Implement SHA2 intrinsic
Co-authored-by: Bruno Rosa <bruno.rosa@eldorado.org.br> Co-authored-by: Gustavo Serra Scalet <gustavo.scalet@eldorado.org.br> Co-authored-by: Igor Nunes <igor.nunes@eldorado.org.br> Reviewed-by: mdoerr, goetz
This commit is contained in:
parent
6213838f11
commit
d0dc2dd231
src/hotspot/cpu/ppc
assembler_ppc.hpp assembler_ppc.inline.hpp macroAssembler_ppc.hpp macroAssembler_ppc_sha.cpp stubGenerator_ppc.cpp stubRoutines_ppc.hpp vm_version_ppc.cpp vm_version_ppc.hpp
test/hotspot/jtreg/compiler
intrinsics/sha/cli/testcases
testlibrary/sha/predicate
@ -2175,7 +2175,8 @@ class Assembler : public AbstractAssembler {
|
||||
inline void vsbox( VectorRegister d, VectorRegister a);
|
||||
|
||||
// SHA (introduced with Power 8)
|
||||
// Not yet implemented.
|
||||
inline void vshasigmad(VectorRegister d, VectorRegister a, bool st, int six);
|
||||
inline void vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six);
|
||||
|
||||
// Vector Binary Polynomial Multiplication (introduced with Power 8)
|
||||
inline void vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b);
|
||||
@ -2286,6 +2287,10 @@ class Assembler : public AbstractAssembler {
|
||||
inline void lvsl( VectorRegister d, Register s2);
|
||||
inline void lvsr( VectorRegister d, Register s2);
|
||||
|
||||
// Endianness specific concatenation of 2 loaded vectors.
|
||||
inline void load_perm(VectorRegister perm, Register addr);
|
||||
inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm);
|
||||
|
||||
// RegisterOrConstant versions.
|
||||
// These emitters choose between the versions using two registers and
|
||||
// those with register and immediate, depending on the content of roc.
|
||||
|
@ -926,7 +926,8 @@ inline void Assembler::vncipherlast(VectorRegister d, VectorRegister a, VectorRe
|
||||
// Emits (via emit_int32) VSBOX_OPCODE combined with the register fields vrt(d) and vra(a).
inline void Assembler::vsbox( VectorRegister d, VectorRegister a) { emit_int32( VSBOX_OPCODE | vrt(d) | vra(a) ); }
|
||||
|
||||
// SHA (introduced with Power 8)
|
||||
// Not yet implemented.
|
||||
// Emits (via emit_int32) VSHASIGMAD_OPCODE combined with the fields vrt(d), vra(a), vst(st) and vsix(six).
inline void Assembler::vshasigmad(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAD_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
|
||||
// Emits (via emit_int32) VSHASIGMAW_OPCODE combined with the fields vrt(d), vra(a), vst(st) and vsix(six).
inline void Assembler::vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAW_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
|
||||
|
||||
// Vector Binary Polynomial Multiplication (introduced with Power 8)
|
||||
// Emits (via emit_int32) VPMSUMB_OPCODE combined with the register fields vrt(d), vra(a) and vrb(b).
inline void Assembler::vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPMSUMB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
|
||||
@ -1035,6 +1036,22 @@ inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL
|
||||
// Two-operand form: emits LVSL_OPCODE with only the vrt(d) and rb(s2) fields set.
inline void Assembler::lvsl( VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); }
|
||||
// Two-operand form: emits LVSR_OPCODE with only the vrt(d) and rb(s2) fields set.
inline void Assembler::lvsr( VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); }
|
||||
|
||||
// Emits the endianness-dependent instruction that fills `perm` from `addr`:
// lvsr when VM_LITTLE_ENDIAN is defined, lvsl otherwise.
// NOTE(review): presumably the resulting `perm` is the control vector later
// handed to vec_perm() -- confirm against the callers.
inline void Assembler::load_perm(VectorRegister perm, Register addr) {
|
||||
#if defined(VM_LITTLE_ENDIAN)
|
||||
// Little-endian build.
lvsr(perm, addr);
|
||||
#else
|
||||
// Any other build (VM_LITTLE_ENDIAN not defined).
lvsl(perm, addr);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Emits a vperm whose first argument is `first_dest` and whose last argument is
// `perm`; the two middle arguments are `first_dest` and `second`, in an order
// that depends on endianness (swapped when VM_LITTLE_ENDIAN is defined).
// NOTE(review): assumes vperm's first argument is the destination, so
// `first_dest` is both an input and the result register -- confirm against
// the vperm declaration.
inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm) {
|
||||
#if defined(VM_LITTLE_ENDIAN)
|
||||
// Little-endian build: `second` comes before `first_dest`.
vperm(first_dest, second, first_dest, perm);
|
||||
#else
|
||||
// Otherwise: `first_dest` comes before `second`.
vperm(first_dest, first_dest, second, perm);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Pointer convenience overload: casts `x` to long and forwards, together with
// `d` and `tmp`, to the load_const overload that takes a long.
inline void Assembler::load_const(Register d, void* x, Register tmp) {
|
||||
load_const(d, (long)x, tmp);
|
||||
}
|
||||
|
@ -866,6 +866,40 @@ class MacroAssembler: public Assembler {
|
||||
void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
|
||||
bool invertCRC);
|
||||
|
||||
// SHA-2 auxiliary functions and public interfaces
|
||||
private:
|
||||
void sha256_deque(const VectorRegister src,
|
||||
const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
|
||||
void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
|
||||
void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
|
||||
void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
|
||||
const int total_ws, const Register k, const VectorRegister* kpws,
|
||||
const int total_kpws);
|
||||
void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
|
||||
const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
|
||||
const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
|
||||
const Register j, const Register k);
|
||||
void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
|
||||
const VectorRegister c, const VectorRegister d, const VectorRegister e,
|
||||
const VectorRegister f, const VectorRegister g, const VectorRegister h,
|
||||
const Register hptr);
|
||||
|
||||
void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
|
||||
void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
|
||||
void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
|
||||
void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
|
||||
void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
|
||||
const VectorRegister w2, const VectorRegister w3,
|
||||
const VectorRegister w4, const VectorRegister w5,
|
||||
const VectorRegister w6, const VectorRegister w7,
|
||||
const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
|
||||
const VectorRegister vRb, const Register k);
|
||||
|
||||
public:
|
||||
void sha256(bool multi_block);
|
||||
void sha512(bool multi_block);
|
||||
|
||||
|
||||
//
|
||||
// Debugging
|
||||
//
|
||||
|
1136
src/hotspot/cpu/ppc/macroAssembler_ppc_sha.cpp
Normal file
1136
src/hotspot/cpu/ppc/macroAssembler_ppc_sha.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -3095,6 +3095,28 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// Generates one SHA-256 implCompress stub and returns its entry address.
//   multi_block - forwarded unchanged to sha256().
//   name        - label given to the StubCodeMark for this stub.
// The returned address is the value of function_entry(), taken before the
// SHA-256 code is emitted.
address generate_sha256_implCompress(bool multi_block, const char *name) {
|
||||
// Stub generation is only valid when the UseSHA flag is set.
assert(UseSHA, "need SHA instructions");
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ function_entry();
|
||||
|
||||
// Emit the SHA-256 code itself.
__ sha256 (multi_block);
|
||||
|
||||
// The stub ends with a blr instruction.
__ blr();
|
||||
return start;
|
||||
}
|
||||
|
||||
// Generates one SHA-512 implCompress stub and returns its entry address.
//   multi_block - forwarded unchanged to sha512().
//   name        - label given to the StubCodeMark for this stub.
// The returned address is the value of function_entry(), taken before the
// SHA-512 code is emitted.
address generate_sha512_implCompress(bool multi_block, const char *name) {
|
||||
// Stub generation is only valid when the UseSHA flag is set.
assert(UseSHA, "need SHA instructions");
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ function_entry();
|
||||
|
||||
// Emit the SHA-512 code itself.
__ sha512 (multi_block);
|
||||
|
||||
// The stub ends with a blr instruction.
__ blr();
|
||||
return start;
|
||||
}
|
||||
|
||||
void generate_arraycopy_stubs() {
|
||||
// Note: the disjoint stubs must be generated first, some of
|
||||
// the conjoint stubs use them.
|
||||
@ -3781,6 +3803,14 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
|
||||
}
|
||||
|
||||
if (UseSHA256Intrinsics) {
|
||||
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
|
||||
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
|
||||
}
|
||||
if (UseSHA512Intrinsics) {
|
||||
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
|
||||
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -34,7 +34,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
|
||||
|
||||
enum platform_dependent_constants {
|
||||
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
|
||||
code_size2 = 20000 // simply increase if too small (assembler will crash if too small)
|
||||
code_size2 = 22000 // simply increase if too small (assembler will crash if too small)
|
||||
};
|
||||
|
||||
// CRC32 Intrinsics.
|
||||
|
@ -113,7 +113,7 @@ void VM_Version::initialize() {
|
||||
// Create and print feature-string.
|
||||
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
|
||||
jio_snprintf(buf, sizeof(buf),
|
||||
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
(has_fsqrt() ? " fsqrt" : ""),
|
||||
(has_isel() ? " isel" : ""),
|
||||
(has_lxarxeh() ? " lxarxeh" : ""),
|
||||
@ -130,7 +130,8 @@ void VM_Version::initialize() {
|
||||
(has_mfdscr() ? " mfdscr" : ""),
|
||||
(has_vsx() ? " vsx" : ""),
|
||||
(has_ldbrx() ? " ldbrx" : ""),
|
||||
(has_stdbrx() ? " stdbrx" : "")
|
||||
(has_stdbrx() ? " stdbrx" : ""),
|
||||
(has_vshasig() ? " sha" : "")
|
||||
// Make sure number of %s matches num_features!
|
||||
);
|
||||
_features_string = os::strdup(buf);
|
||||
@ -247,17 +248,43 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UseFMA, true);
|
||||
}
|
||||
|
||||
if (UseSHA) {
|
||||
warning("SHA instructions are not available on this CPU");
|
||||
if (has_vshasig()) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA)) {
|
||||
UseSHA = true;
|
||||
}
|
||||
} else if (UseSHA) {
|
||||
if (!FLAG_IS_DEFAULT(UseSHA))
|
||||
warning("SHA instructions are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseSHA, false);
|
||||
}
|
||||
if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
|
||||
warning("SHA intrinsics are not available on this CPU");
|
||||
|
||||
if (UseSHA1Intrinsics) {
|
||||
warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseSHA && has_vshasig()) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
|
||||
}
|
||||
} else if (UseSHA256Intrinsics) {
|
||||
warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseSHA && has_vshasig()) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
|
||||
}
|
||||
} else if (UseSHA512Intrinsics) {
|
||||
warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
||||
}
|
||||
|
||||
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA, false);
|
||||
}
|
||||
|
||||
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
|
||||
UseSquareToLenIntrinsic = true;
|
||||
}
|
||||
@ -663,6 +690,7 @@ void VM_Version::determine_features() {
|
||||
a->lxvd2x(VSR0, R3_ARG1); // code[14] -> vsx
|
||||
a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> ldbrx
|
||||
a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[16] -> stdbrx
|
||||
a->vshasigmaw(VR0, VR1, 1, 0xF); // code[17] -> vshasig
|
||||
a->blr();
|
||||
|
||||
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
|
||||
@ -714,6 +742,7 @@ void VM_Version::determine_features() {
|
||||
if (code[feature_cntr++]) features |= vsx_m;
|
||||
if (code[feature_cntr++]) features |= ldbrx_m;
|
||||
if (code[feature_cntr++]) features |= stdbrx_m;
|
||||
if (code[feature_cntr++]) features |= vshasig_m;
|
||||
|
||||
// Print the detection code.
|
||||
if (PrintAssembly) {
|
||||
|
@ -49,6 +49,7 @@ protected:
|
||||
vsx,
|
||||
ldbrx,
|
||||
stdbrx,
|
||||
vshasig,
|
||||
num_features // last entry to count features
|
||||
};
|
||||
enum Feature_Flag_Set {
|
||||
@ -64,6 +65,7 @@ protected:
|
||||
vand_m = (1 << vand ),
|
||||
lqarx_m = (1 << lqarx ),
|
||||
vcipher_m = (1 << vcipher),
|
||||
vshasig_m = (1 << vshasig),
|
||||
vpmsumb_m = (1 << vpmsumb),
|
||||
tcheck_m = (1 << tcheck ),
|
||||
mfdscr_m = (1 << mfdscr ),
|
||||
@ -106,6 +108,7 @@ public:
|
||||
// True when the vsx_m bit is set in _features.
static bool has_vsx() { return (_features & vsx_m) != 0; }
|
||||
// True when the ldbrx_m bit is set in _features.
static bool has_ldbrx() { return (_features & ldbrx_m) != 0; }
|
||||
// True when the stdbrx_m bit is set in _features.
static bool has_stdbrx() { return (_features & stdbrx_m) != 0; }
|
||||
// True when the vshasig_m bit is set in _features.
static bool has_vshasig() { return (_features & vshasig_m) != 0; }
|
||||
// Has no feature bit of its own: returns whatever has_vpmsumb() returns.
static bool has_mtfprd() { return has_vpmsumb(); } // alias for P8
|
||||
|
||||
// Assembler testing
|
||||
|
@ -42,7 +42,8 @@ public class GenericTestCaseForOtherCPU extends
|
||||
new OrPredicate(Platform::isAArch64,
|
||||
new OrPredicate(Platform::isS390x,
|
||||
new OrPredicate(Platform::isSparc,
|
||||
new OrPredicate(Platform::isX64, Platform::isX86))))));
|
||||
new OrPredicate(Platform::isPPC,
|
||||
new OrPredicate(Platform::isX64, Platform::isX86)))))));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -71,23 +71,27 @@ public class IntrinsicPredicates {
|
||||
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha256" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null),
|
||||
// x86 variants
|
||||
new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null),
|
||||
new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))));
|
||||
new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))));
|
||||
|
||||
public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE
|
||||
= new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("s390.*", new String[] { "sha512" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha512" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, null),
|
||||
// x86 variants
|
||||
new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("x86.*", new String[] { "sha" }, null),
|
||||
new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null),
|
||||
new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))));
|
||||
new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null))))))))));
|
||||
|
||||
public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE
|
||||
= new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE,
|
||||
|
Loading…
x
Reference in New Issue
Block a user