8313779: RISC-V: use andn / orn in the MD5 intrinsic

Reviewed-by: luhenry, fyang
This commit is contained in:
Antonios Printezis 2023-08-07 14:17:44 +00:00
parent bbbfa217a0
commit 4726960fcd
3 changed files with 32 additions and 9 deletions

View File

@ -1654,6 +1654,28 @@ void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
sign_extend(Rd, Rd, 32);
}
// Rd = Rs1 & (~Rs2)
//
// Emits a single Assembler::andn instruction when UseZbb is set. Otherwise
// an equivalent sequence is emitted that needs no scratch register.
// Rd may alias Rs1 and/or Rs2.
void MacroAssembler::andn(Register Rd, Register Rs1, Register Rs2) {
  if (UseZbb) {
    Assembler::andn(Rd, Rs1, Rs2);
    return;
  }

  if (Rd == Rs1) {
    // notr(Rd, Rs2) would overwrite Rs1 before it is read. Use the identity
    // a & ~b == (a | b) ^ b instead, which consumes Rs1 in the first
    // instruction and only needs Rs2 afterwards. This also yields the
    // correct result (zero) when all three registers are the same.
    orr(Rd, Rs1, Rs2);
    xorr(Rd, Rd, Rs2);
    return;
  }

  // Rd != Rs1, so Rd can safely hold ~Rs2 (even if Rd == Rs2).
  notr(Rd, Rs2);
  andr(Rd, Rs1, Rd);
}
// Rd = Rs1 | (~Rs2)
//
// Emits a single Assembler::orn instruction when UseZbb is set. Otherwise
// an equivalent sequence is emitted that needs no scratch register.
// Rd may alias Rs1 and/or Rs2.
void MacroAssembler::orn(Register Rd, Register Rs1, Register Rs2) {
  if (UseZbb) {
    Assembler::orn(Rd, Rs1, Rs2);
    return;
  }

  if (Rd == Rs1) {
    // notr(Rd, Rs2) would overwrite Rs1 before it is read. Use the identity
    // a | ~b == ~((a & b) ^ b) instead: (a & b) ^ b is (~a) & b, and its
    // complement is a | ~b. This also yields the correct result (all ones)
    // when all three registers are the same.
    andr(Rd, Rs1, Rs2);
    xorr(Rd, Rd, Rs2);
    notr(Rd, Rd);
    return;
  }

  // Rd != Rs1, so Rd can safely hold ~Rs2 (even if Rd == Rs2).
  notr(Rd, Rs2);
  orr(Rd, Rs1, Rd);
}
// Note: load_unsigned_short used to be called load_unsigned_word.
int MacroAssembler::load_unsigned_short(Register dst, Address src) {
int off = offset();

View File

@ -763,6 +763,10 @@ public:
void orrw(Register Rd, Register Rs1, Register Rs2);
void xorrw(Register Rd, Register Rs1, Register Rs2);
// logic with negate: the second source operand is complemented before use.
//   andn: Rd = Rs1 & (~Rs2)
//   orn:  Rd = Rs1 | (~Rs2)
// Both emit the corresponding single Assembler instruction when UseZbb is
// set, and an equivalent multi-instruction sequence otherwise.
void andn(Register Rd, Register Rs1, Register Rs2);
void orn(Register Rd, Register Rs1, Register Rs2);
// revb
void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend
void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend

View File

@ -3960,7 +3960,7 @@ class StubGenerator: public StubCodeGenerator {
// rtmp1 = rtmp1 + x + ac
reg_cache.get_u32(rtmp2, k, rmask32);
__ addw(rtmp1, rtmp1, rtmp2);
__ li(rtmp2, t);
__ mv(rtmp2, t);
__ addw(rtmp1, rtmp1, rtmp2);
// a += rtmp1 + x + ac
@ -3981,8 +3981,7 @@ class StubGenerator: public StubCodeGenerator {
__ andr(rtmp1, b, c);
// rtmp2 = (~b) & d
__ notr(rtmp2, b);
__ andr(rtmp2, rtmp2, d);
__ andn(rtmp2, d, b);
// rtmp1 = (b & c) | ((~b) & d)
__ orr(rtmp1, rtmp1, rtmp2);
@ -4000,9 +3999,8 @@ class StubGenerator: public StubCodeGenerator {
// rtmp1 = b & d
__ andr(rtmp1, b, d);
// rtmp2 = (c & (~d))
__ notr(rtmp2, d);
__ andr(rtmp2, rtmp2, c);
// rtmp2 = c & (~d)
__ andn(rtmp2, c, d);
// rtmp1 = (b & d) | (c & (~d))
__ orr(rtmp1, rtmp1, rtmp2);
@ -4032,8 +4030,7 @@ class StubGenerator: public StubCodeGenerator {
int k, int s, int t,
Register rtmp1, Register rtmp2, Register rmask32) {
// rtmp1 = c ^ (b | (~d))
__ notr(rtmp2, d);
__ orr(rtmp1, b, rtmp2);
__ orn(rtmp1, b, d);
__ xorr(rtmp1, c, rtmp1);
m5_FF_GG_HH_II_epilogue(reg_cache, a, b, c, d, k, s, t,
@ -4156,7 +4153,7 @@ class StubGenerator: public StubCodeGenerator {
__ mv(ofs, ofs_arg);
__ mv(limit, limit_arg);
}
__ li(rmask32, MASK_32);
__ mv(rmask32, MASK_32);
// to minimize the number of memory operations:
// read the 4 state 4-byte values in pairs, with a single ld,