diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index ed360289298..24930367319 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -3332,9 +3332,36 @@ class StubGenerator: public StubCodeGenerator {
      return start;
   }
 
+  class Cached64Bytes {  // Code-generation helper: keeps 64 bytes of memory cached in eight registers.
+  private:
+    MacroAssembler *_masm;  // assembler handle; presumably what `__` emits through (macro defined outside this view)
+    Register _regs[8];      // gen_loads() fills _regs[j] from offset 8 * j, i.e. two 4-byte words per register
+
+  public:
+    Cached64Bytes(MacroAssembler *masm, RegSet rs): _masm(masm) {  // rs must contain exactly 8 registers
+      assert(rs.size() == 8, "%u registers are used to cache 16 4-byte data", rs.size());
+      auto it = rs.begin();  // copy the set's members into _regs in the set's iteration order
+      for (auto &r: _regs) {
+        r = *it;
+        ++it;
+      }
+    }
+
+    void gen_loads(Register base) {  // Generate code loading the 64 bytes at [base, base + 64) into the cache.
+      for (int i = 0; i < 8; i += 2) {
+        __ ldp(_regs[i], _regs[i + 1], Address(base, 8 * i));  // one ldp per register pair: offsets 0, 16, 32, 48
+      }
+    }
+
+    // Generate code extracting the i-th unsigned 4-byte word (i in [0, 15], not checked here) of the cached 64 bytes into dest.
+    void extract_u32(Register dest, int i) {
+      __ ubfx(dest, _regs[i / 2], 32 * (i % 2), 32);  // even i: bits [0, 32) of _regs[i / 2]; odd i: bits [32, 64)
+    }
+  };
+
   // Utility routines for md5.
   // Clobbers r10 and r11.
-  void md5_FF(Register buf, Register r1, Register r2, Register r3, Register r4,
+  void md5_FF(Cached64Bytes& reg_cache, Register r1, Register r2, Register r3, Register r4,
               int k, int s, int t) {
     Register rscratch3 = r10;
     Register rscratch4 = r11;
@@ -3343,7 +3370,7 @@ class StubGenerator: public StubCodeGenerator {
     __ movw(rscratch2, t);
     __ andw(rscratch3, rscratch3, r2);
     __ addw(rscratch4, r1, rscratch2);
-    __ ldrw(rscratch1, Address(buf, k*4));
+    reg_cache.extract_u32(rscratch1, k);
     __ eorw(rscratch3, rscratch3, r4);
     __ addw(rscratch4, rscratch4, rscratch1);
     __ addw(rscratch3, rscratch3, rscratch4);
@@ -3351,14 +3378,14 @@ class StubGenerator: public StubCodeGenerator {
     __ addw(r1, rscratch2, r2);
   }
 
-  void md5_GG(Register buf, Register r1, Register r2, Register r3, Register r4,
+  void md5_GG(Cached64Bytes& reg_cache, Register r1, Register r2, Register r3, Register r4,
               int k, int s, int t) {
     Register rscratch3 = r10;
     Register rscratch4 = r11;
 
     __ andw(rscratch3, r2, r4);
     __ bicw(rscratch4, r3, r4);
-    __ ldrw(rscratch1, Address(buf, k*4));
+    reg_cache.extract_u32(rscratch1, k);
     __ movw(rscratch2, t);
     __ orrw(rscratch3, rscratch3, rscratch4);
     __ addw(rscratch4, r1, rscratch2);
@@ -3368,7 +3395,7 @@ class StubGenerator: public StubCodeGenerator {
     __ addw(r1, rscratch2, r2);
   }
 
-  void md5_HH(Register buf, Register r1, Register r2, Register r3, Register r4,
+  void md5_HH(Cached64Bytes& reg_cache, Register r1, Register r2, Register r3, Register r4,
               int k, int s, int t) {
     Register rscratch3 = r10;
     Register rscratch4 = r11;
@@ -3376,7 +3403,7 @@ class StubGenerator: public StubCodeGenerator {
     __ eorw(rscratch3, r3, r4);
     __ movw(rscratch2, t);
     __ addw(rscratch4, r1, rscratch2);
-    __ ldrw(rscratch1, Address(buf, k*4));
+    reg_cache.extract_u32(rscratch1, k);
     __ eorw(rscratch3, rscratch3, r2);
     __ addw(rscratch4, rscratch4, rscratch1);
     __ addw(rscratch3, rscratch3, rscratch4);
@@ -3384,7 +3411,7 @@ class StubGenerator: public StubCodeGenerator {
     __ addw(r1, rscratch2, r2);
   }
 
-  void md5_II(Register buf, Register r1, Register r2, Register r3, Register r4,
+  void md5_II(Cached64Bytes& reg_cache, Register r1, Register r2, Register r3, Register r4,
               int k, int s, int t) {
     Register rscratch3 = r10;
     Register rscratch4 = r11;
@@ -3392,7 +3419,7 @@ class StubGenerator: public StubCodeGenerator {
     __ movw(rscratch3, t);
     __ ornw(rscratch2, r2, r4);
     __ addw(rscratch4, r1, rscratch3);
-    __ ldrw(rscratch1, Address(buf, k*4));
+    reg_cache.extract_u32(rscratch1, k);
     __ eorw(rscratch3, rscratch2, r3);
     __ addw(rscratch4, rscratch4, rscratch1);
     __ addw(rscratch3, rscratch3, rscratch4);
@@ -3424,103 +3451,104 @@ class StubGenerator: public StubCodeGenerator {
     Register rscratch3 = r10;
     Register rscratch4 = r11;
 
+    Register state_regs[2] = { r12, r13 };
+    RegSet saved_regs = RegSet::range(r16, r22) - r18_tls;
+    Cached64Bytes reg_cache(_masm, RegSet::of(r14, r15) + saved_regs);  // using 8 registers
+
+    __ push(saved_regs, sp);
+
+    __ ldp(state_regs[0], state_regs[1], Address(state));
+    __ ubfx(a, state_regs[0],  0, 32);
+    __ ubfx(b, state_regs[0], 32, 32);
+    __ ubfx(c, state_regs[1],  0, 32);
+    __ ubfx(d, state_regs[1], 32, 32);
+
     Label md5_loop;
     __ BIND(md5_loop);
 
-    // Save hash values for addition after rounds
-    __ ldrw(a, Address(state,  0));
-    __ ldrw(b, Address(state,  4));
-    __ ldrw(c, Address(state,  8));
-    __ ldrw(d, Address(state, 12));
+    reg_cache.gen_loads(buf);
 
     // Round 1
-    md5_FF(buf, a, b, c, d,  0,  7, 0xd76aa478);
-    md5_FF(buf, d, a, b, c,  1, 12, 0xe8c7b756);
-    md5_FF(buf, c, d, a, b,  2, 17, 0x242070db);
-    md5_FF(buf, b, c, d, a,  3, 22, 0xc1bdceee);
-    md5_FF(buf, a, b, c, d,  4,  7, 0xf57c0faf);
-    md5_FF(buf, d, a, b, c,  5, 12, 0x4787c62a);
-    md5_FF(buf, c, d, a, b,  6, 17, 0xa8304613);
-    md5_FF(buf, b, c, d, a,  7, 22, 0xfd469501);
-    md5_FF(buf, a, b, c, d,  8,  7, 0x698098d8);
-    md5_FF(buf, d, a, b, c,  9, 12, 0x8b44f7af);
-    md5_FF(buf, c, d, a, b, 10, 17, 0xffff5bb1);
-    md5_FF(buf, b, c, d, a, 11, 22, 0x895cd7be);
-    md5_FF(buf, a, b, c, d, 12,  7, 0x6b901122);
-    md5_FF(buf, d, a, b, c, 13, 12, 0xfd987193);
-    md5_FF(buf, c, d, a, b, 14, 17, 0xa679438e);
-    md5_FF(buf, b, c, d, a, 15, 22, 0x49b40821);
+    md5_FF(reg_cache, a, b, c, d,  0,  7, 0xd76aa478);
+    md5_FF(reg_cache, d, a, b, c,  1, 12, 0xe8c7b756);
+    md5_FF(reg_cache, c, d, a, b,  2, 17, 0x242070db);
+    md5_FF(reg_cache, b, c, d, a,  3, 22, 0xc1bdceee);
+    md5_FF(reg_cache, a, b, c, d,  4,  7, 0xf57c0faf);
+    md5_FF(reg_cache, d, a, b, c,  5, 12, 0x4787c62a);
+    md5_FF(reg_cache, c, d, a, b,  6, 17, 0xa8304613);
+    md5_FF(reg_cache, b, c, d, a,  7, 22, 0xfd469501);
+    md5_FF(reg_cache, a, b, c, d,  8,  7, 0x698098d8);
+    md5_FF(reg_cache, d, a, b, c,  9, 12, 0x8b44f7af);
+    md5_FF(reg_cache, c, d, a, b, 10, 17, 0xffff5bb1);
+    md5_FF(reg_cache, b, c, d, a, 11, 22, 0x895cd7be);
+    md5_FF(reg_cache, a, b, c, d, 12,  7, 0x6b901122);
+    md5_FF(reg_cache, d, a, b, c, 13, 12, 0xfd987193);
+    md5_FF(reg_cache, c, d, a, b, 14, 17, 0xa679438e);
+    md5_FF(reg_cache, b, c, d, a, 15, 22, 0x49b40821);
 
     // Round 2
-    md5_GG(buf, a, b, c, d,  1,  5, 0xf61e2562);
-    md5_GG(buf, d, a, b, c,  6,  9, 0xc040b340);
-    md5_GG(buf, c, d, a, b, 11, 14, 0x265e5a51);
-    md5_GG(buf, b, c, d, a,  0, 20, 0xe9b6c7aa);
-    md5_GG(buf, a, b, c, d,  5,  5, 0xd62f105d);
-    md5_GG(buf, d, a, b, c, 10,  9, 0x02441453);
-    md5_GG(buf, c, d, a, b, 15, 14, 0xd8a1e681);
-    md5_GG(buf, b, c, d, a,  4, 20, 0xe7d3fbc8);
-    md5_GG(buf, a, b, c, d,  9,  5, 0x21e1cde6);
-    md5_GG(buf, d, a, b, c, 14,  9, 0xc33707d6);
-    md5_GG(buf, c, d, a, b,  3, 14, 0xf4d50d87);
-    md5_GG(buf, b, c, d, a,  8, 20, 0x455a14ed);
-    md5_GG(buf, a, b, c, d, 13,  5, 0xa9e3e905);
-    md5_GG(buf, d, a, b, c,  2,  9, 0xfcefa3f8);
-    md5_GG(buf, c, d, a, b,  7, 14, 0x676f02d9);
-    md5_GG(buf, b, c, d, a, 12, 20, 0x8d2a4c8a);
+    md5_GG(reg_cache, a, b, c, d,  1,  5, 0xf61e2562);
+    md5_GG(reg_cache, d, a, b, c,  6,  9, 0xc040b340);
+    md5_GG(reg_cache, c, d, a, b, 11, 14, 0x265e5a51);
+    md5_GG(reg_cache, b, c, d, a,  0, 20, 0xe9b6c7aa);
+    md5_GG(reg_cache, a, b, c, d,  5,  5, 0xd62f105d);
+    md5_GG(reg_cache, d, a, b, c, 10,  9, 0x02441453);
+    md5_GG(reg_cache, c, d, a, b, 15, 14, 0xd8a1e681);
+    md5_GG(reg_cache, b, c, d, a,  4, 20, 0xe7d3fbc8);
+    md5_GG(reg_cache, a, b, c, d,  9,  5, 0x21e1cde6);
+    md5_GG(reg_cache, d, a, b, c, 14,  9, 0xc33707d6);
+    md5_GG(reg_cache, c, d, a, b,  3, 14, 0xf4d50d87);
+    md5_GG(reg_cache, b, c, d, a,  8, 20, 0x455a14ed);
+    md5_GG(reg_cache, a, b, c, d, 13,  5, 0xa9e3e905);
+    md5_GG(reg_cache, d, a, b, c,  2,  9, 0xfcefa3f8);
+    md5_GG(reg_cache, c, d, a, b,  7, 14, 0x676f02d9);
+    md5_GG(reg_cache, b, c, d, a, 12, 20, 0x8d2a4c8a);
 
     // Round 3
-    md5_HH(buf, a, b, c, d,  5,  4, 0xfffa3942);
-    md5_HH(buf, d, a, b, c,  8, 11, 0x8771f681);
-    md5_HH(buf, c, d, a, b, 11, 16, 0x6d9d6122);
-    md5_HH(buf, b, c, d, a, 14, 23, 0xfde5380c);
-    md5_HH(buf, a, b, c, d,  1,  4, 0xa4beea44);
-    md5_HH(buf, d, a, b, c,  4, 11, 0x4bdecfa9);
-    md5_HH(buf, c, d, a, b,  7, 16, 0xf6bb4b60);
-    md5_HH(buf, b, c, d, a, 10, 23, 0xbebfbc70);
-    md5_HH(buf, a, b, c, d, 13,  4, 0x289b7ec6);
-    md5_HH(buf, d, a, b, c,  0, 11, 0xeaa127fa);
-    md5_HH(buf, c, d, a, b,  3, 16, 0xd4ef3085);
-    md5_HH(buf, b, c, d, a,  6, 23, 0x04881d05);
-    md5_HH(buf, a, b, c, d,  9,  4, 0xd9d4d039);
-    md5_HH(buf, d, a, b, c, 12, 11, 0xe6db99e5);
-    md5_HH(buf, c, d, a, b, 15, 16, 0x1fa27cf8);
-    md5_HH(buf, b, c, d, a,  2, 23, 0xc4ac5665);
+    md5_HH(reg_cache, a, b, c, d,  5,  4, 0xfffa3942);
+    md5_HH(reg_cache, d, a, b, c,  8, 11, 0x8771f681);
+    md5_HH(reg_cache, c, d, a, b, 11, 16, 0x6d9d6122);
+    md5_HH(reg_cache, b, c, d, a, 14, 23, 0xfde5380c);
+    md5_HH(reg_cache, a, b, c, d,  1,  4, 0xa4beea44);
+    md5_HH(reg_cache, d, a, b, c,  4, 11, 0x4bdecfa9);
+    md5_HH(reg_cache, c, d, a, b,  7, 16, 0xf6bb4b60);
+    md5_HH(reg_cache, b, c, d, a, 10, 23, 0xbebfbc70);
+    md5_HH(reg_cache, a, b, c, d, 13,  4, 0x289b7ec6);
+    md5_HH(reg_cache, d, a, b, c,  0, 11, 0xeaa127fa);
+    md5_HH(reg_cache, c, d, a, b,  3, 16, 0xd4ef3085);
+    md5_HH(reg_cache, b, c, d, a,  6, 23, 0x04881d05);
+    md5_HH(reg_cache, a, b, c, d,  9,  4, 0xd9d4d039);
+    md5_HH(reg_cache, d, a, b, c, 12, 11, 0xe6db99e5);
+    md5_HH(reg_cache, c, d, a, b, 15, 16, 0x1fa27cf8);
+    md5_HH(reg_cache, b, c, d, a,  2, 23, 0xc4ac5665);
 
     // Round 4
-    md5_II(buf, a, b, c, d,  0,  6, 0xf4292244);
-    md5_II(buf, d, a, b, c,  7, 10, 0x432aff97);
-    md5_II(buf, c, d, a, b, 14, 15, 0xab9423a7);
-    md5_II(buf, b, c, d, a,  5, 21, 0xfc93a039);
-    md5_II(buf, a, b, c, d, 12,  6, 0x655b59c3);
-    md5_II(buf, d, a, b, c,  3, 10, 0x8f0ccc92);
-    md5_II(buf, c, d, a, b, 10, 15, 0xffeff47d);
-    md5_II(buf, b, c, d, a,  1, 21, 0x85845dd1);
-    md5_II(buf, a, b, c, d,  8,  6, 0x6fa87e4f);
-    md5_II(buf, d, a, b, c, 15, 10, 0xfe2ce6e0);
-    md5_II(buf, c, d, a, b,  6, 15, 0xa3014314);
-    md5_II(buf, b, c, d, a, 13, 21, 0x4e0811a1);
-    md5_II(buf, a, b, c, d,  4,  6, 0xf7537e82);
-    md5_II(buf, d, a, b, c, 11, 10, 0xbd3af235);
-    md5_II(buf, c, d, a, b,  2, 15, 0x2ad7d2bb);
-    md5_II(buf, b, c, d, a,  9, 21, 0xeb86d391);
+    md5_II(reg_cache, a, b, c, d,  0,  6, 0xf4292244);
+    md5_II(reg_cache, d, a, b, c,  7, 10, 0x432aff97);
+    md5_II(reg_cache, c, d, a, b, 14, 15, 0xab9423a7);
+    md5_II(reg_cache, b, c, d, a,  5, 21, 0xfc93a039);
+    md5_II(reg_cache, a, b, c, d, 12,  6, 0x655b59c3);
+    md5_II(reg_cache, d, a, b, c,  3, 10, 0x8f0ccc92);
+    md5_II(reg_cache, c, d, a, b, 10, 15, 0xffeff47d);
+    md5_II(reg_cache, b, c, d, a,  1, 21, 0x85845dd1);
+    md5_II(reg_cache, a, b, c, d,  8,  6, 0x6fa87e4f);
+    md5_II(reg_cache, d, a, b, c, 15, 10, 0xfe2ce6e0);
+    md5_II(reg_cache, c, d, a, b,  6, 15, 0xa3014314);
+    md5_II(reg_cache, b, c, d, a, 13, 21, 0x4e0811a1);
+    md5_II(reg_cache, a, b, c, d,  4,  6, 0xf7537e82);
+    md5_II(reg_cache, d, a, b, c, 11, 10, 0xbd3af235);
+    md5_II(reg_cache, c, d, a, b,  2, 15, 0x2ad7d2bb);
+    md5_II(reg_cache, b, c, d, a,  9, 21, 0xeb86d391);
 
-    // write hash values back in the correct order
-    __ ldrw(rscratch1, Address(state,  0));
-    __ addw(rscratch1, rscratch1, a);
-    __ strw(rscratch1, Address(state,  0));
+    __ addw(a, state_regs[0], a);
+    __ ubfx(rscratch2, state_regs[0], 32, 32);
+    __ addw(b, rscratch2, b);
+    __ addw(c, state_regs[1], c);
+    __ ubfx(rscratch4, state_regs[1], 32, 32);
+    __ addw(d, rscratch4, d);
 
-    __ ldrw(rscratch2, Address(state,  4));
-    __ addw(rscratch2, rscratch2, b);
-    __ strw(rscratch2, Address(state,  4));
-
-    __ ldrw(rscratch3, Address(state,  8));
-    __ addw(rscratch3, rscratch3, c);
-    __ strw(rscratch3, Address(state,  8));
-
-    __ ldrw(rscratch4, Address(state, 12));
-    __ addw(rscratch4, rscratch4, d);
-    __ strw(rscratch4, Address(state, 12));
+    __ orr(state_regs[0], a, b, Assembler::LSL, 32);
+    __ orr(state_regs[1], c, d, Assembler::LSL, 32);
 
     if (multi_block) {
       __ add(buf, buf, 64);
@@ -3530,6 +3558,11 @@ class StubGenerator: public StubCodeGenerator {
       __ mov(c_rarg0, ofs); // return ofs
     }
 
+    // Write the packed hash back to state: state_regs[0] = a | (b << 32), state_regs[1] = c | (d << 32).
+    __ stp(state_regs[0], state_regs[1], Address(state));
+
+    __ pop(saved_regs, sp);
+
     __ ret(lr);
 
     return start;