diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index b4c12ecd4a8..62831ee72ba 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -150,10 +150,12 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
   Register oop = objectReg;
   Register box = boxReg;
   Register disp_hdr = tmpReg;
+  Register owner_addr = tmpReg;
   Register tmp = tmp2Reg;
   Label cont;
   Label object_has_monitor;
   Label count, no_count;
+  Label unlocked;
 
   assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
   assert_different_registers(oop, box, tmp, disp_hdr);
@@ -204,14 +206,40 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Registe
   b(cont);
 
   bind(notRecursive);
+
+  // Compute owner address.
+  lea(owner_addr, Address(tmp, ObjectMonitor::owner_offset()));
+
+  // Set owner to null.
+  // Release to satisfy the JMM
+  stlr(zr, owner_addr);
+  // We need a full fence after clearing owner to avoid stranding.
+  // StoreLoad achieves this.
+  membar(StoreLoad);
+
+  // Check if the entry lists are empty.
   ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset()));
-  ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset()));
-  orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
-  cmp(rscratch1, zr); // Sets flags for result
-  cbnz(rscratch1, cont);
-  // need a release store here
-  lea(tmp, Address(tmp, ObjectMonitor::owner_offset()));
-  stlr(zr, tmp); // set unowned
+  ldr(tmpReg, Address(tmp, ObjectMonitor::cxq_offset()));
+  orr(rscratch1, rscratch1, tmpReg);
+  cmp(rscratch1, zr);
+  br(Assembler::EQ, cont);     // If so we are done.
+
+  // Check if there is a successor.
+  ldr(rscratch1, Address(tmp, ObjectMonitor::succ_offset()));
+  cmp(rscratch1, zr);
+  br(Assembler::NE, unlocked); // If so we are done.
+
+  // Save the monitor pointer in the current thread, so we can try to
+  // reacquire the lock in SharedRuntime::monitor_exit_helper().
+  str(tmp, Address(rthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+  cmp(zr, rthread); // Set Flag to NE => slow path
+  b(cont);
+
+  bind(unlocked);
+  cmp(zr, zr); // Set Flag to EQ => fast path
+
+  // Intentional fall-through
 
   bind(cont);
   // flag == EQ indicates success
@@ -498,33 +526,41 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box, Regi
 
     bind(not_recursive);
 
-    Label release;
     const Register t2_owner_addr = t2;
 
     // Compute owner address.
     lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset()));
 
+    // Set owner to null.
+    // Release to satisfy the JMM
+    stlr(zr, t2_owner_addr);
+    // We need a full fence after clearing owner to avoid stranding.
+    // StoreLoad achieves this.
+    membar(StoreLoad);
+
     // Check if the entry lists are empty.
     ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset()));
     ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset()));
     orr(rscratch1, rscratch1, t3_t);
     cmp(rscratch1, zr);
-    br(Assembler::EQ, release);
+    br(Assembler::EQ, unlocked);  // If so we are done.
 
-    // The owner may be anonymous and we removed the last obj entry in
-    // the lock-stack. This loses the information about the owner.
-    // Write the thread to the owner field so the runtime knows the owner.
-    str(rthread, Address(t2_owner_addr));
+    // Check if there is a successor.
+    ldr(rscratch1, Address(t1_monitor, ObjectMonitor::succ_offset()));
+    cmp(rscratch1, zr);
+    br(Assembler::NE, unlocked);  // If so we are done.
+
+    // Save the monitor pointer in the current thread, so we can try to
+    // reacquire the lock in SharedRuntime::monitor_exit_helper().
+    str(t1_monitor, Address(rthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+    cmp(zr, rthread); // Set Flag to NE => slow path
     b(slow_path);
-
-    bind(release);
-    // Set owner to null.
-    // Release to satisfy the JMM
-    stlr(zr, t2_owner_addr);
   }
 
   bind(unlocked);
   decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
+  cmp(zr, zr); // Set Flags to EQ => fast path
 
 #ifdef ASSERT
   // Check that unlocked label is reached with Flags == EQ.
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index a8635af9582..8d8e39b8bbc 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -2715,13 +2715,34 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   b(success);
 
   bind(notRecursive);
+
+  // Set owner to null.
+  // Release to satisfy the JMM
+  release();
+  li(temp, 0);
+  std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
+  // We need a full fence after clearing owner to avoid stranding.
+  // StoreLoad achieves this.
+  membar(StoreLoad);
+
+  // Check if the entry lists are empty.
   ld(temp,             in_bytes(ObjectMonitor::EntryList_offset()), current_header);
   ld(displaced_header, in_bytes(ObjectMonitor::cxq_offset()), current_header);
   orr(temp, temp, displaced_header); // Will be 0 if both are 0.
   cmpdi(flag, temp, 0);
-  bne(flag, failure);
-  release();
-  std(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);
+  beq(flag, success);  // If so we are done.
+
+  // Check if there is a successor.
+  ld(temp, in_bytes(ObjectMonitor::succ_offset()), current_header);
+  cmpdi(flag, temp, 0);
+  // Invert the equal bit: 'success' must be reached with flag == EQ.
+  crnand(flag, Assembler::equal, flag, Assembler::equal);
+  beq(flag, success);  // If there is a successor we are done.
+
+  // Save the monitor pointer in the current thread, so we can try
+  // to reacquire the lock in SharedRuntime::monitor_exit_helper().
+  std(current_header, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread);
+  b(failure);          // flag == NE here (equal bit inverted above) => slow path
 
   // flag == EQ indicates success, decrement held monitor count
   // flag == NE indicates failure
@@ -3028,27 +3049,39 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(ConditionRegister f
 
     bind(not_recursive);
 
-    Label release_;
+    Label set_eq_unlocked;
     const Register t2 = tmp2;
 
+    // Set owner to null.
+    // Release to satisfy the JMM
+    release();
+    li(t, 0);
+    std(t, in_bytes(ObjectMonitor::owner_offset()), monitor);
+    // We need a full fence after clearing owner to avoid stranding.
+    // StoreLoad achieves this.
+    membar(StoreLoad);
+
     // Check if the entry lists are empty.
     ld(t, in_bytes(ObjectMonitor::EntryList_offset()), monitor);
     ld(t2, in_bytes(ObjectMonitor::cxq_offset()), monitor);
     orr(t, t, t2);
     cmpdi(CCR0, t, 0);
-    beq(CCR0, release_);
+    beq(CCR0, unlocked); // If so we are done.
 
-    // The owner may be anonymous and we removed the last obj entry in
-    // the lock-stack. This loses the information about the owner.
-    // Write the thread to the owner field so the runtime knows the owner.
-    std(R16_thread, in_bytes(ObjectMonitor::owner_offset()), monitor);
+    // Check if there is a successor.
+    ld(t, in_bytes(ObjectMonitor::succ_offset()), monitor);
+    cmpdi(CCR0, t, 0);
+    bne(CCR0, set_eq_unlocked); // If so we are done.
+
+    // Save the monitor pointer in the current thread, so we can try
+    // to reacquire the lock in SharedRuntime::monitor_exit_helper().
+    std(monitor, in_bytes(JavaThread::unlocked_inflated_monitor_offset()), R16_thread);
+
+    crxor(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set flag = NE => slow path
     b(slow_path);
 
-    bind(release_);
-    // Set owner to null.
-    release();
-    // t contains 0
-    std(t, in_bytes(ObjectMonitor::owner_offset()), monitor);
+    bind(set_eq_unlocked);
+    crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set flag = EQ => fast path
   }
 
   bind(unlocked);
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index e2c9b9dd609..75f87e35adf 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -165,6 +165,7 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
   Register oop = objectReg;
   Register box = boxReg;
   Register disp_hdr = tmp1Reg;
+  Register owner_addr = tmp1Reg;
   Register tmp = tmp2Reg;
   Label object_has_monitor;
   // Finish fast lock successfully. MUST branch to with flag == 0
@@ -222,15 +223,33 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
   j(unlocked);
 
   bind(notRecursive);
-  ld(t0, Address(tmp, ObjectMonitor::EntryList_offset()));
-  ld(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset()));
-  orr(t0, t0, disp_hdr); // Will be 0 if both are 0.
-  bnez(t0, slow_path);
+  // Compute owner address.
+  la(owner_addr, Address(tmp, ObjectMonitor::owner_offset()));
 
-  // need a release store here
-  la(tmp, Address(tmp, ObjectMonitor::owner_offset()));
+  // Set owner to null.
+  // Release to satisfy the JMM
   membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
-  sd(zr, Address(tmp)); // set unowned
+  sd(zr, Address(owner_addr));
+  // We need a full fence after clearing owner to avoid stranding.
+  // StoreLoad achieves this.
+  membar(StoreLoad);
+
+  // Check if the entry lists are empty.
+  ld(t0, Address(tmp, ObjectMonitor::EntryList_offset()));
+  ld(tmp1Reg, Address(tmp, ObjectMonitor::cxq_offset()));
+  orr(t0, t0, tmp1Reg);
+  beqz(t0, unlocked); // If so we are done.
+
+  // Check if there is a successor.
+  ld(t0, Address(tmp, ObjectMonitor::succ_offset()));
+  bnez(t0, unlocked); // If so we are done.
+
+  // Save the monitor pointer in the current thread, so we can try to
+  // reacquire the lock in SharedRuntime::monitor_exit_helper().
+  sd(tmp, Address(xthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+  mv(flag, 1);
+  j(slow_path);
 
   bind(unlocked);
   mv(flag, zr);
@@ -534,28 +553,35 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box,
 
     bind(not_recursive);
 
-    Label release;
     const Register tmp2_owner_addr = tmp2;
 
     // Compute owner address.
     la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset()));
 
+    // Set owner to null.
+    // Release to satisfy the JMM
+    membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+    sd(zr, Address(tmp2_owner_addr));
+    // We need a full fence after clearing owner to avoid stranding.
+    // StoreLoad achieves this.
+    membar(StoreLoad);
+
     // Check if the entry lists are empty.
     ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset()));
     ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset()));
     orr(t0, t0, tmp3_t);
-    beqz(t0, release);
+    beqz(t0, unlocked); // If so we are done.
 
-    // The owner may be anonymous and we removed the last obj entry in
-    // the lock-stack. This loses the information about the owner.
-    // Write the thread to the owner field so the runtime knows the owner.
-    sd(xthread, Address(tmp2_owner_addr));
+    // Check if there is a successor.
+    ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::succ_offset()));
+    bnez(tmp3_t, unlocked); // If so we are done.
+
+    // Save the monitor pointer in the current thread, so we can try
+    // to reacquire the lock in SharedRuntime::monitor_exit_helper().
+    sd(tmp1_monitor, Address(xthread, JavaThread::unlocked_inflated_monitor_offset()));
+
+    mv(flag, 1);
     j(slow_path);
-
-    bind(release);
-    // Set owner to null.
-    membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
-    sd(zr, Address(tmp2_owner_addr));
   }
 
   bind(unlocked);
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
index 6c26e17d5ce..af281345b14 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
@@ -3655,12 +3655,38 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg
 
   bind(not_recursive);
 
-  load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
-  z_brne(done);
-  load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
-  z_brne(done);
+  NearLabel check_succ, set_eq_unlocked;
+
+  // Set owner to null.
+  // Release to satisfy the JMM
   z_release();
-  z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
+  z_lghi(temp, 0);
+  z_stg(temp, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader);
+  // We need a full fence after clearing owner to avoid stranding.
+  z_fence();
+
+  // Check if the entry lists are empty.
+  load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
+  z_brne(check_succ);
+  load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
+  z_bre(done); // If so we are done.
+
+  bind(check_succ);
+
+  // Check if there is a successor.
+  load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)));
+  z_brne(set_eq_unlocked); // If so we are done.
+
+  // Save the monitor pointer in the current thread, so we can try to
+  // reacquire the lock in SharedRuntime::monitor_exit_helper().
+  z_xilf(currentHeader, markWord::monitor_value);
+  z_stg(currentHeader, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset()));
+
+  z_ltgr(oop, oop); // Set flag = NE
+  z_bru(done);
+
+  bind(set_eq_unlocked);
+  z_cr(temp, temp); // Set flag = EQ
 
   bind(done);
 
@@ -6454,6 +6480,7 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
     const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
     const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
     const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
+    const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag};
     const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
     const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
 
@@ -6471,25 +6498,40 @@ void MacroAssembler::compiler_fast_unlock_lightweight_object(Register obj, Regis
 
     bind(not_recursive);
 
-    NearLabel not_ok;
+    NearLabel check_succ, set_eq_unlocked;
+
+    // Set owner to null.
+    // Release to satisfy the JMM
+    z_release();
+    z_lghi(tmp2, 0);
+    z_stg(tmp2 /*=0*/, owner_address);
+    // We need a full fence after clearing owner to avoid stranding.
+    z_fence();
+
     // Check if the entry lists are empty.
     load_and_test_long(tmp2, EntryList_address);
-    z_brne(not_ok);
+    z_brne(check_succ);
     load_and_test_long(tmp2, cxq_address);
-    z_brne(not_ok);
+    z_bre(unlocked); // If so we are done.
 
-    z_release();
-    z_stg(tmp2 /*=0*/, owner_address);
+    bind(check_succ);
 
-    z_bru(unlocked); // CC = EQ here
+    // Check if there is a successor.
+    load_and_test_long(tmp2, succ_address);
+    z_brne(set_eq_unlocked); // If so we are done.
 
-    bind(not_ok);
+    // Save the monitor pointer in the current thread, so we can try to
+    // reacquire the lock in SharedRuntime::monitor_exit_helper().
+    if (!UseObjectMonitorTable) {
+      z_xilf(monitor, markWord::monitor_value);
+    }
+    z_stg(monitor, Address(Z_thread, JavaThread::unlocked_inflated_monitor_offset()));
 
-    // The owner may be anonymous, and we removed the last obj entry in
-    // the lock-stack. This loses the information about the owner.
-    // Write the thread to the owner field so the runtime knows the owner.
-    z_stg(Z_thread, owner_address);
-    z_bru(slow_path); // CC = NE here
+    z_ltgr(obj, obj); // Set flag = NE
+    z_bru(slow_path);
+
+    bind(set_eq_unlocked);
+    z_cr(tmp2, tmp2); // Set flag = EQ
   }
 
   bind(unlocked);
diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
index 1990488d8a0..44f897529e7 100644
--- a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp
@@ -80,8 +80,6 @@ int C2FastUnlockLightweightStub::max_size() const {
 void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) {
   assert(_t == rax, "must be");
 
-  Label restore_held_monitor_count_and_slow_path;
-
   { // Restore lock-stack and handle the unlock in runtime.
 
     __ bind(_push_and_slow_path);
@@ -91,61 +89,9 @@ void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) {
     __ movptr(Address(_thread, _t), _obj);
 #endif
     __ addl(Address(_thread, JavaThread::lock_stack_top_offset()), oopSize);
-  }
-
-  { // Restore held monitor count and slow path.
-
-    __ bind(restore_held_monitor_count_and_slow_path);
-    __ bind(_slow_path);
-    // Restore held monitor count.
-    __ increment(Address(_thread, JavaThread::held_monitor_count_offset()));
-    // increment will always result in ZF = 0 (no overflows).
+    // addl will always result in ZF = 0 (no overflows).
     __ jmp(slow_path_continuation());
   }
-
-  { // Handle monitor medium path.
-
-    __ bind(_check_successor);
-
-    Label fix_zf_and_unlocked;
-    const Register monitor = _mark;
-
-#ifndef _LP64
-    __ jmpb(restore_held_monitor_count_and_slow_path);
-#else // _LP64
-    const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
-    const Address succ_address(monitor, ObjectMonitor::succ_offset() - monitor_tag);
-    const Address owner_address(monitor, ObjectMonitor::owner_offset() - monitor_tag);
-
-    // successor null check.
-    __ cmpptr(succ_address, NULL_WORD);
-    __ jccb(Assembler::equal, restore_held_monitor_count_and_slow_path);
-
-    // Release lock.
-    __ movptr(owner_address, NULL_WORD);
-
-    // Fence.
-    // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
-    __ lock(); __ addl(Address(rsp, 0), 0);
-
-    // Recheck successor.
-    __ cmpptr(succ_address, NULL_WORD);
-    // Observed a successor after the release -> fence we have handed off the monitor
-    __ jccb(Assembler::notEqual, fix_zf_and_unlocked);
-
-    // Try to relock, if it fails the monitor has been handed over
-    // TODO: Caveat, this may fail due to deflation, which does
-    //       not handle the monitor handoff. Currently only works
-    //       due to the responsible thread.
-    __ xorptr(rax, rax);
-    __ lock(); __ cmpxchgptr(_thread, owner_address);
-    __ jccb  (Assembler::equal, restore_held_monitor_count_and_slow_path);
-#endif
-
-    __ bind(fix_zf_and_unlocked);
-    __ xorl(rax, rax);
-    __ jmp(unlocked_continuation());
-  }
 }
 
 #undef __
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index c2801a791cb..839745f76ec 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -459,87 +459,43 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
   // IA32's memory-model is SPO, so STs are ordered with respect to
   // each other and there's no need for an explicit barrier (fence).
   // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
-#ifndef _LP64
-  // Note that we could employ various encoding schemes to reduce
-  // the number of loads below (currently 4) to just 2 or 3.
-  // Refer to the comments in synchronizer.cpp.
-  // In practice the chain of fetches doesn't seem to impact performance, however.
-  xorptr(boxReg, boxReg);
-  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
-  jccb  (Assembler::notZero, DONE_LABEL);
-  movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
-  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
-  jccb  (Assembler::notZero, DONE_LABEL);
-  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
-  jmpb  (DONE_LABEL);
-#else // _LP64
-  // It's inflated
-  Label CheckSucc, LNotRecursive, LSuccess, LGoSlowPath;
+  Label LSuccess, LNotRecursive;
 
   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
   jccb(Assembler::equal, LNotRecursive);
 
   // Recursive inflated unlock
-  decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
+  decrement(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
   jmpb(LSuccess);
 
   bind(LNotRecursive);
+
+  // Set owner to null.
+  // Release to satisfy the JMM
+  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+  // We need a full fence after clearing owner to avoid stranding.
+  // StoreLoad achieves this.
+  membar(StoreLoad);
+
+  // Check if the entry lists are empty.
   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
-  jccb  (Assembler::notZero, CheckSucc);
-  // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
-  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
-  jmpb  (DONE_LABEL);
+  jccb(Assembler::zero, LSuccess);    // If so we are done.
 
-  // Try to avoid passing control into the slow_path ...
-  bind  (CheckSucc);
-
-  // The following optional optimization can be elided if necessary
-  // Effectively: if (succ == null) goto slow path
-  // The code reduces the window for a race, however,
-  // and thus benefits performance.
+  // Check if there is a successor.
   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
-  jccb  (Assembler::zero, LGoSlowPath);
+  jccb(Assembler::notZero, LSuccess); // If so we are done.
 
-  xorptr(boxReg, boxReg);
-  // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
-  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
+  // Save the monitor pointer in the current thread, so we can try to
+  // reacquire the lock in SharedRuntime::monitor_exit_helper().
+  andptr(tmpReg, ~(int32_t)markWord::monitor_value);
+#ifndef _LP64
+  get_thread(boxReg);
+  movptr(Address(boxReg, JavaThread::unlocked_inflated_monitor_offset()), tmpReg);
+#else // _LP64
+  movptr(Address(r15_thread, JavaThread::unlocked_inflated_monitor_offset()), tmpReg);
+#endif
 
-  // Memory barrier/fence
-  // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
-  // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
-  // This is faster on Nehalem and AMD Shanghai/Barcelona.
-  // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
-  // We might also restructure (ST Owner=0;barrier;LD _Succ) to
-  // (mov box,0; xchgq box, &m->Owner; LD _succ) .
-  lock(); addl(Address(rsp, 0), 0);
-
-  cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
-  jccb  (Assembler::notZero, LSuccess);
-
-  // Rare inopportune interleaving - race.
-  // The successor vanished in the small window above.
-  // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
-  // We need to ensure progress and succession.
-  // Try to reacquire the lock.
-  // If that fails then the new owner is responsible for succession and this
-  // thread needs to take no further action and can exit via the fast path (success).
-  // If the re-acquire succeeds then pass control into the slow path.
-  // As implemented, this latter mode is horrible because we generated more
-  // coherence traffic on the lock *and* artificially extended the critical section
-  // length while by virtue of passing control into the slow path.
-
-  // box is really RAX -- the following CMPXCHG depends on that binding
-  // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
-  lock();
-  cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-  // There's no successor so we tried to regrab the lock.
-  // If that didn't work, then another thread grabbed the
-  // lock so we're done (and exit was a success).
-  jccb  (Assembler::notEqual, LSuccess);
-  // Intentional fall-through into slow path
-
-  bind  (LGoSlowPath);
   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
   jmpb  (DONE_LABEL);
 
@@ -547,7 +503,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t
   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
   jmpb  (DONE_LABEL);
 
-#endif
   if (LockingMode == LM_LEGACY) {
     bind  (Stacked);
     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
@@ -744,10 +699,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
   // Handle inflated monitor.
   Label inflated, inflated_check_lock_stack;
   // Finish fast unlock successfully.  MUST jump with ZF == 1
-  Label unlocked;
-
-  // Assume success.
-  decrement(Address(thread, JavaThread::held_monitor_count_offset()));
+  Label unlocked, slow_path;
 
   const Register mark = t;
   const Register monitor = t;
@@ -763,8 +715,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
   }
 
   Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path();
-  Label& check_successor = stub == nullptr ? dummy : stub->check_successor();
-  Label& slow_path = stub == nullptr ? dummy : stub->slow_path();
 
   { // Lightweight Unlock
 
@@ -839,6 +789,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
     const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
     const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag};
     const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag};
+    const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag};
     const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag};
     const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag};
 
@@ -846,27 +797,42 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
 
     // Check if recursive.
     cmpptr(recursions_address, 0);
-    jccb(Assembler::notEqual, recursive);
+    jccb(Assembler::notZero, recursive);
+
+    // Set owner to null.
+    // Release to satisfy the JMM
+    movptr(owner_address, NULL_WORD);
+    // We need a full fence after clearing owner to avoid stranding.
+    // StoreLoad achieves this.
+    membar(StoreLoad);
 
     // Check if the entry lists are empty.
     movptr(reg_rax, cxq_address);
     orptr(reg_rax, EntryList_address);
-    jcc(Assembler::notZero, check_successor);
+    jccb(Assembler::zero, unlocked);    // If so we are done.
 
-    // Release lock.
-    movptr(owner_address, NULL_WORD);
-    jmpb(unlocked);
+    // Check if there is a successor.
+    cmpptr(succ_address, NULL_WORD);
+    jccb(Assembler::notZero, unlocked); // If so we are done.
+
+    // Save the monitor pointer in the current thread, so we can try to
+    // reacquire the lock in SharedRuntime::monitor_exit_helper().
+    if (!UseObjectMonitorTable) {
+      andptr(monitor, ~(int32_t)markWord::monitor_value);
+    }
+    movptr(Address(thread, JavaThread::unlocked_inflated_monitor_offset()), monitor);
+
+    orl(t, 1);                          // Fast Unlock ZF = 0; monitor (== t) is already saved, and testl would only check its low 32 bits
+    jmpb(slow_path);
 
     // Recursive unlock.
     bind(recursive);
     decrement(recursions_address);
-    xorl(t, t);
   }
 
   bind(unlocked);
-  if (stub != nullptr) {
-    bind(stub->unlocked_continuation());
-  }
+  decrement(Address(thread, JavaThread::held_monitor_count_offset()));
+  xorl(t, t); // Fast Unlock ZF = 1
 
 #ifdef ASSERT
   // Check that unlocked label is reached with ZF set.
@@ -875,6 +841,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax,
   stop("Fast Unlock ZF != 1");
 #endif
 
+  bind(slow_path);
   if (stub != nullptr) {
     bind(stub->slow_path_continuation());
   }
diff --git a/src/hotspot/share/opto/c2_CodeStubs.hpp b/src/hotspot/share/opto/c2_CodeStubs.hpp
index 5db7596e072..318bc2f45fc 100644
--- a/src/hotspot/share/opto/c2_CodeStubs.hpp
+++ b/src/hotspot/share/opto/c2_CodeStubs.hpp
@@ -105,7 +105,6 @@ private:
   Register _thread;
   Label _slow_path;
   Label _push_and_slow_path;
-  Label _check_successor;
   Label _unlocked_continuation;
 public:
   C2FastUnlockLightweightStub(Register obj, Register mark, Register t, Register thread) : C2CodeStub(),
@@ -114,7 +113,6 @@ public:
   void emit(C2_MacroAssembler& masm);
   Label& slow_path() { return _slow_path; }
   Label& push_and_slow_path() { return _push_and_slow_path; }
-  Label& check_successor() { return _check_successor; }
   Label& unlocked_continuation() { return _unlocked_continuation; }
   Label& slow_path_continuation() { return continuation(); }
 };
diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp
index 3528fc5b1bc..14528f6d908 100644
--- a/src/hotspot/share/runtime/javaThread.cpp
+++ b/src/hotspot/share/runtime/javaThread.cpp
@@ -487,6 +487,7 @@ JavaThread::JavaThread(MemTag mem_tag) :
   _cont_fastpath_thread_state(1),
   _held_monitor_count(0),
   _jni_monitor_count(0),
+  _unlocked_inflated_monitor(nullptr),
 
   _handshake(this),
 
diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp
index e36b7dfe888..20bb08a4acb 100644
--- a/src/hotspot/share/runtime/javaThread.hpp
+++ b/src/hotspot/share/runtime/javaThread.hpp
@@ -464,6 +464,7 @@ class JavaThread: public Thread {
   // It's signed for error detection.
   intx _held_monitor_count;  // used by continuations for fast lock detection
   intx _jni_monitor_count;
+  ObjectMonitor* _unlocked_inflated_monitor;
 
 private:
 
@@ -615,6 +616,12 @@ private:
   intx jni_monitor_count()  { return _jni_monitor_count;  }
   void clear_jni_monitor_count() { _jni_monitor_count = 0;   }
 
+  // Support for SharedRuntime::monitor_exit_helper(): monitor stored by the compiled fast-unlock code before it takes the slow path.
+  ObjectMonitor* unlocked_inflated_monitor() const { return _unlocked_inflated_monitor; }
+  void clear_unlocked_inflated_monitor() {
+    _unlocked_inflated_monitor = nullptr;
+  }
+
   inline bool is_vthread_mounted() const;
   inline const ContinuationEntry* vthread_continuation() const;
 
@@ -828,6 +835,7 @@ private:
   static ByteSize cont_fastpath_offset()      { return byte_offset_of(JavaThread, _cont_fastpath); }
   static ByteSize held_monitor_count_offset() { return byte_offset_of(JavaThread, _held_monitor_count); }
   static ByteSize jni_monitor_count_offset()  { return byte_offset_of(JavaThread, _jni_monitor_count); }
+  static ByteSize unlocked_inflated_monitor_offset() { return byte_offset_of(JavaThread, _unlocked_inflated_monitor); }
 
 #if INCLUDE_JVMTI
   static ByteSize is_in_VTMS_transition_offset()     { return byte_offset_of(JavaThread, _is_in_VTMS_transition); }
diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp
index 367d79a5283..755d49d2c6c 100644
--- a/src/hotspot/share/runtime/objectMonitor.cpp
+++ b/src/hotspot/share/runtime/objectMonitor.cpp
@@ -178,7 +178,7 @@ OopStorage* ObjectMonitor::_oop_storage = nullptr;
 //
 //   Cxq points to the set of Recently Arrived Threads attempting entry.
 //   Because we push threads onto _cxq with CAS, the RATs must take the form of
-//   a singly-linked LIFO.  We drain _cxq into EntryList  at unlock-time when
+//   a singly-linked LIFO.  We drain _cxq into EntryList at unlock-time when
 //   the unlocking thread notices that EntryList is null but _cxq is != null.
 //
 //   The EntryList is ordered by the prevailing queue discipline and
@@ -210,19 +210,6 @@ OopStorage* ObjectMonitor::_oop_storage = nullptr;
 //   unpark the notifyee.  Unparking a notifee in notify() is inefficient -
 //   it's likely the notifyee would simply impale itself on the lock held
 //   by the notifier.
-//
-// * An interesting alternative is to encode cxq as (List,LockByte) where
-//   the LockByte is 0 iff the monitor is owned.  _owner is simply an auxiliary
-//   variable, like _recursions, in the scheme.  The threads or Events that form
-//   the list would have to be aligned in 256-byte addresses.  A thread would
-//   try to acquire the lock or enqueue itself with CAS, but exiting threads
-//   could use a 1-0 protocol and simply STB to set the LockByte to 0.
-//   Note that is is *not* word-tearing, but it does presume that full-word
-//   CAS operations are coherent with intermix with STB operations.  That's true
-//   on most common processors.
-//
-// * See also http://blogs.sun.com/dave
-
 
 // Check that object() and set_object() are called from the right context:
 static void check_object_context() {
@@ -257,7 +244,6 @@ ObjectMonitor::ObjectMonitor(oop object) :
   _EntryList(nullptr),
   _cxq(nullptr),
   _succ(nullptr),
-  _Responsible(nullptr),
   _SpinDuration(ObjectMonitor::Knob_SpinLimit),
   _contentions(0),
   _WaitSet(nullptr),
@@ -320,17 +306,11 @@ bool ObjectMonitor::enter_is_async_deflating() {
   return false;
 }
 
-void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) {
-  // Used by ObjectSynchronizer::enter_for to enter for another thread.
-  // The monitor is private to or already owned by locking_thread which must be suspended.
-  // So this code may only contend with deflation.
-  assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be");
+bool ObjectMonitor::TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) {
   assert(contention_mark._monitor == this, "must be");
   assert(!is_being_async_deflated(), "must be");
 
-
   void* prev_owner = try_set_owner_from(nullptr, locking_thread);
-
   bool success = false;
 
   if (prev_owner == nullptr) {
@@ -343,8 +323,16 @@ void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, O
     // Racing with deflation.
     prev_owner = try_set_owner_from(DEFLATER_MARKER, locking_thread);
     if (prev_owner == DEFLATER_MARKER) {
-      // Cancelled deflation. Increment contentions as part of the deflation protocol.
-      add_to_contentions(1);
+      // We successfully cancelled the in-progress async deflation by
+      // changing owner from DEFLATER_MARKER to locking_thread.  We now
+      // extend the lifetime of the contention_mark (i.e. contentions++)
+      // here to prevent the deflater thread from winning the last part
+      // of the 2-part async deflation protocol after the regular
+      // decrement occurs when the contention_mark goes out of
+      // scope. ObjectMonitor::deflate_monitor(), which is called by
+      // the deflater thread, will decrement contentions after it
+      // recognizes that the async deflation was cancelled.
+      contention_mark.extend();
       success = true;
     } else if (prev_owner == nullptr) {
       // At this point we cannot race with deflation as we have both incremented
@@ -360,12 +348,28 @@ void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, O
     set_owner_from_BasicLock(prev_owner, locking_thread);
     success = true;
   }
+  assert(!success || owner_raw() == locking_thread, "must be");
+
+  return success;
+}
+
+void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) {
+  // Used by LightweightSynchronizer::inflate_and_enter in the deoptimization path to enter for another thread.
+  // The monitor is private to or already owned by locking_thread which must be suspended.
+  // So this code may only contend with deflation.
+  assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be");
+  bool success = TryLockWithContentionMark(locking_thread, contention_mark);
+
   assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT
-          ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}, observed owner: " INTPTR_FORMAT,
-          p2i(locking_thread), p2i(this), p2i(owner_raw()), p2i(prev_owner));
+         ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}",
+         p2i(locking_thread), p2i(this), p2i(owner_raw()));
 }
 
 bool ObjectMonitor::enter_for(JavaThread* locking_thread) {
+  // Used by ObjectSynchronizer::enter_for() to enter for another thread.
+  // The monitor is private to or already owned by locking_thread which must be suspended.
+  // So this code may only contend with deflation.
+  assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be");
 
   // Block out deflation as soon as possible.
   ObjectMonitorContentionMark contention_mark(this);
@@ -375,19 +379,29 @@ bool ObjectMonitor::enter_for(JavaThread* locking_thread) {
     return false;
   }
 
-  enter_for_with_contention_mark(locking_thread, contention_mark);
+  bool success = TryLockWithContentionMark(locking_thread, contention_mark);
+
+  assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT
+         ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}",
+         p2i(locking_thread), p2i(this), p2i(owner_raw()));
   assert(owner_raw() == locking_thread, "must be");
   return true;
 }
 
-bool ObjectMonitor::try_enter(JavaThread* current) {
-  // TryLock avoids the CAS
+bool ObjectMonitor::try_enter(JavaThread* current, bool check_for_recursion) {
+  // TryLock avoids the CAS and handles deflation.
   TryLockResult r = TryLock(current);
   if (r == TryLockResult::Success) {
     assert(_recursions == 0, "invariant");
     return true;
   }
 
+  // If called from SharedRuntime::monitor_exit_helper(), we know that
+  // this thread doesn't already own the lock.
+  if (!check_for_recursion) {
+    return false;
+  }
+
   if (r == TryLockResult::HasOwner && owner() == current) {
     _recursions++;
     return true;
@@ -400,7 +414,6 @@ bool ObjectMonitor::try_enter(JavaThread* current) {
     set_owner_from_BasicLock(cur, current);  // Convert from BasicLock* to Thread*.
     return true;
   }
-
   return false;
 }
 
@@ -561,16 +574,40 @@ void ObjectMonitor::enter_with_contention_mark(JavaThread *current, ObjectMonito
 
 ObjectMonitor::TryLockResult ObjectMonitor::TryLock(JavaThread* current) {
   void* own = owner_raw();
-  if (own != nullptr) return TryLockResult::HasOwner;
-  if (try_set_owner_from(nullptr, current) == nullptr) {
-    assert(_recursions == 0, "invariant");
-    return TryLockResult::Success;
+  void* first_own = own;
+
+  for (;;) {
+    if (own == DEFLATER_MARKER) {
+      // Block out deflation as soon as possible.
+      ObjectMonitorContentionMark contention_mark(this);
+
+      // Check for deflation.
+      if (enter_is_async_deflating()) {
+        // Treat deflation as interference.
+        return TryLockResult::Interference;
+      }
+      if (TryLockWithContentionMark(current, contention_mark)) {
+        assert(_recursions == 0, "invariant");
+        return TryLockResult::Success;
+      } else {
+        // Deflation won or change of owner; don't spin
+        break;
+      }
+    } else if (own == nullptr) {
+      void* prev_own = try_set_owner_from(nullptr, current);
+      if (prev_own == nullptr) {
+        assert(_recursions == 0, "invariant");
+        return TryLockResult::Success;
+      } else {
+        // The lock had been free momentarily, but we lost the race to the lock.
+        own = prev_own;
+      }
+    } else {
+      // Retry doesn't make as much sense because the lock was just acquired.
+      break;
+    }
   }
-  // The lock had been free momentarily, but we lost the race to the lock.
-  // Interference -- the CAS failed.
-  // We can either return -1 or retry.
-  // Retry doesn't make as much sense because the lock was just acquired.
-  return TryLockResult::Interference;
+  return first_own == own ? TryLockResult::HasOwner : TryLockResult::Interference;
 }
 
 // Deflate the specified ObjectMonitor if not in-use. Returns true if it
@@ -746,8 +783,6 @@ const char* ObjectMonitor::is_busy_to_string(stringStream* ss) {
   return ss->base();
 }
 
-#define MAX_RECHECK_INTERVAL 1000
-
 void ObjectMonitor::EnterI(JavaThread* current) {
   assert(current->thread_state() == _thread_blocked, "invariant");
 
@@ -755,25 +790,6 @@ void ObjectMonitor::EnterI(JavaThread* current) {
   if (TryLock(current) == TryLockResult::Success) {
     assert(_succ != current, "invariant");
     assert(owner_raw() == current, "invariant");
-    assert(_Responsible != current, "invariant");
-    return;
-  }
-
-  if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) {
-    // Cancelled the in-progress async deflation by changing owner from
-    // DEFLATER_MARKER to current. As part of the contended enter protocol,
-    // contentions was incremented to a positive value before EnterI()
-    // was called and that prevents the deflater thread from winning the
-    // last part of the 2-part async deflation protocol. After EnterI()
-    // returns to enter(), contentions is decremented because the caller
-    // now owns the monitor. We bump contentions an extra time here to
-    // prevent the deflater thread from winning the last part of the
-    // 2-part async deflation protocol after the regular decrement
-    // occurs in enter(). The deflater thread will decrement contentions
-    // after it recognizes that the async deflation was cancelled.
-    add_to_contentions(1);
-    assert(_succ != current, "invariant");
-    assert(_Responsible != current, "invariant");
     return;
   }
 
@@ -789,14 +805,12 @@ void ObjectMonitor::EnterI(JavaThread* current) {
   if (TrySpin(current)) {
     assert(owner_raw() == current, "invariant");
     assert(_succ != current, "invariant");
-    assert(_Responsible != current, "invariant");
     return;
   }
 
   // The Spin failed -- Enqueue and park the thread ...
   assert(_succ != current, "invariant");
   assert(owner_raw() != current, "invariant");
-  assert(_Responsible != current, "invariant");
 
   // Enqueue "current" on ObjectMonitor's _cxq.
   //
@@ -826,40 +840,10 @@ void ObjectMonitor::EnterI(JavaThread* current) {
     if (TryLock(current) == TryLockResult::Success) {
       assert(_succ != current, "invariant");
       assert(owner_raw() == current, "invariant");
-      assert(_Responsible != current, "invariant");
       return;
     }
   }
 
-  // Check for cxq|EntryList edge transition to non-null.  This indicates
-  // the onset of contention.  While contention persists exiting threads
-  // will use a ST:MEMBAR:LD 1-1 exit protocol.  When contention abates exit
-  // operations revert to the faster 1-0 mode.  This enter operation may interleave
-  // (race) a concurrent 1-0 exit operation, resulting in stranding, so we
-  // arrange for one of the contending thread to use a timed park() operations
-  // to detect and recover from the race.  (Stranding is form of progress failure
-  // where the monitor is unlocked but all the contending threads remain parked).
-  // That is, at least one of the contended threads will periodically poll _owner.
-  // One of the contending threads will become the designated "Responsible" thread.
-  // The Responsible thread uses a timed park instead of a normal indefinite park
-  // operation -- it periodically wakes and checks for and recovers from potential
-  // strandings admitted by 1-0 exit operations.   We need at most one Responsible
-  // thread per-monitor at any given moment.  Only threads on cxq|EntryList may
-  // be responsible for a monitor.
-  //
-  // Currently, one of the contended threads takes on the added role of "Responsible".
-  // A viable alternative would be to use a dedicated "stranding checker" thread
-  // that periodically iterated over all the threads (or active monitors) and unparked
-  // successors where there was risk of stranding.  This would help eliminate the
-  // timer scalability issues we see on some platforms as we'd only have one thread
-  // -- the checker -- parked on a timer.
-
-  if (nxt == nullptr && _EntryList == nullptr) {
-    // Try to assume the role of responsible thread for the monitor.
-    // CONSIDER:  ST vs CAS vs { if (Responsible==null) Responsible=current }
-    Atomic::replace_if_null(&_Responsible, current);
-  }
-
   // The lock might have been released while this thread was occupied queueing
   // itself onto _cxq.  To close the race and avoid "stranding" and
   // progress-liveness failure we must resample-retry _owner before parking.
@@ -871,8 +855,6 @@ void ObjectMonitor::EnterI(JavaThread* current) {
   // to defer the state transitions until absolutely necessary,
   // and in doing so avoid some transitions ...
 
-  int recheckInterval = 1;
-
   for (;;) {
 
     if (TryLock(current) == TryLockResult::Success) {
@@ -881,37 +863,12 @@ void ObjectMonitor::EnterI(JavaThread* current) {
     assert(owner_raw() != current, "invariant");
 
     // park self
-    if (_Responsible == current) {
-      current->_ParkEvent->park((jlong) recheckInterval);
-      // Increase the recheckInterval, but clamp the value.
-      recheckInterval *= 8;
-      if (recheckInterval > MAX_RECHECK_INTERVAL) {
-        recheckInterval = MAX_RECHECK_INTERVAL;
-      }
-    } else {
-      current->_ParkEvent->park();
-    }
+    current->_ParkEvent->park();
 
     if (TryLock(current) == TryLockResult::Success) {
       break;
     }
 
-    if (try_set_owner_from(DEFLATER_MARKER, current) == DEFLATER_MARKER) {
-      // Cancelled the in-progress async deflation by changing owner from
-      // DEFLATER_MARKER to current. As part of the contended enter protocol,
-      // contentions was incremented to a positive value before EnterI()
-      // was called and that prevents the deflater thread from winning the
-      // last part of the 2-part async deflation protocol. After EnterI()
-      // returns to enter(), contentions is decremented because the caller
-      // now owns the monitor. We bump contentions an extra time here to
-      // prevent the deflater thread from winning the last part of the
-      // 2-part async deflation protocol after the regular decrement
-      // occurs in enter(). The deflater thread will decrement contentions
-      // after it recognizes that the async deflation was cancelled.
-      add_to_contentions(1);
-      break;
-    }
-
     // The lock is still contested.
 
     // Keep a tally of the # of futile wakeups.
@@ -953,44 +910,23 @@ void ObjectMonitor::EnterI(JavaThread* current) {
   assert(owner_raw() == current, "invariant");
 
   UnlinkAfterAcquire(current, &node);
-  if (_succ == current) _succ = nullptr;
-
-  assert(_succ != current, "invariant");
-  if (_Responsible == current) {
-    _Responsible = nullptr;
-    OrderAccess::fence(); // Dekker pivot-point
-
-    // We may leave threads on cxq|EntryList without a designated
-    // "Responsible" thread.  This is benign.  When this thread subsequently
-    // exits the monitor it can "see" such preexisting "old" threads --
-    // threads that arrived on the cxq|EntryList before the fence, above --
-    // by LDing cxq|EntryList.  Newly arrived threads -- that is, threads
-    // that arrive on cxq after the ST:MEMBAR, above -- will set Responsible
-    // non-null and elect a new "Responsible" timer thread.
-    //
-    // This thread executes:
-    //    ST Responsible=null; MEMBAR    (in enter epilogue - here)
-    //    LD cxq|EntryList               (in subsequent exit)
-    //
-    // Entering threads in the slow/contended path execute:
-    //    ST cxq=nonnull; MEMBAR; LD Responsible (in enter prolog)
-    //    The (ST cxq; MEMBAR) is accomplished with CAS().
-    //
-    // The MEMBAR, above, prevents the LD of cxq|EntryList in the subsequent
-    // exit operation from floating above the ST Responsible=null.
+  if (_succ == current) {
+    _succ = nullptr;
+    // Note that we don't need to do OrderAccess::fence() after clearing
+    // _succ here, since we own the lock.
   }
 
   // We've acquired ownership with CAS().
   // CAS is serializing -- it has MEMBAR/FENCE-equivalent semantics.
   // But since the CAS() this thread may have also stored into _succ,
-  // EntryList, cxq or Responsible.  These meta-data updates must be
+  // EntryList or cxq.  These meta-data updates must be
   // visible __before this thread subsequently drops the lock.
   // Consider what could occur if we didn't enforce this constraint --
   // STs to monitor meta-data and user-data could reorder with (become
   // visible after) the ST in exit that drops ownership of the lock.
   // Some other thread could then acquire the lock, but observe inconsistent
   // or old monitor meta-data and heap data.  That violates the JMM.
-  // To that end, the 1-0 exit() operation must have at least STST|LDST
+  // To that end, the exit() operation must have at least STST|LDST
   // "release" barrier semantics.  Specifically, there must be at least a
   // STST|LDST barrier in exit() before the ST of null into _owner that drops
   // the lock.   The barrier ensures that changes to monitor meta-data and data
@@ -1000,8 +936,7 @@ void ObjectMonitor::EnterI(JavaThread* current) {
   //
   // Critically, any prior STs to _succ or EntryList must be visible before
   // the ST of null into _owner in the *subsequent* (following) corresponding
-  // monitorexit.  Recall too, that in 1-0 mode monitorexit does not necessarily
-  // execute a serializing instruction.
+  // monitorexit.
 
   return;
 }
@@ -1174,39 +1109,32 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren
 // In that case exit() is called with _thread_state == _thread_blocked,
 // but the monitor's _contentions field is > 0, which inhibits reclamation.
 //
-// 1-0 exit
-// ~~~~~~~~
-// ::exit() uses a canonical 1-1 idiom with a MEMBAR although some of
-// the fast-path operators have been optimized so the common ::exit()
-// operation is 1-0, e.g., see macroAssembler_x86.cpp: fast_unlock().
-// The code emitted by fast_unlock() elides the usual MEMBAR.  This
-// greatly improves latency -- MEMBAR and CAS having considerable local
-// latency on modern processors -- but at the cost of "stranding".  Absent the
-// MEMBAR, a thread in fast_unlock() can race a thread in the slow
-// ::enter() path, resulting in the entering thread being stranding
-// and a progress-liveness failure.   Stranding is extremely rare.
-// We use timers (timed park operations) & periodic polling to detect
-// and recover from stranding.  Potentially stranded threads periodically
-// wake up and poll the lock.  See the usage of the _Responsible variable.
+// This is the exit part of the locking protocol, often implemented in
+// C2_MacroAssembler::fast_unlock()
 //
-// The CAS() in enter provides for safety and exclusion, while the CAS or
-// MEMBAR in exit provides for progress and avoids stranding.  1-0 locking
-// eliminates the CAS/MEMBAR from the exit path, but it admits stranding.
-// We detect and recover from stranding with timers.
+//   1. A release barrier ensures that changes to monitor meta-data
+//      (_succ, _EntryList, _cxq) and data protected by the lock will be
+//      visible before we release the lock.
+//   2. Release the lock by clearing the owner.
+//   3. A storeload MEMBAR is needed between releasing the owner and
+//      subsequently reading meta-data to safely determine if the lock is
+//      contended (step 4) without an elected successor (step 5).
+//   4. If both _EntryList and _cxq are null, we are done, since there is no
+//      other thread waiting on the lock to wake up. I.e. there is no
+//      contention.
+//   5. If there is a successor (_succ is non-null), we are done. The
+//      responsibility for guaranteeing progress-liveness has now implicitly
+//      been moved from the exiting thread to the successor.
+//   6. There are waiters in the entry list (_EntryList and/or _cxq are
+//      non-null), but there is no successor (_succ is null), so we need to
+//      wake up (unpark) a waiting thread to avoid stranding.
 //
-// If a thread transiently strands it'll park until (a) another
-// thread acquires the lock and then drops the lock, at which time the
-// exiting thread will notice and unpark the stranded thread, or, (b)
-// the timer expires.  If the lock is high traffic then the stranding latency
-// will be low due to (a).  If the lock is low traffic then the odds of
-// stranding are lower, although the worst-case stranding latency
-// is longer.  Critically, we don't want to put excessive load in the
-// platform's timer subsystem.  We want to minimize both the timer injection
-// rate (timers created/sec) as well as the number of timers active at
-// any one time.  (more precisely, we want to minimize timer-seconds, which is
-// the integral of the # of active timers at any instant over time).
-// Both impinge on OS scalability.  Given that, at most one thread parked on
-// a monitor will use a timer.
+// Note that since only the current lock owner can manipulate the _EntryList
+// or drain _cxq, we need to reacquire the lock before we can wake up
+// (unpark) a waiting thread.
+//
+// The CAS() in enter provides for safety and exclusion, while the
+// MEMBAR in exit provides for progress and avoids stranding.
 //
 // There is also the risk of a futile wake-up. If we drop the lock
 // another thread can reacquire the lock immediately, and we can
@@ -1248,10 +1176,6 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) {
     return;
   }
 
-  // Invariant: after setting Responsible=null an thread must execute
-  // a MEMBAR or other serializing instruction before fetching EntryList|cxq.
-  _Responsible = nullptr;
-
 #if INCLUDE_JFR
   // get the owner's thread id for the MonitorEnter event
   // if it is enabled and the thread isn't suspended
@@ -1278,14 +1202,15 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) {
     // Other threads are blocked trying to acquire the lock.
 
     // Normally the exiting thread is responsible for ensuring succession,
-    // but if other successors are ready or other entering threads are spinning
-    // then this thread can simply store null into _owner and exit without
-    // waking a successor.  The existence of spinners or ready successors
-    // guarantees proper succession (liveness).  Responsibility passes to the
-    // ready or running successors.  The exiting thread delegates the duty.
-    // More precisely, if a successor already exists this thread is absolved
-    // of the responsibility of waking (unparking) one.
-    //
+    // but if this thread observes other successors are ready or other
+    // entering threads are spinning after it has stored null into _owner
+    // then it can exit without waking a successor.  The existence of
+    // spinners or ready successors guarantees proper succession (liveness).
+    // Responsibility passes to the ready or running successors.  The exiting
+    // thread delegates the duty.  More precisely, if a successor already
+    // exists this thread is absolved of the responsibility of waking
+    // (unparking) one.
+
     // The _succ variable is critical to reducing futile wakeup frequency.
     // _succ identifies the "heir presumptive" thread that has been made
     // ready (unparked) but that has not yet run.  We need only one such
@@ -1296,24 +1221,20 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) {
     // Note that spinners in Enter() also set _succ non-null.
     // In the current implementation spinners opportunistically set
     // _succ so that exiting threads might avoid waking a successor.
-    // Another less appealing alternative would be for the exiting thread
-    // to drop the lock and then spin briefly to see if a spinner managed
-    // to acquire the lock.  If so, the exiting thread could exit
-    // immediately without waking a successor, otherwise the exiting
-    // thread would need to dequeue and wake a successor.
-    // (Note that we'd need to make the post-drop spin short, but no
-    // shorter than the worst-case round-trip cache-line migration time.
-    // The dropped lock needs to become visible to the spinner, and then
-    // the acquisition of the lock by the spinner must become visible to
-    // the exiting thread).
+    // This means that the exiting thread can exit immediately without
+    // waking a successor, if it observes a successor after it has dropped
+    // the lock.  Note that the dropped lock needs to become visible to the
+    // spinner.
 
     // It appears that an heir-presumptive (successor) must be made ready.
     // Only the current lock owner can manipulate the EntryList or
     // drain _cxq, so we need to reacquire the lock.  If we fail
     // to reacquire the lock the responsibility for ensuring succession
     // falls to the new owner.
-    //
-    if (try_set_owner_from(nullptr, current) != nullptr) {
+
+    if (TryLock(current) != TryLockResult::Success) {
+      // Some other thread acquired the lock (or the monitor was
+      // deflated). Either way we are done.
       return;
     }
 
@@ -1376,7 +1297,7 @@ void ObjectMonitor::exit(JavaThread* current, bool not_suspended) {
       q = p;
     }
 
-    // In 1-0 mode we need: ST EntryList; MEMBAR #storestore; ST _owner = nullptr
+    // We need to: ST EntryList; MEMBAR #storestore; ST _owner = nullptr
     // The MEMBAR is satisfied by the release_store() operation in ExitEpilog().
 
     // See if we can abdicate to a spinner instead of waking a thread.
@@ -1566,8 +1487,6 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) {
   AddWaiter(&node);
   Thread::SpinRelease(&_WaitSetLock);
 
-  _Responsible = nullptr;
-
   intx save = _recursions;     // record the old recursion count
   _waiters++;                  // increment the number of waiters
   _recursions = 0;             // set the recursion level to be 1
@@ -2245,7 +2164,6 @@ void ObjectMonitor::print() const { print_on(tty); }
 //   _EntryList = 0x0000000000000000
 //   _cxq = 0x0000000000000000
 //   _succ = 0x0000000000000000
-//   _Responsible = 0x0000000000000000
 //   _SpinDuration = 5000
 //   _contentions = 0
 //   _WaitSet = 0x0000700009756248
@@ -2274,7 +2192,6 @@ void ObjectMonitor::print_debug_style_on(outputStream* st) const {
   st->print_cr("  _EntryList = " INTPTR_FORMAT, p2i(_EntryList));
   st->print_cr("  _cxq = " INTPTR_FORMAT, p2i(_cxq));
   st->print_cr("  _succ = " INTPTR_FORMAT, p2i(_succ));
-  st->print_cr("  _Responsible = " INTPTR_FORMAT, p2i(_Responsible));
   st->print_cr("  _SpinDuration = %d", _SpinDuration);
   st->print_cr("  _contentions = %d", contentions());
   st->print_cr("  _WaitSet = " INTPTR_FORMAT, p2i(_WaitSet));
diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp
index ef85559c2b6..30d2e509416 100644
--- a/src/hotspot/share/runtime/objectMonitor.hpp
+++ b/src/hotspot/share/runtime/objectMonitor.hpp
@@ -179,7 +179,6 @@ class ObjectMonitor : public CHeapObj<mtObjectMonitor> {
 
   ObjectWaiter* volatile _cxq;      // LL of recently-arrived threads blocked on entry.
   JavaThread* volatile _succ;       // Heir presumptive thread - used for futile wakeup throttling
-  JavaThread* volatile _Responsible;
 
   volatile int _SpinDuration;
 
@@ -348,7 +347,7 @@ class ObjectMonitor : public CHeapObj<mtObjectMonitor> {
   void      enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark);
   bool      enter_for(JavaThread* locking_thread);
   bool      enter(JavaThread* current);
-  bool      try_enter(JavaThread* current);
+  bool      try_enter(JavaThread* current, bool check_for_recursion = true);
   bool      spin_enter(JavaThread* current);
   void      enter_with_contention_mark(JavaThread* current, ObjectMonitorContentionMark& contention_mark);
   void      exit(JavaThread* current, bool not_suspended = true);
@@ -377,6 +376,7 @@ class ObjectMonitor : public CHeapObj<mtObjectMonitor> {
 
   enum class TryLockResult { Interference = -1, HasOwner = 0, Success = 1 };
 
+  bool           TryLockWithContentionMark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark);
   TryLockResult  TryLock(JavaThread* current);
 
   bool      TrySpin(JavaThread* current);
@@ -395,12 +395,17 @@ class ObjectMonitorContentionMark : StackObj {
   DEBUG_ONLY(friend class ObjectMonitor;)
 
   ObjectMonitor* _monitor;
+  bool _extended;
 
   NONCOPYABLE(ObjectMonitorContentionMark);
 
  public:
   explicit ObjectMonitorContentionMark(ObjectMonitor* monitor);
   ~ObjectMonitorContentionMark();
+
+  // Extends the contention scope beyond this object's lifetime.
+  // Requires manual decrement of the contentions counter.
+  void extend();
 };
 
 #endif // SHARE_RUNTIME_OBJECTMONITOR_HPP
diff --git a/src/hotspot/share/runtime/objectMonitor.inline.hpp b/src/hotspot/share/runtime/objectMonitor.inline.hpp
index d26c459b1b4..6d3c6ff24c3 100644
--- a/src/hotspot/share/runtime/objectMonitor.inline.hpp
+++ b/src/hotspot/share/runtime/objectMonitor.inline.hpp
@@ -206,15 +206,32 @@ inline void ObjectMonitor::set_next_om(ObjectMonitor* new_value) {
   Atomic::store(&_next_om, new_value);
 }
 
+// Block out deflation.
 inline ObjectMonitorContentionMark::ObjectMonitorContentionMark(ObjectMonitor* monitor)
-  : _monitor(monitor) {
+  : _monitor(monitor), _extended(false) {
+  // Contentions is incremented to a positive value as part of the
+  // contended enter protocol, which prevents the deflater thread from
+  // winning the last part of the 2-part async deflation
+  // protocol. See: ObjectMonitor::deflate_monitor() and
+  // ObjectMonitor::TryLockWithContentionMark().
   _monitor->add_to_contentions(1);
 }
 
 inline ObjectMonitorContentionMark::~ObjectMonitorContentionMark() {
+  // Decrement contentions when the contention mark goes out of
+  // scope. This allows deflation to proceed, unless the contention
+  // mark has been extended.
   _monitor->add_to_contentions(-1);
 }
 
+inline void ObjectMonitorContentionMark::extend() {
+  // Used by ObjectMonitor::TryLockWithContentionMark() to "extend the
+  // lifetime" of the contention mark.
+  assert(!_extended, "extending twice is probably a bad design");
+  _monitor->add_to_contentions(1);
+  _extended = true;
+}
+
 inline oop ObjectMonitor::object_peek() const {
   if (_object.is_null()) {
     return nullptr;
diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp
index 6ca7f42e038..e4d4e6aea0f 100644
--- a/src/hotspot/share/runtime/sharedRuntime.cpp
+++ b/src/hotspot/share/runtime/sharedRuntime.cpp
@@ -1963,6 +1963,26 @@ void SharedRuntime::monitor_exit_helper(oopDesc* obj, BasicLock* lock, JavaThrea
   assert(JavaThread::current() == current, "invariant");
   // Exit must be non-blocking, and therefore no exceptions can be thrown.
   ExceptionMark em(current);
+
+  // Check if C2_MacroAssembler::fast_unlock() or
+  // C2_MacroAssembler::fast_unlock_lightweight() unlocked an inflated
+  // monitor before going slow path.  Since there is no safepoint
+  // polling when calling into the VM, we can be sure that the monitor
+  // hasn't been deallocated.
+  ObjectMonitor* m = current->unlocked_inflated_monitor();
+  if (m != nullptr) {
+    assert(m->owner_raw() != current, "must be");
+    current->clear_unlocked_inflated_monitor();
+
+    // We need to reacquire the lock before we can call ObjectSynchronizer::exit().
+    if (!m->try_enter(current, /*check_for_recursion*/ false)) {
+      // Some other thread acquired the lock (or the monitor was
+      // deflated). Either way we are done.
+      current->dec_held_monitor_count();
+      return;
+    }
+  }
+
   // The object could become unlocked through a JNI call, which we have no other checks for.
   // Give a fatal message if CheckJNICalls. Otherwise we ignore it.
   if (obj->is_unlocked()) {
diff --git a/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java b/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java
index 3ed862e8218..39c8569532e 100644
--- a/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java
+++ b/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java
@@ -309,10 +309,11 @@ public class LockUnlock {
     }
 
     /**
-     * With two threads lockObject1 will be contended so should be
-     * inflated.
+     * With three threads lockObject1 will be contended so should be
+     * inflated. Three threads are also needed to ensure a high level
+     * of code coverage in the locking code.
      */
-    @Threads(2)
+    @Threads(3)
     @Benchmark
     public void testContendedLock() {
         synchronized (lockObject1) {