From 535804554deef213d056cbd6bce14aeff04c32fb Mon Sep 17 00:00:00 2001 From: Doug Lea Date: Wed, 13 Jul 2022 18:05:42 +0000 Subject: [PATCH] 8066859: java/lang/ref/OOMEInReferenceHandler.java failed with java.lang.Exception: Reference Handler thread died Reviewed-by: alanb --- .../locks/AbstractQueuedLongSynchronizer.java | 135 ++++++++++++---- .../locks/AbstractQueuedSynchronizer.java | 147 ++++++++++++++---- test/jdk/ProblemList-Xcomp.txt | 1 - test/jdk/ProblemList.txt | 1 - .../util/concurrent/locks/Lock/OOMEInAQS.java | 128 +++++++++++++++ 5 files changed, 351 insertions(+), 61 deletions(-) create mode 100644 test/jdk/java/util/concurrent/locks/Lock/OOMEInAQS.java diff --git a/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedLongSynchronizer.java b/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedLongSynchronizer.java index 9a18002ebd5..c7631dc76cc 100644 --- a/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedLongSynchronizer.java +++ b/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedLongSynchronizer.java @@ -195,31 +195,53 @@ public abstract class AbstractQueuedLongSynchronizer return U.compareAndSetReference(this, TAIL, c, v); } - /** tries once to CAS a new dummy node for head */ - private void tryInitializeHead() { - Node h = new ExclusiveNode(); - if (U.compareAndSetReference(this, HEAD, null, h)) - tail = h; + /** + * Tries to CAS a new dummy node for head. + * Returns new tail, or null if OutOfMemory + */ + private Node tryInitializeHead() { + for (Node h = null, t;;) { + if ((t = tail) != null) + return t; + else if (head != null) + Thread.onSpinWait(); + else { + if (h == null) { + try { + h = new ExclusiveNode(); + } catch (OutOfMemoryError oome) { + return null; + } + } + if (U.compareAndSetReference(this, HEAD, null, h)) + return tail = h; + } + } } + /** * Enqueues the node unless null. (Currently used only for * ConditionNodes; other cases are interleaved with acquires.) */ - final void enqueue(Node node) { + final void enqueue(ConditionNode node) { if (node != null) { - for (;;) { - Node t = tail; + boolean unpark = false; + for (Node t;;) { + if ((t = tail) == null && (t = tryInitializeHead()) == null) { + unpark = true; // wake up to spin on OOME + break; + } node.setPrevRelaxed(t); // avoid unnecessary fence - if (t == null) // initialize - tryInitializeHead(); - else if (casTail(t, node)) { + if (casTail(t, node)) { t.next = node; if (t.status < 0) // wake up to clean link - LockSupport.unpark(node.waiter); + unpark = true; break; } } + if (unpark) + LockSupport.unpark(node.waiter); } } @@ -278,7 +300,10 @@ public abstract class AbstractQueuedLongSynchronizer * Check if node now first * if so, ensure head stable, else ensure valid predecessor * if node is first or not yet enqueued, try acquiring + * else if queue is not initialized, do so by attaching new header node + * resort to spinwait on OOME trying to create node * else if node not yet created, create it + * resort to spinwait on OOME trying to create node * else if not yet enqueued, try once to enqueue * else if woken from park, retry (up to postSpins times) * else if WAITING status not set, set and retry @@ -321,18 +346,20 @@ public abstract class AbstractQueuedLongSynchronizer return 1; } } - if (node == null) { // allocate; retry before enqueue - if (shared) - node = new SharedNode(); - else - node = new ExclusiveNode(); + Node t; + if ((t = tail) == null) { // initialize queue + if (tryInitializeHead() == null) + return acquireOnOOME(shared, arg); + } else if (node == null) { // allocate; retry before enqueue + try { + node = (shared) ? new SharedNode() : new ExclusiveNode(); + } catch (OutOfMemoryError oome) { + return acquireOnOOME(shared, arg); + } } else if (pred == null) { // try to enqueue node.waiter = current; - Node t = tail; node.setPrevRelaxed(t); // avoid unnecessary fence - if (t == null) - tryInitializeHead(); - else if (!casTail(t, node)) + if (!casTail(t, node)) node.setPrevRelaxed(null); // back out else t.next = node; @@ -358,9 +385,23 @@ public abstract class AbstractQueuedLongSynchronizer return cancelAcquire(node, interrupted, interruptible); } + /** + * Spin-waits with backoff; used only upon OOME failures during acquire. + */ + private int acquireOnOOME(boolean shared, long arg) { + for (long nanos = 1L;;) { + if (shared ? (tryAcquireShared(arg) >= 0) : tryAcquire(arg)) + return 1; + U.park(false, nanos); // must use Unsafe park to sleep + if (nanos < 1L << 30) // max about 1 second + nanos <<= 1; + } + } + /** * Possibly repeatedly traverses from tail, unsplicing cancelled - * nodes until none are found. + * nodes until none are found. Unparks nodes that may have been + * relinked to be next eligible acquirer. */ private void cleanQueue() { for (;;) { // restart point @@ -1067,6 +1108,12 @@ public abstract class AbstractQueuedLongSynchronizer /** Last node of condition queue. */ private transient ConditionNode lastWaiter; + /** + * Fixed delay in nanoseconds between releasing and reacquiring + * lock during Condition waits that encounter OutOfMemoryErrors + */ + static final long OOME_COND_WAIT_DELAY = 10L * 1000L * 1000L; // 10 ms + /** * Creates a new {@code ConditionObject} instance. */ @@ -1103,7 +1150,7 @@ public abstract class AbstractQueuedLongSynchronizer ConditionNode first = firstWaiter; if (!isHeldExclusively()) throw new IllegalMonitorStateException(); - if (first != null) + else if (first != null) doSignal(first, false); } @@ -1118,7 +1165,7 @@ public abstract class AbstractQueuedLongSynchronizer ConditionNode first = firstWaiter; if (!isHeldExclusively()) throw new IllegalMonitorStateException(); - if (first != null) + else if (first != null) doSignal(first, true); } @@ -1185,6 +1232,26 @@ public abstract class AbstractQueuedLongSynchronizer } } + /** + * Constructs objects needed for condition wait. On OOME, + * releases lock, sleeps, reacquires, and returns null. + */ + private ConditionNode newConditionNode() { + long savedState; + if (tryInitializeHead() != null) { + try { + return new ConditionNode(); + } catch (OutOfMemoryError oome) { + } + } + // fall through if encountered OutOfMemoryError + if (!isHeldExclusively() || !release(savedState = getState())) + throw new IllegalMonitorStateException(); + U.park(false, OOME_COND_WAIT_DELAY); + acquireOnOOME(false, savedState); + return null; + } + /** * Implements uninterruptible condition wait. *
    @@ -1197,7 +1264,9 @@ public abstract class AbstractQueuedLongSynchronizer *
*/ public final void awaitUninterruptibly() { - ConditionNode node = new ConditionNode(); + ConditionNode node = newConditionNode(); + if (node == null) + return; long savedState = enableWait(node); LockSupport.setCurrentBlocker(this); // for back-compatibility boolean interrupted = false, rejected = false; @@ -1241,7 +1310,9 @@ public abstract class AbstractQueuedLongSynchronizer public final void await() throws InterruptedException { if (Thread.interrupted()) throw new InterruptedException(); - ConditionNode node = new ConditionNode(); + ConditionNode node = newConditionNode(); + if (node == null) + return; long savedState = enableWait(node); LockSupport.setCurrentBlocker(this); // for back-compatibility boolean interrupted = false, cancelled = false, rejected = false; @@ -1292,7 +1363,9 @@ public abstract class AbstractQueuedLongSynchronizer throws InterruptedException { if (Thread.interrupted()) throw new InterruptedException(); - ConditionNode node = new ConditionNode(); + ConditionNode node = newConditionNode(); + if (node == null) + return nanosTimeout - OOME_COND_WAIT_DELAY; long savedState = enableWait(node); long nanos = (nanosTimeout < 0L) ? 0L : nanosTimeout; long deadline = System.nanoTime() + nanos; @@ -1336,7 +1409,9 @@ public abstract class AbstractQueuedLongSynchronizer long abstime = deadline.getTime(); if (Thread.interrupted()) throw new InterruptedException(); - ConditionNode node = new ConditionNode(); + ConditionNode node = newConditionNode(); + if (node == null) + return false; long savedState = enableWait(node); boolean cancelled = false, interrupted = false; while (!canReacquire(node)) { @@ -1377,7 +1452,9 @@ public abstract class AbstractQueuedLongSynchronizer long nanosTimeout = unit.toNanos(time); if (Thread.interrupted()) throw new InterruptedException(); - ConditionNode node = new ConditionNode(); + ConditionNode node = newConditionNode(); + if (node == null) + return false; long savedState = enableWait(node); long nanos = (nanosTimeout < 0L) ? 0L : nanosTimeout; long deadline = System.nanoTime() + nanos; diff --git a/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedSynchronizer.java b/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedSynchronizer.java index 618ba7b05f3..36d21d3fcfc 100644 --- a/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedSynchronizer.java +++ b/src/java.base/share/classes/java/util/concurrent/locks/AbstractQueuedSynchronizer.java @@ -432,6 +432,19 @@ public abstract class AbstractQueuedSynchronizer * methods. (It is usually easy for compilers to optimize * call-site specializations when heavily used.) * + * Most AQS methods may be called by JDK components that cannot be + * allowed to fail when encountering OutOfMemoryErrors. The main + * acquire method resorts to spin-waits with backoff if nodes + * cannot be allocated. Condition waits release and reacquire + * locks upon OOME at a slow fixed rate (OOME_COND_WAIT_DELAY) + * designed with the hope that eventually enough memory will be + * recovered; if not performance can be very slow. Effectiveness + * is also limited by the possibility of class loading triggered + * by first-time usages, that may encounter unrecoverable + * OOMEs. Also, it is possible for OutOfMemoryErrors to be thrown + * when attempting to create and throw + * IllegalMonitorStateExceptions and InterruptedExceptions. + * * There are several arbitrary decisions about when and how to * check interrupts in both acquire and await before and/or after * blocking. The decisions are less arbitrary in implementation @@ -562,31 +575,52 @@ public abstract class AbstractQueuedSynchronizer return U.compareAndSetReference(this, TAIL, c, v); } - /** tries once to CAS a new dummy node for head */ - private void tryInitializeHead() { - Node h = new ExclusiveNode(); - if (U.compareAndSetReference(this, HEAD, null, h)) - tail = h; + /** + * Tries to CAS a new dummy node for head. + * Returns new tail, or null if OutOfMemory + */ + private Node tryInitializeHead() { + for (Node h = null, t;;) { + if ((t = tail) != null) + return t; + else if (head != null) + Thread.onSpinWait(); + else { + if (h == null) { + try { + h = new ExclusiveNode(); + } catch (OutOfMemoryError oome) { + return null; + } + } + if (U.compareAndSetReference(this, HEAD, null, h)) + return tail = h; + } + } } /** * Enqueues the node unless null. (Currently used only for * ConditionNodes; other cases are interleaved with acquires.) */ - final void enqueue(Node node) { + final void enqueue(ConditionNode node) { if (node != null) { - for (;;) { - Node t = tail; + boolean unpark = false; + for (Node t;;) { + if ((t = tail) == null && (t = tryInitializeHead()) == null) { + unpark = true; // wake up to spin on OOME + break; + } node.setPrevRelaxed(t); // avoid unnecessary fence - if (t == null) // initialize - tryInitializeHead(); - else if (casTail(t, node)) { + if (casTail(t, node)) { t.next = node; if (t.status < 0) // wake up to clean link - LockSupport.unpark(node.waiter); + unpark = true; break; } } + if (unpark) + LockSupport.unpark(node.waiter); } } @@ -638,14 +672,17 @@ public abstract class AbstractQueuedSynchronizer Thread current = Thread.currentThread(); byte spins = 0, postSpins = 0; // retries upon unpark of first thread boolean interrupted = false, first = false; - Node pred = null; // predecessor of node when enqueued + Node pred = null; // predecessor of node when enqueued /* * Repeatedly: * Check if node now first * if so, ensure head stable, else ensure valid predecessor * if node is first or not yet enqueued, try acquiring + * else if queue is not initialized, do so by attaching new header node + * resort to spinwait on OOME trying to create node * else if node not yet created, create it + * resort to spinwait on OOME trying to create node * else if not yet enqueued, try once to enqueue * else if woken from park, retry (up to postSpins times) * else if WAITING status not set, set and retry @@ -688,18 +725,20 @@ public abstract class AbstractQueuedSynchronizer return 1; } } - if (node == null) { // allocate; retry before enqueue - if (shared) - node = new SharedNode(); - else - node = new ExclusiveNode(); + Node t; + if ((t = tail) == null) { // initialize queue + if (tryInitializeHead() == null) + return acquireOnOOME(shared, arg); + } else if (node == null) { // allocate; retry before enqueue + try { + node = (shared) ? new SharedNode() : new ExclusiveNode(); + } catch (OutOfMemoryError oome) { + return acquireOnOOME(shared, arg); + } } else if (pred == null) { // try to enqueue node.waiter = current; - Node t = tail; node.setPrevRelaxed(t); // avoid unnecessary fence - if (t == null) - tryInitializeHead(); - else if (!casTail(t, node)) + if (!casTail(t, node)) node.setPrevRelaxed(null); // back out else t.next = node; @@ -725,6 +764,19 @@ public abstract class AbstractQueuedSynchronizer return cancelAcquire(node, interrupted, interruptible); } + /** + * Spin-waits with backoff; used only upon OOME failures during acquire. + */ + private int acquireOnOOME(boolean shared, int arg) { + for (long nanos = 1L;;) { + if (shared ? (tryAcquireShared(arg) >= 0) : tryAcquire(arg)) + return 1; + U.park(false, nanos); // must use Unsafe park to sleep + if (nanos < 1L << 30) // max about 1 second + nanos <<= 1; + } + } + /** * Possibly repeatedly traverses from tail, unsplicing cancelled * nodes until none are found. Unparks nodes that may have been @@ -1435,6 +1487,12 @@ public abstract class AbstractQueuedSynchronizer /** Last node of condition queue. */ private transient ConditionNode lastWaiter; + /** + * Fixed delay in nanoseconds between releasing and reacquiring + * lock during Condition waits that encounter OutOfMemoryErrors + */ + static final long OOME_COND_WAIT_DELAY = 10L * 1000L * 1000L; // 10 ms + /** * Creates a new {@code ConditionObject} instance. */ @@ -1471,7 +1529,7 @@ public abstract class AbstractQueuedSynchronizer ConditionNode first = firstWaiter; if (!isHeldExclusively()) throw new IllegalMonitorStateException(); - if (first != null) + else if (first != null) doSignal(first, false); } @@ -1486,7 +1544,7 @@ public abstract class AbstractQueuedSynchronizer ConditionNode first = firstWaiter; if (!isHeldExclusively()) throw new IllegalMonitorStateException(); - if (first != null) + else if (first != null) doSignal(first, true); } @@ -1553,6 +1611,26 @@ public abstract class AbstractQueuedSynchronizer } } + /** + * Constructs objects needed for condition wait. On OOME, + * releases lock, sleeps, reacquires, and returns null. + */ + private ConditionNode newConditionNode() { + int savedState; + if (tryInitializeHead() != null) { + try { + return new ConditionNode(); + } catch (OutOfMemoryError oome) { + } + } + // fall through if encountered OutOfMemoryError + if (!isHeldExclusively() || !release(savedState = getState())) + throw new IllegalMonitorStateException(); + U.park(false, OOME_COND_WAIT_DELAY); + acquireOnOOME(false, savedState); + return null; + } + /** * Implements uninterruptible condition wait. *
    @@ -1565,7 +1643,9 @@ public abstract class AbstractQueuedSynchronizer *
*/ public final void awaitUninterruptibly() { - ConditionNode node = new ConditionNode(); + ConditionNode node = newConditionNode(); + if (node == null) + return; int savedState = enableWait(node); LockSupport.setCurrentBlocker(this); // for back-compatibility boolean interrupted = false, rejected = false; @@ -1609,7 +1689,9 @@ public abstract class AbstractQueuedSynchronizer public final void await() throws InterruptedException { if (Thread.interrupted()) throw new InterruptedException(); - ConditionNode node = new ConditionNode(); + ConditionNode node = newConditionNode(); + if (node == null) + return; int savedState = enableWait(node); LockSupport.setCurrentBlocker(this); // for back-compatibility boolean interrupted = false, cancelled = false, rejected = false; @@ -1660,7 +1742,9 @@ public abstract class AbstractQueuedSynchronizer throws InterruptedException { if (Thread.interrupted()) throw new InterruptedException(); - ConditionNode node = new ConditionNode(); + ConditionNode node = newConditionNode(); + if (node == null) + return nanosTimeout - OOME_COND_WAIT_DELAY; int savedState = enableWait(node); long nanos = (nanosTimeout < 0L) ? 0L : nanosTimeout; long deadline = System.nanoTime() + nanos; @@ -1704,7 +1788,9 @@ public abstract class AbstractQueuedSynchronizer long abstime = deadline.getTime(); if (Thread.interrupted()) throw new InterruptedException(); - ConditionNode node = new ConditionNode(); + ConditionNode node = newConditionNode(); + if (node == null) + return false; int savedState = enableWait(node); boolean cancelled = false, interrupted = false; while (!canReacquire(node)) { @@ -1745,7 +1831,9 @@ public abstract class AbstractQueuedSynchronizer long nanosTimeout = unit.toNanos(time); if (Thread.interrupted()) throw new InterruptedException(); - ConditionNode node = new ConditionNode(); + ConditionNode node = newConditionNode(); + if (node == null) + return false; int savedState = enableWait(node); long nanos = (nanosTimeout < 0L) ? 0L : nanosTimeout; long deadline = System.nanoTime() + nanos; @@ -1851,7 +1939,6 @@ public abstract class AbstractQueuedSynchronizer = U.objectFieldOffset(AbstractQueuedSynchronizer.class, "head"); private static final long TAIL = U.objectFieldOffset(AbstractQueuedSynchronizer.class, "tail"); - static { Class ensureLoaded = LockSupport.class; } diff --git a/test/jdk/ProblemList-Xcomp.txt b/test/jdk/ProblemList-Xcomp.txt index d0142818793..4e5b580e152 100644 --- a/test/jdk/ProblemList-Xcomp.txt +++ b/test/jdk/ProblemList-Xcomp.txt @@ -29,5 +29,4 @@ java/lang/invoke/MethodHandles/CatchExceptionTest.java 8146623 generic-all java/lang/ref/ReferenceEnqueue.java 8284236 generic-all -java/lang/ref/OOMEInReferenceHandler.java 8066859 generic-all java/lang/reflect/callerCache/ReflectionCallerCacheTest.java 8288286 generic-all diff --git a/test/jdk/ProblemList.txt b/test/jdk/ProblemList.txt index fdb0af60e4e..5f3df8ec1a2 100644 --- a/test/jdk/ProblemList.txt +++ b/test/jdk/ProblemList.txt @@ -495,7 +495,6 @@ java/lang/invoke/LFCaching/LFMultiThreadCachingTest.java 8151492 generic- java/lang/invoke/LFCaching/LFGarbageCollectedTest.java 8078602 generic-all java/lang/invoke/lambda/LambdaFileEncodingSerialization.java 8249079 linux-x64 java/lang/invoke/RicochetTest.java 8251969 generic-all -java/lang/ref/OOMEInReferenceHandler.java 8066859 generic-all ############################################################################ diff --git a/test/jdk/java/util/concurrent/locks/Lock/OOMEInAQS.java b/test/jdk/java/util/concurrent/locks/Lock/OOMEInAQS.java new file mode 100644 index 00000000000..ee1f4b371f4 --- /dev/null +++ b/test/jdk/java/util/concurrent/locks/Lock/OOMEInAQS.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.Phaser; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Consumer; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +/** + * @test + * @bug 8066859 + * @summary Check that AQS-based locks, conditions, and CountDownLatches do not fail when encountering OOME + * @run main/othervm -XX:-UseGCOverheadLimit -Xmx24M -XX:-UseTLAB OOMEInAQS + */ + +public class OOMEInAQS extends Thread { + static final int NTHREADS = 2; // intentionally not a scalable test; > 2 is very slow + static final int NREPS = 100; + // statically allocate + static final ReentrantLock mainLock = new ReentrantLock(); + static final Condition condition = mainLock.newCondition(); + static final CountDownLatch started = new CountDownLatch(1); + static final CountDownLatch filled = new CountDownLatch(1); + static volatile Object data; + static int turn; + + /** + * For each of NTHREADS threads, REPS times: Take turns + * executing. Introduce OOM using fillHeap during runs. + */ + public static void main(String[] args) throws Throwable { + OOMEInAQS[] threads = new OOMEInAQS[NTHREADS]; + for (int i = 0; i < NTHREADS; ++i) + (threads[i] = new OOMEInAQS(i)).start(); + started.countDown(); + long t0 = System.nanoTime(); + data = fillHeap(); + filled.countDown(); + long t1 = System.nanoTime(); + for (int i = 0; i < NTHREADS; ++i) + threads[i].join(); + data = null; // free heap before reporting and terminating + System.gc(); + System.out.println( + "fillHeap time: " + (t1 - t0) / 1000_000 + + " millis, whole test time: " + (System.nanoTime() - t0) / 1000_000 + + " millis" + ); + } + + final int tid; + OOMEInAQS(int tid) { + this.tid = tid; + } + + @Override + public void run() { + int id = tid, nextId = (id + 1) % NTHREADS; + final ReentrantLock lock = mainLock; + final Condition cond = condition; + try { + started.await(); + for (int i = 0; i < NREPS; i++) { + try { + lock.lock(); + while (turn != id) + cond.await(); + turn = nextId; + cond.signalAll(); + } finally { + lock.unlock(); + } + if (i == 2) // Subsequent AQS methods encounter OOME + filled.await(); + } + } catch (Throwable ex) { // Could be InterruptedExeption or OOME + data = null; + System.exit(0); // avoid getting stuck trying to recover + } + } + + static Object[] fillHeap() { + Object[] first = null, last = null; + int size = 1 << 20; + while (size > 0) { + try { + Object[] array = new Object[size]; + if (first == null) { + first = array; + } else { + last[0] = array; + } + last = array; + } catch (OutOfMemoryError oome) { + size = size >>> 1; + } + } + return first; + } +}