8299975: Limit underflow protection CMoveINode in PhaseIdealLoop::do_unroll must also protect type from underflow

Reviewed-by: roland, chagedorn
2023-01-23 13:10:42 +00:00 · 2023-01-23 13:10:42 +00:00 · 5a4945c0d9
commit 5a4945c0d9
parent f307e8c667
2 changed files with 181 additions and 21 deletions
--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@ -2306,35 +2306,55 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
        //
        BoolTest::mask bt = loop_end->test_trip();
        assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
-        Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
+        Node* underflow_clamp = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
-        set_ctrl(adj_max, C->root());
+        set_ctrl(underflow_clamp, C->root());
-        Node* old_limit = NULL;
+        Node* limit_before_underflow = NULL;
-        Node* adj_limit = NULL;
+        Node* prev_limit = NULL;
        Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
        if (loop_head->unrolled_count() > 1 &&
            limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
-            limit->in(CMoveNode::IfTrue) == adj_max &&
+            limit->in(CMoveNode::IfTrue) == underflow_clamp &&
            bol->as_Bool()->_test._test == bt &&
            bol->in(1)->Opcode() == Op_CmpI &&
            bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
-          // Loop was unrolled before.
+          // Loop was unrolled before, and had an unrolling protection CMoveI.
-          // Optimize the limit to avoid nested CMove:
+          // Use inputs to previous CMoveI for the new one:
-          // use original limit as old limit.
+          prev_limit = limit->in(CMoveNode::IfFalse); // unpack previous limit with underflow
-          old_limit = bol->in(1)->in(1);
+          limit_before_underflow = bol->in(1)->in(1); // CMoveI -> Bool -> CmpI -> limit_before_underflow
          // Adjust previous adjusted limit.
          adj_limit = limit->in(CMoveNode::IfFalse);
          adj_limit = new SubINode(adj_limit, stride);
        } else {
-          old_limit = limit;
+          // Loop was not unrolled before, or the limit did not underflow in a previous unrolling.
-          adj_limit = new SubINode(limit, stride);
+          prev_limit = limit;
          limit_before_underflow = limit;
        }
-        assert(old_limit != NULL && adj_limit != NULL, "");
+        //                           prev_limit   stride
-        register_new_node(adj_limit, ctrl); // adjust amount
+        //                                |         |
-        Node* adj_cmp = new CmpINode(old_limit, adj_limit);
+        // limit_before_underflow  new_limit_with_underflow (SubI)
-        register_new_node(adj_cmp, ctrl);
+        //                   |        |           |
-        Node* adj_bool = new BoolNode(adj_cmp, bt);
+        //                 underflow_cmp          |
-        register_new_node(adj_bool, ctrl);
+        //                   |                    |
-        new_limit = new CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
+        //                 underflow_bool [lt/gt] |
        //                   |                    |
        //                   +----+  +------------+
        //                        |  |
        //                        |  | underflow_clamp (min_jint/max_jint)
        //                        |  |  |
        //                       CMoveINode ([min_jint..hi] / [lo..max_jing])
        //
        assert(limit_before_underflow != NULL && prev_limit != NULL, "must find them");
        Node* new_limit_with_underflow = new SubINode(prev_limit, stride);
        register_new_node(new_limit_with_underflow, ctrl);
        // We must compare with limit_before_underflow, prev_limit may already have underflowed.
        Node* underflow_cmp = new CmpINode(limit_before_underflow, new_limit_with_underflow);
        register_new_node(underflow_cmp, ctrl);
        Node* underflow_bool = new BoolNode(underflow_cmp, bt);
        register_new_node(underflow_bool, ctrl);
        // Prevent type from becoming too pessimistic due to type underflow. The new limit
        // may be arbitrarily decreased by unrolling, but still in [min_jint..hi] / [lo..max_jint]
        const TypeInt* limit_before_underflow_t = _igvn.type(limit_before_underflow)->is_int();
        const TypeInt* no_underflow_t = TypeInt::make(stride_con > 0 ? min_jint : limit_before_underflow_t->_lo,
                                                      stride_con > 0 ? limit_before_underflow_t->_hi : max_jint,
                                                      Type::WidenMax);
        new_limit = new CMoveINode(underflow_bool, new_limit_with_underflow, underflow_clamp, no_underflow_t);
      }
      register_new_node(new_limit, ctrl);
    }
--- a/test/hotspot/jtreg/compiler/loopopts/TestCMoveLimitType.java
+++ b/test/hotspot/jtreg/compiler/loopopts/TestCMoveLimitType.java
@ -0,0 +1,140 @@
 /*
 * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 /*
 * @test
 * @key stress randomness
 * @bug 8299975
 * @summary Limit underflow protection CMoveINode in PhaseIdealLoop::do_unroll must also protect type from underflow
 * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:-TieredCompilation
 *                   -XX:CompileCommand=compileonly,compiler.loopopts.TestCMoveLimitType::test*
 *                   -XX:CompileCommand=dontinline,compiler.loopopts.TestCMoveLimitType::dontInline
 *                   -XX:RepeatCompilation=50 -XX:+StressIGVN
 *                   -Xbatch
 *                   compiler.loopopts.TestCMoveLimitType
 */
 /*
 * @test
 * @key stress randomness
 * @bug 8299975
 * @summary Limit underflow protection CMoveINode in PhaseIdealLoop::do_unroll must also protect type from underflow
 * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:-TieredCompilation
 *                   -XX:CompileCommand=compileonly,compiler.loopopts.TestCMoveLimitType::test*
 *                   -XX:CompileCommand=dontinline,compiler.loopopts.TestCMoveLimitType::dontInline
 *                   -XX:RepeatCompilation=50 -XX:+StressIGVN
 *                   -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:+TraceLoopOpts
 *                   compiler.loopopts.TestCMoveLimitType
 */
 // Note: if this test fails too intermittently then increase the RepeatCompilation, at the cost of more runtime.
 package compiler.loopopts;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 public class TestCMoveLimitType {
    static int[] iArr = new int[10000];
    static int test_buf(CharBuffer src, ByteBuffer dst) {
        // code reduced from sun.nio.cs.ext.EUC_JP$Encoder::encodeBufferLoop
        int outputSize = 0;
        byte[] outputByte;
        byte[] tmpBuf = new byte[3];
        while (src.hasRemaining()) {
            outputByte = tmpBuf;
            char c = src.get();
            if (c % 3 == 0) {
                outputSize = -2147483648; // int:min -> leads to underflow
            } else {
                outputByte[0] = (byte) 0;
                outputByte[1] = (byte) 1;
                outputByte[2] = (byte) 2;
                outputSize = 3; // mostly 3, to get LOOP to unroll (profiling info)
            }
            if (dst.remaining() < outputSize) {
                return 102;
            }
            // outputSize: int:min..3 -> Phi limited to 0..2
            // PreMainPost: main loop Phi limited to 1..2
            // Unroll(2):
            // Phi of first iteration is 1..2
            // Phi of second is therefore const 2 -> collapse to constant.
            // Increment it by 1, get const int:3 -> incr value goes into exit condition.
            // This loop cannot take the backedge, the tripcount has collapsed.
            // Exit condition must therefore be constant folded, to make loop without phi disappear.
            // Exit limit is the CMoveI created in do_unroll (original limit -1).
            // It protects agains underflow (outputSize can be int:min).
            // CMoveI's type is int, because it does outputSize-1 -> underflow.
            // CmpI(int:3, int) cannot be constant folded, however.
            // Solution: insert CastII after CMoveI, to prevent type underflow.
            // Then we have CmpI(int:3, CastII(CMoveI)) = CmpI(int:3, int:<=3) = GE
            // -> Bool [lt] constant folds to false -> fixed
            for (int i = 0; i < outputSize; i++) { // LOOP
                dst.put(outputByte[i]);
            }
        }
        return 103;
    }
    static CharBuffer makeSrc() {
        CharBuffer src = CharBuffer.allocate(100);
        for (int j = 0; j < 100; j++) {
            if (j % 31 == 0) {
                src.put((char)(0 + (j%3)*3)); // some 0
            } else {
                src.put((char)(1 + (j%3)*3)); // mostly 2
            }
        }
        src.position(0);
        return src;
    }
    static void test_simple(boolean flag) {
        int x = flag ? Integer.MIN_VALUE : 3;
        dontInline();
        // x has type "int:min..3" == "<=3"
        // Leads to Peel and 2x Unroll
        // Exit check needs to collapse
        for (int i = 0; i < x; i++) {
            iArr[i * 2] = 666 + i;
        }
    }
    static void dontInline() {}
    static public void main(String[] args) {
        for (int i = 0; i < 10_000; i++) {
            try {
                test_simple(i % 2 == 0);
            } catch (Exception e) {}
        }
        for (int i = 0; i < 6_000; i++) {
            CharBuffer src = makeSrc();
            ByteBuffer dst = ByteBuffer.allocate(10_000);
            test_buf(src, dst); // call many times -> multiple compilations with different profiling
        }
    }
 }