8288107: Auto-vectorization for integer min/max

Reviewed-by: kvn, ngasson
2022-07-20 15:36:39 +00:00 · 2022-07-20 15:36:39 +00:00 · 89458e36af
commit 89458e36af
parent 3d3e3df8f0
4 changed files with 216 additions and 176 deletions
--- a/src/hotspot/share/opto/library_call.cpp
+++ b/src/hotspot/share/opto/library_call.cpp
@ -1837,14 +1837,6 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
  }
 }

-static bool is_simple_name(Node* n) {
-  return (n->req() == 1         // constant
-          || (n->is_Type() && n->as_Type()->type()->singleton())
-          || n->is_Proj()       // parameter or return value
-          || n->is_Phi()        // local of some sort
-          );
-}
-
 //----------------------------inline_notify-----------------------------------*
 bool LibraryCallKit::inline_notify(vmIntrinsics::ID id) {
  const TypeFunc* ftype = OptoRuntime::monitor_notify_Type();
@ -1943,179 +1935,21 @@ bool LibraryCallKit::inline_math_unsignedMultiplyHigh() {

 Node*
 LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
-  // These are the candidate return value:
-  Node* xvalue = x0;
-  Node* yvalue = y0;
-
-  if (xvalue == yvalue) {
-    return xvalue;
-  }
-
-  bool want_max = (id == vmIntrinsics::_max || id == vmIntrinsics::_max_strict);
-
-  const TypeInt* txvalue = _gvn.type(xvalue)->isa_int();
-  const TypeInt* tyvalue = _gvn.type(yvalue)->isa_int();
-  if (txvalue == NULL || tyvalue == NULL)  return top();
-  // This is not really necessary, but it is consistent with a
-  // hypothetical MaxINode::Value method:
-  int widen = MAX2(txvalue->_widen, tyvalue->_widen);
-
-  // %%% This folding logic should (ideally) be in a different place.
-  // Some should be inside IfNode, and there to be a more reliable
-  // transformation of ?: style patterns into cmoves.  We also want
-  // more powerful optimizations around cmove and min/max.
-
-  // Try to find a dominating comparison of these guys.
-  // It can simplify the index computation for Arrays.copyOf
-  // and similar uses of System.arraycopy.
-  // First, compute the normalized version of CmpI(x, y).
-  int   cmp_op = Op_CmpI;
-  Node* xkey = xvalue;
-  Node* ykey = yvalue;
-  Node* ideal_cmpxy = _gvn.transform(new CmpINode(xkey, ykey));
-  if (ideal_cmpxy->is_Cmp()) {
-    // E.g., if we have CmpI(length - offset, count),
-    // it might idealize to CmpI(length, count + offset)
-    cmp_op = ideal_cmpxy->Opcode();
-    xkey = ideal_cmpxy->in(1);
-    ykey = ideal_cmpxy->in(2);
-  }
-
-  // Start by locating any relevant comparisons.
-  Node* start_from = (xkey->outcnt() < ykey->outcnt()) ? xkey : ykey;
-  Node* cmpxy = NULL;
-  Node* cmpyx = NULL;
-  for (DUIterator_Fast kmax, k = start_from->fast_outs(kmax); k < kmax; k++) {
-    Node* cmp = start_from->fast_out(k);
-    if (cmp->outcnt() > 0 &&            // must have prior uses
-        cmp->in(0) == NULL &&           // must be context-independent
-        cmp->Opcode() == cmp_op) {      // right kind of compare
-      if (cmp->in(1) == xkey && cmp->in(2) == ykey)  cmpxy = cmp;
-      if (cmp->in(1) == ykey && cmp->in(2) == xkey)  cmpyx = cmp;
-    }
-  }
-
-  const int NCMPS = 2;
-  Node* cmps[NCMPS] = { cmpxy, cmpyx };
-  int cmpn;
-  for (cmpn = 0; cmpn < NCMPS; cmpn++) {
-    if (cmps[cmpn] != NULL)  break;     // find a result
-  }
-  if (cmpn < NCMPS) {
-    // Look for a dominating test that tells us the min and max.
-    int depth = 0;                // Limit search depth for speed
-    Node* dom = control();
-    for (; dom != NULL; dom = IfNode::up_one_dom(dom, true)) {
-      if (++depth >= 100)  break;
-      Node* ifproj = dom;
-      if (!ifproj->is_Proj())  continue;
-      Node* iff = ifproj->in(0);
-      if (!iff->is_If())  continue;
-      Node* bol = iff->in(1);
-      if (!bol->is_Bool())  continue;
-      Node* cmp = bol->in(1);
-      if (cmp == NULL)  continue;
-      for (cmpn = 0; cmpn < NCMPS; cmpn++)
-        if (cmps[cmpn] == cmp)  break;
-      if (cmpn == NCMPS)  continue;
-      BoolTest::mask btest = bol->as_Bool()->_test._test;
-      if (ifproj->is_IfFalse())  btest = BoolTest(btest).negate();
-      if (cmp->in(1) == ykey)    btest = BoolTest(btest).commute();
-      // At this point, we know that 'x btest y' is true.
-      switch (btest) {
-      case BoolTest::eq:
-        // They are proven equal, so we can collapse the min/max.
-        // Either value is the answer.  Choose the simpler.
-        if (is_simple_name(yvalue) && !is_simple_name(xvalue))
-          return yvalue;
-        return xvalue;
-      case BoolTest::lt:          // x < y
-      case BoolTest::le:          // x <= y
-        return (want_max ? yvalue : xvalue);
-      case BoolTest::gt:          // x > y
-      case BoolTest::ge:          // x >= y
-        return (want_max ? xvalue : yvalue);
-      default:
-        break;
-      }
-    }
-  }
-
-  // We failed to find a dominating test.
-  // Let's pick a test that might GVN with prior tests.
-  Node*          best_bol   = NULL;
-  BoolTest::mask best_btest = BoolTest::illegal;
-  for (cmpn = 0; cmpn < NCMPS; cmpn++) {
-    Node* cmp = cmps[cmpn];
-    if (cmp == NULL)  continue;
-    for (DUIterator_Fast jmax, j = cmp->fast_outs(jmax); j < jmax; j++) {
-      Node* bol = cmp->fast_out(j);
-      if (!bol->is_Bool())  continue;
-      BoolTest::mask btest = bol->as_Bool()->_test._test;
-      if (btest == BoolTest::eq || btest == BoolTest::ne)  continue;
-      if (cmp->in(1) == ykey)   btest = BoolTest(btest).commute();
-      if (bol->outcnt() > (best_bol == NULL ? 0 : best_bol->outcnt())) {
-        best_bol   = bol->as_Bool();
-        best_btest = btest;
-      }
-    }
-  }
-
-  Node* answer_if_true  = NULL;
-  Node* answer_if_false = NULL;
-  switch (best_btest) {
-  default:
-    if (cmpxy == NULL)
-      cmpxy = ideal_cmpxy;
-    best_bol = _gvn.transform(new BoolNode(cmpxy, BoolTest::lt));
-    // and fall through:
-  case BoolTest::lt:          // x < y
-  case BoolTest::le:          // x <= y
-    answer_if_true  = (want_max ? yvalue : xvalue);
-    answer_if_false = (want_max ? xvalue : yvalue);
-    break;
-  case BoolTest::gt:          // x > y
-  case BoolTest::ge:          // x >= y
-    answer_if_true  = (want_max ? xvalue : yvalue);
-    answer_if_false = (want_max ? yvalue : xvalue);
-    break;
-  }
-
-  jint hi, lo;
-  if (want_max) {
-    // We can sharpen the minimum.
-    hi = MAX2(txvalue->_hi, tyvalue->_hi);
-    lo = MAX2(txvalue->_lo, tyvalue->_lo);
-  } else {
-    // We can sharpen the maximum.
-    hi = MIN2(txvalue->_hi, tyvalue->_hi);
-    lo = MIN2(txvalue->_lo, tyvalue->_lo);
-  }
-
-  // Use a flow-free graph structure, to avoid creating excess control edges
-  // which could hinder other optimizations.
-  // Since Math.min/max is often used with arraycopy, we want
-  // tightly_coupled_allocation to be able to see beyond min/max expressions.
-  Node* cmov = CMoveNode::make(NULL, best_bol,
-                               answer_if_false, answer_if_true,
-                               TypeInt::make(lo, hi, widen));
-
-  return _gvn.transform(cmov);
-
-  /*
-  // This is not as desirable as it may seem, since Min and Max
-  // nodes do not have a full set of optimizations.
-  // And they would interfere, anyway, with 'if' optimizations
-  // and with CMoveI canonical forms.
+  Node* result_val = NULL;  // assigned by each of the four recognised ids below
   switch (id) {
   case vmIntrinsics::_min:
-    result_val = _gvn.transform(new (C, 3) MinINode(x,y)); break;
+  case vmIntrinsics::_min_strict:  // strict id shares the _min path
+    result_val = _gvn.transform(new MinINode(x0, y0));  // MinI node replaces the removed compare + CMove expansion
+    break;
   case vmIntrinsics::_max:
-    result_val = _gvn.transform(new (C, 3) MaxINode(x,y)); break;
+  case vmIntrinsics::_max_strict:  // strict id shares the _max path
+    result_val = _gvn.transform(new MaxINode(x0, y0));  // MaxI node replaces the removed compare + CMove expansion
+    break;
   default:
-    ShouldNotReachHere();
+    fatal_unexpected_iid(id);  // any id other than the four min/max ids is rejected here
+    break;
   }
-  */
+  return result_val;  // the GVN-transformed MinI/MaxI node
 }

 inline int
--- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
+++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java
@ -209,6 +209,9 @@ public class IRNode {
    public static final String VECTOR_UCAST_I2X = START + "VectorUCastI2X" + MID + END;
    public static final String VECTOR_REINTERPRET = START + "VectorReinterpret" + MID + END;

+    public static final String Min_V = START + "MinV" + MID + END;
+    public static final String Max_V = START + "MaxV" + MID + END;
+
    public static final String FAST_LOCK   = START + "FastLock" + MID + END;
    public static final String FAST_UNLOCK = START + "FastUnlock" + MID + END;

--- a/test/hotspot/jtreg/compiler/vectorization/TestAutoVecIntMinMax.java
+++ b/test/hotspot/jtreg/compiler/vectorization/TestAutoVecIntMinMax.java
@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.vectorization;
+
+import compiler.lib.ir_framework.*;
+import java.util.Random;
+import jdk.test.lib.Utils;
+
+/*
+ * @test
+ * @bug 8288107
+ * @summary Auto-vectorization enhancement for integer Math.max/Math.min operations
+ * @library /test/lib /
+ * @requires vm.compiler2.enabled
+ * @requires (os.simpleArch == "x64" & ((vm.cpu.features ~= ".*avx.*")
+ *           | (vm.cpu.features ~= ".*sse4.*"))) | os.arch == "aarch64" | os.arch == "riscv64"
+ * @run driver compiler.vectorization.TestAutoVecIntMinMax
+ */
+
+public class TestAutoVecIntMinMax {
+    private final static int LENGTH = 2000;  // size of a, b and c; also the trip count of every test loop
+    private final static Random RANDOM = Utils.getRandomInstance();  // random source obtained from the test library
+
+    private static int[] a;  // first input, filled with random ints in the static initializer
+    private static int[] b;  // second input, filled with random ints in the static initializer
+    private static int[] c;  // output array written by all four test methods
+
+    static {
+        a = new int[LENGTH];
+        b = new int[LENGTH];
+        c = new int[LENGTH];
+        for(int i = 0; i < LENGTH; i++) {
+            a[i] = RANDOM.nextInt();
+            b[i] = RANDOM.nextInt();
+        }
+    }
+
+    public static void main(String[] args) {
+        TestFramework.run();
+    }
+
+    // Math.min over two int arrays; the @IR rules need a non-zero count of LOAD_VECTOR, Min_V and STORE_VECTOR
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR,  " >0 "})
+    @IR(counts = {IRNode.Min_V,        " >0 "})
+    @IR(counts = {IRNode.STORE_VECTOR, " >0 "})
+    private static void testIntMin(int[] a, int[] b) {  // parameters shadow the static a/b; c is the static output
+        for(int i = 0; i < LENGTH; i++) {
+            c[i] = Math.min(a[i], b[i]);
+        }
+    }
+
+    // StrictMath.min over two int arrays; same @IR expectations as testIntMin
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR,  " >0 "})
+    @IR(counts = {IRNode.Min_V,        " >0 "})
+    @IR(counts = {IRNode.STORE_VECTOR, " >0 "})
+    private static void testIntStrictMin(int[] a, int[] b) {
+        for(int i = 0; i < LENGTH; i++) {
+            c[i] = StrictMath.min(a[i], b[i]);
+        }
+    }
+
+    // Math.max over two int arrays; the @IR rules need a non-zero count of LOAD_VECTOR, Max_V and STORE_VECTOR
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR,  " >0 "})
+    @IR(counts = {IRNode.Max_V,        " >0 "})
+    @IR(counts = {IRNode.STORE_VECTOR, " >0 "})
+    private static void testIntMax(int[] a, int[] b) {
+        for(int i = 0; i < LENGTH; i++) {
+            c[i] = Math.max(a[i], b[i]);
+        }
+    }
+
+    // StrictMath.max over two int arrays; same @IR expectations as testIntMax
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR,  " >0 "})
+    @IR(counts = {IRNode.Max_V,        " >0 "})
+    @IR(counts = {IRNode.STORE_VECTOR, " >0 "})
+    private static void testIntStrictMax(int[] a, int[] b) {
+        for(int i = 0; i < LENGTH; i++) {
+            c[i] = StrictMath.max(a[i], b[i]);
+        }
+    }
+
+    @Run(test = {"testIntMin", "testIntStrictMin", "testIntMax", "testIntStrictMax"})
+    private void testIntMinMax_runner() {  // NOTE(review): only the IR shape is checked; the values stored in c are never verified
+        testIntMin(a, b);
+        testIntStrictMin(a, b);
+        testIntMax(a, b);
+        testIntStrictMax(a, b);
+    }
+}
--- a/test/micro/org/openjdk/bench/vm/compiler/VectorIntMinMax.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorIntMinMax.java
@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.vm.compiler;
+
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.*;
+
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+public class VectorIntMinMax {
+    @Param({"2048"})
+    private int LENGTH;
+
+    private int[] ia;
+    private int[] ib;
+    private int[] ic;
+
+    @Param("0")
+    private int seed;
+    private Random random = new Random(seed);
+
+    @Setup
+    public void init() {
+        ia = new int[LENGTH];
+        ib = new int[LENGTH];
+        ic = new int[LENGTH];
+
+        for (int i = 0; i < LENGTH; i++) {
+            ia[i] = random.nextInt();
+            ib[i] = random.nextInt();
+        }
+    }
+
+    // Test Math.max for int arrays
+    @Benchmark
+    public void testMaxInt() {
+        for (int i = 0; i < LENGTH; i++) {
+            ic[i] = Math.max(ia[i], ib[i]);
+        }
+    }
+
+    // Test Math.min for int arrays
+    @Benchmark
+    public void testMinInt() {
+        for (int i = 0; i < LENGTH; i++) {
+            ic[i] = Math.min(ia[i], ib[i]);
+        }
+    }
+
+    // Test StrictMath.min for int arrays
+    @Benchmark
+    public void testStrictMinInt() {
+        for (int i = 0; i < LENGTH; i++) {
+            ic[i] = StrictMath.min(ia[i], ib[i]);
+        }
+    }
+
+    // Test StrictMath.max for int arrays
+    @Benchmark
+    public void testStrictMaxInt() {
+        for (int i = 0; i < LENGTH; i++) {
+            ic[i] = StrictMath.max(ia[i], ib[i]);
+        }
+    }
+}