// Patch overview (commentary only; nothing in this preamble belongs to a hunk):
//  * library_call.cpp: deletes the static helper is_simple_name() and replaces the body of
//    LibraryCallKit::generate_min_max(). The removed body searched for a dominating CmpI
//    test and otherwise built a CMove; the added body wraps x0/y0 in a MinINode
//    (_min, _min_strict) or a MaxINode (_max, _max_strict) via _gvn.transform() and calls
//    fatal_unexpected_iid() for any other intrinsic id.
//  * IRNode.java: adds the Min_V and Max_V patterns ("MinV" / "MaxV").
//  * TestAutoVecIntMinMax.java (new file): IR test requiring LoadVector, MinV/MaxV and
//    StoreVector nodes for Math/StrictMath min/max loops over int arrays.
//  * VectorIntMinMax.java (new file): JMH benchmark of the same four loops.
// NOTE(review): the hunks below appear whitespace-collapsed (a hunk declared as 179 lines
// sits on a few physical lines) -- restore the original line breaks before applying.
diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index d92e4decf8c..87b546b94bb 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -1837,14 +1837,6 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) { } } -static bool is_simple_name(Node* n) { - return (n->req() == 1 // constant - || (n->is_Type() && n->as_Type()->type()->singleton()) - || n->is_Proj() // parameter or return value - || n->is_Phi() // local of some sort - ); -} - //----------------------------inline_notify-----------------------------------* bool LibraryCallKit::inline_notify(vmIntrinsics::ID id) { const TypeFunc* ftype = OptoRuntime::monitor_notify_Type(); @@ -1943,179 +1935,21 @@ bool LibraryCallKit::inline_math_unsignedMultiplyHigh() { Node* LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) { - // These are the candidate return value: - Node* xvalue = x0; - Node* yvalue = y0; - - if (xvalue == yvalue) { - return xvalue; - } - - bool want_max = (id == vmIntrinsics::_max || id == vmIntrinsics::_max_strict); - - const TypeInt* txvalue = _gvn.type(xvalue)->isa_int(); - const TypeInt* tyvalue = _gvn.type(yvalue)->isa_int(); - if (txvalue == NULL || tyvalue == NULL) return top(); - // This is not really necessary, but it is consistent with a - // hypothetical MaxINode::Value method: - int widen = MAX2(txvalue->_widen, tyvalue->_widen); - - // %%% This folding logic should (ideally) be in a different place. - // Some should be inside IfNode, and there to be a more reliable - // transformation of ?: style patterns into cmoves. We also want - // more powerful optimizations around cmove and min/max. - - // Try to find a dominating comparison of these guys. - // It can simplify the index computation for Arrays.copyOf - // and similar uses of System.arraycopy. - // First, compute the normalized version of CmpI(x, y). 
- int cmp_op = Op_CmpI; - Node* xkey = xvalue; - Node* ykey = yvalue; - Node* ideal_cmpxy = _gvn.transform(new CmpINode(xkey, ykey)); - if (ideal_cmpxy->is_Cmp()) { - // E.g., if we have CmpI(length - offset, count), - // it might idealize to CmpI(length, count + offset) - cmp_op = ideal_cmpxy->Opcode(); - xkey = ideal_cmpxy->in(1); - ykey = ideal_cmpxy->in(2); - } - - // Start by locating any relevant comparisons. - Node* start_from = (xkey->outcnt() < ykey->outcnt()) ? xkey : ykey; - Node* cmpxy = NULL; - Node* cmpyx = NULL; - for (DUIterator_Fast kmax, k = start_from->fast_outs(kmax); k < kmax; k++) { - Node* cmp = start_from->fast_out(k); - if (cmp->outcnt() > 0 && // must have prior uses - cmp->in(0) == NULL && // must be context-independent - cmp->Opcode() == cmp_op) { // right kind of compare - if (cmp->in(1) == xkey && cmp->in(2) == ykey) cmpxy = cmp; - if (cmp->in(1) == ykey && cmp->in(2) == xkey) cmpyx = cmp; - } - } - - const int NCMPS = 2; - Node* cmps[NCMPS] = { cmpxy, cmpyx }; - int cmpn; - for (cmpn = 0; cmpn < NCMPS; cmpn++) { - if (cmps[cmpn] != NULL) break; // find a result - } - if (cmpn < NCMPS) { - // Look for a dominating test that tells us the min and max. - int depth = 0; // Limit search depth for speed - Node* dom = control(); - for (; dom != NULL; dom = IfNode::up_one_dom(dom, true)) { - if (++depth >= 100) break; - Node* ifproj = dom; - if (!ifproj->is_Proj()) continue; - Node* iff = ifproj->in(0); - if (!iff->is_If()) continue; - Node* bol = iff->in(1); - if (!bol->is_Bool()) continue; - Node* cmp = bol->in(1); - if (cmp == NULL) continue; - for (cmpn = 0; cmpn < NCMPS; cmpn++) - if (cmps[cmpn] == cmp) break; - if (cmpn == NCMPS) continue; - BoolTest::mask btest = bol->as_Bool()->_test._test; - if (ifproj->is_IfFalse()) btest = BoolTest(btest).negate(); - if (cmp->in(1) == ykey) btest = BoolTest(btest).commute(); - // At this point, we know that 'x btest y' is true. 
- switch (btest) { - case BoolTest::eq: - // They are proven equal, so we can collapse the min/max. - // Either value is the answer. Choose the simpler. - if (is_simple_name(yvalue) && !is_simple_name(xvalue)) - return yvalue; - return xvalue; - case BoolTest::lt: // x < y - case BoolTest::le: // x <= y - return (want_max ? yvalue : xvalue); - case BoolTest::gt: // x > y - case BoolTest::ge: // x >= y - return (want_max ? xvalue : yvalue); - default: - break; - } - } - } - - // We failed to find a dominating test. - // Let's pick a test that might GVN with prior tests. - Node* best_bol = NULL; - BoolTest::mask best_btest = BoolTest::illegal; - for (cmpn = 0; cmpn < NCMPS; cmpn++) { - Node* cmp = cmps[cmpn]; - if (cmp == NULL) continue; - for (DUIterator_Fast jmax, j = cmp->fast_outs(jmax); j < jmax; j++) { - Node* bol = cmp->fast_out(j); - if (!bol->is_Bool()) continue; - BoolTest::mask btest = bol->as_Bool()->_test._test; - if (btest == BoolTest::eq || btest == BoolTest::ne) continue; - if (cmp->in(1) == ykey) btest = BoolTest(btest).commute(); - if (bol->outcnt() > (best_bol == NULL ? 0 : best_bol->outcnt())) { - best_bol = bol->as_Bool(); - best_btest = btest; - } - } - } - - Node* answer_if_true = NULL; - Node* answer_if_false = NULL; - switch (best_btest) { - default: - if (cmpxy == NULL) - cmpxy = ideal_cmpxy; - best_bol = _gvn.transform(new BoolNode(cmpxy, BoolTest::lt)); - // and fall through: - case BoolTest::lt: // x < y - case BoolTest::le: // x <= y - answer_if_true = (want_max ? yvalue : xvalue); - answer_if_false = (want_max ? xvalue : yvalue); - break; - case BoolTest::gt: // x > y - case BoolTest::ge: // x >= y - answer_if_true = (want_max ? xvalue : yvalue); - answer_if_false = (want_max ? yvalue : xvalue); - break; - } - - jint hi, lo; - if (want_max) { - // We can sharpen the minimum. - hi = MAX2(txvalue->_hi, tyvalue->_hi); - lo = MAX2(txvalue->_lo, tyvalue->_lo); - } else { - // We can sharpen the maximum. 
- hi = MIN2(txvalue->_hi, tyvalue->_hi); - lo = MIN2(txvalue->_lo, tyvalue->_lo); - } - - // Use a flow-free graph structure, to avoid creating excess control edges - // which could hinder other optimizations. - // Since Math.min/max is often used with arraycopy, we want - // tightly_coupled_allocation to be able to see beyond min/max expressions. - Node* cmov = CMoveNode::make(NULL, best_bol, - answer_if_false, answer_if_true, - TypeInt::make(lo, hi, widen)); - - return _gvn.transform(cmov); - - /* - // This is not as desirable as it may seem, since Min and Max - // nodes do not have a full set of optimizations. - // And they would interfere, anyway, with 'if' optimizations - // and with CMoveI canonical forms. + Node* result_val = NULL; switch (id) { case vmIntrinsics::_min: - result_val = _gvn.transform(new (C, 3) MinINode(x,y)); break; + case vmIntrinsics::_min_strict: + result_val = _gvn.transform(new MinINode(x0, y0)); + break; case vmIntrinsics::_max: - result_val = _gvn.transform(new (C, 3) MaxINode(x,y)); break; + case vmIntrinsics::_max_strict: + result_val = _gvn.transform(new MaxINode(x0, y0)); + break; default: - ShouldNotReachHere(); + fatal_unexpected_iid(id); + break; } - */ + return result_val; } inline int diff --git a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java index 9dda68c401b..650986a3285 100644 --- a/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java +++ b/test/hotspot/jtreg/compiler/lib/ir_framework/IRNode.java @@ -209,6 +209,9 @@ public class IRNode { public static final String VECTOR_UCAST_I2X = START + "VectorUCastI2X" + MID + END; public static final String VECTOR_REINTERPRET = START + "VectorReinterpret" + MID + END; + public static final String Min_V = START + "MinV" + MID + END; + public static final String Max_V = START + "MaxV" + MID + END; + public static final String FAST_LOCK = START + "FastLock" + MID + END; public static final String FAST_UNLOCK = 
START + "FastUnlock" + MID + END;
diff --git a/test/hotspot/jtreg/compiler/vectorization/TestAutoVecIntMinMax.java b/test/hotspot/jtreg/compiler/vectorization/TestAutoVecIntMinMax.java
new file mode 100644
index 00000000000..0bcb0f60226
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/vectorization/TestAutoVecIntMinMax.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.vectorization;
+
+import compiler.lib.ir_framework.*;
+import java.util.Random;
+import jdk.test.lib.Utils;
+
+/*
+ * @test
+ * @bug 8288107
+ * @summary Auto-vectorization enhancement for integer Math.max/Math.min operations
+ * @library /test/lib /
+ * @requires vm.compiler2.enabled
+ * @requires (os.simpleArch == "x64" & ((vm.cpu.features ~= ".*avx.*")
+ *   | (vm.cpu.features ~= ".*sse4.*"))) | os.arch == "aarch64" | os.arch == "riscv64"
+ * @run driver compiler.vectorization.TestAutoVecIntMinMax
+ */
+
+// IR test: every loop below must produce LoadVector, MinV/MaxV and StoreVector nodes.
+public class TestAutoVecIntMinMax {
+    private final static int LENGTH = 2000;
+    private final static Random RANDOM = Utils.getRandomInstance();
+
+    private static int[] a;
+    private static int[] b;
+    private static int[] c;
+
+    // a and b are filled with random ints; c only receives the results.
+    static {
+        a = new int[LENGTH];
+        b = new int[LENGTH];
+        c = new int[LENGTH];
+        for (int i = 0; i < LENGTH; i++) {
+            a[i] = RANDOM.nextInt();
+            b[i] = RANDOM.nextInt();
+        }
+    }
+
+    public static void main(String[] args) {
+        TestFramework.run();
+    }
+
+    // Test for auto-vectorization of Math.min operation on an array of integers
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, " >0 "})
+    @IR(counts = {IRNode.Min_V, " >0 "})
+    @IR(counts = {IRNode.STORE_VECTOR, " >0 "})
+    private static void testIntMin(int[] a, int[] b) {
+        for (int i = 0; i < LENGTH; i++) {
+            c[i] = Math.min(a[i], b[i]);
+        }
+    }
+
+    // Test for auto-vectorization of StrictMath.min operation on an array of integers
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, " >0 "})
+    @IR(counts = {IRNode.Min_V, " >0 "})
+    @IR(counts = {IRNode.STORE_VECTOR, " >0 "})
+    private static void testIntStrictMin(int[] a, int[] b) {
+        for (int i = 0; i < LENGTH; i++) {
+            c[i] = StrictMath.min(a[i], b[i]);
+        }
+    }
+
+    // Test for auto-vectorization of Math.max operation on an array of integers
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, " >0 "})
+    @IR(counts = {IRNode.Max_V, " >0 "})
+    @IR(counts = {IRNode.STORE_VECTOR, " >0 "})
+    private static void testIntMax(int[] a, int[] b) {
+        for (int i = 0; i < LENGTH; i++) {
+            c[i] = Math.max(a[i], b[i]);
+        }
+    }
+
+    // Test for auto-vectorization of StrictMath.max operation on an array of integers
+    @Test
+    @IR(counts = {IRNode.LOAD_VECTOR, " >0 "})
+    @IR(counts = {IRNode.Max_V, " >0 "})
+    @IR(counts = {IRNode.STORE_VECTOR, " >0 "})
+    private static void testIntStrictMax(int[] a, int[] b) {
+        for (int i = 0; i < LENGTH; i++) {
+            c[i] = StrictMath.max(a[i], b[i]);
+        }
+    }
+
+    // Passes the static input arrays to all four @Test methods.
+    @Run(test = {"testIntMin", "testIntStrictMin", "testIntMax", "testIntStrictMax"})
+    private void testIntMinMax_runner() {
+        testIntMin(a, b);
+        testIntStrictMin(a, b);
+        testIntMax(a, b);
+        testIntStrictMax(a, b);
+    }
+}
diff --git a/test/micro/org/openjdk/bench/vm/compiler/VectorIntMinMax.java b/test/micro/org/openjdk/bench/vm/compiler/VectorIntMinMax.java
new file mode 100644
index 00000000000..13754ec3cb1
--- /dev/null
+++ b/test/micro/org/openjdk/bench/vm/compiler/VectorIntMinMax.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.vm.compiler;
+
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.*;
+
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@State(Scope.Thread)
+public class VectorIntMinMax {
+    @Param({"2048"})
+    private int LENGTH;
+
+    private int[] ia;
+    private int[] ib;
+    private int[] ic;
+
+    @Param("0")
+    private int seed;
+
+    @Setup
+    public void init() {
+        // Seed here, not in a field initializer, which would run before JMH sets @Param fields.
+        Random random = new Random(seed);
+        ia = new int[LENGTH];
+        ib = new int[LENGTH];
+        ic = new int[LENGTH];
+        for (int i = 0; i < LENGTH; i++) {
+            ia[i] = random.nextInt();
+            ib[i] = random.nextInt();
+        }
+    }
+
+    // Test Math.max for int arrays
+    @Benchmark
+    public void testMaxInt() {
+        for (int i = 0; i < LENGTH; i++) {
+            ic[i] = Math.max(ia[i], ib[i]);
+        }
+    }
+
+    // Test Math.min for int arrays
+    @Benchmark
+    public void testMinInt() {
+        for (int i = 0; i < LENGTH; i++) {
+            ic[i] = Math.min(ia[i], ib[i]);
+        }
+    }
+
+    // Test StrictMath.min for int arrays
+    @Benchmark
+    public void testStrictMinInt() {
+        for (int i = 0; i < LENGTH; i++) {
+            ic[i] = StrictMath.min(ia[i], ib[i]);
+        }
+    }
+
+    // Test StrictMath.max for int arrays
+    @Benchmark
+    public void testStrictMaxInt() {
+        for (int i = 0; i < LENGTH; i++) {
+            ic[i] = StrictMath.max(ia[i], ib[i]);
+        }
+    }
+}