8288107: Auto-vectorization for integer min/max
Reviewed-by: kvn, ngasson
This commit is contained in:
parent
3d3e3df8f0
commit
89458e36af
@ -1837,14 +1837,6 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_simple_name(Node* n) {
|
|
||||||
return (n->req() == 1 // constant
|
|
||||||
|| (n->is_Type() && n->as_Type()->type()->singleton())
|
|
||||||
|| n->is_Proj() // parameter or return value
|
|
||||||
|| n->is_Phi() // local of some sort
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
//----------------------------inline_notify-----------------------------------*
|
//----------------------------inline_notify-----------------------------------*
|
||||||
bool LibraryCallKit::inline_notify(vmIntrinsics::ID id) {
|
bool LibraryCallKit::inline_notify(vmIntrinsics::ID id) {
|
||||||
const TypeFunc* ftype = OptoRuntime::monitor_notify_Type();
|
const TypeFunc* ftype = OptoRuntime::monitor_notify_Type();
|
||||||
@ -1943,179 +1935,21 @@ bool LibraryCallKit::inline_math_unsignedMultiplyHigh() {
|
|||||||
|
|
||||||
Node*
|
Node*
|
||||||
LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
|
LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
|
||||||
// These are the candidate return value:
|
Node* result_val = NULL;
|
||||||
Node* xvalue = x0;
|
|
||||||
Node* yvalue = y0;
|
|
||||||
|
|
||||||
if (xvalue == yvalue) {
|
|
||||||
return xvalue;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool want_max = (id == vmIntrinsics::_max || id == vmIntrinsics::_max_strict);
|
|
||||||
|
|
||||||
const TypeInt* txvalue = _gvn.type(xvalue)->isa_int();
|
|
||||||
const TypeInt* tyvalue = _gvn.type(yvalue)->isa_int();
|
|
||||||
if (txvalue == NULL || tyvalue == NULL) return top();
|
|
||||||
// This is not really necessary, but it is consistent with a
|
|
||||||
// hypothetical MaxINode::Value method:
|
|
||||||
int widen = MAX2(txvalue->_widen, tyvalue->_widen);
|
|
||||||
|
|
||||||
// %%% This folding logic should (ideally) be in a different place.
|
|
||||||
// Some should be inside IfNode, and there to be a more reliable
|
|
||||||
// transformation of ?: style patterns into cmoves. We also want
|
|
||||||
// more powerful optimizations around cmove and min/max.
|
|
||||||
|
|
||||||
// Try to find a dominating comparison of these guys.
|
|
||||||
// It can simplify the index computation for Arrays.copyOf
|
|
||||||
// and similar uses of System.arraycopy.
|
|
||||||
// First, compute the normalized version of CmpI(x, y).
|
|
||||||
int cmp_op = Op_CmpI;
|
|
||||||
Node* xkey = xvalue;
|
|
||||||
Node* ykey = yvalue;
|
|
||||||
Node* ideal_cmpxy = _gvn.transform(new CmpINode(xkey, ykey));
|
|
||||||
if (ideal_cmpxy->is_Cmp()) {
|
|
||||||
// E.g., if we have CmpI(length - offset, count),
|
|
||||||
// it might idealize to CmpI(length, count + offset)
|
|
||||||
cmp_op = ideal_cmpxy->Opcode();
|
|
||||||
xkey = ideal_cmpxy->in(1);
|
|
||||||
ykey = ideal_cmpxy->in(2);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start by locating any relevant comparisons.
|
|
||||||
Node* start_from = (xkey->outcnt() < ykey->outcnt()) ? xkey : ykey;
|
|
||||||
Node* cmpxy = NULL;
|
|
||||||
Node* cmpyx = NULL;
|
|
||||||
for (DUIterator_Fast kmax, k = start_from->fast_outs(kmax); k < kmax; k++) {
|
|
||||||
Node* cmp = start_from->fast_out(k);
|
|
||||||
if (cmp->outcnt() > 0 && // must have prior uses
|
|
||||||
cmp->in(0) == NULL && // must be context-independent
|
|
||||||
cmp->Opcode() == cmp_op) { // right kind of compare
|
|
||||||
if (cmp->in(1) == xkey && cmp->in(2) == ykey) cmpxy = cmp;
|
|
||||||
if (cmp->in(1) == ykey && cmp->in(2) == xkey) cmpyx = cmp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const int NCMPS = 2;
|
|
||||||
Node* cmps[NCMPS] = { cmpxy, cmpyx };
|
|
||||||
int cmpn;
|
|
||||||
for (cmpn = 0; cmpn < NCMPS; cmpn++) {
|
|
||||||
if (cmps[cmpn] != NULL) break; // find a result
|
|
||||||
}
|
|
||||||
if (cmpn < NCMPS) {
|
|
||||||
// Look for a dominating test that tells us the min and max.
|
|
||||||
int depth = 0; // Limit search depth for speed
|
|
||||||
Node* dom = control();
|
|
||||||
for (; dom != NULL; dom = IfNode::up_one_dom(dom, true)) {
|
|
||||||
if (++depth >= 100) break;
|
|
||||||
Node* ifproj = dom;
|
|
||||||
if (!ifproj->is_Proj()) continue;
|
|
||||||
Node* iff = ifproj->in(0);
|
|
||||||
if (!iff->is_If()) continue;
|
|
||||||
Node* bol = iff->in(1);
|
|
||||||
if (!bol->is_Bool()) continue;
|
|
||||||
Node* cmp = bol->in(1);
|
|
||||||
if (cmp == NULL) continue;
|
|
||||||
for (cmpn = 0; cmpn < NCMPS; cmpn++)
|
|
||||||
if (cmps[cmpn] == cmp) break;
|
|
||||||
if (cmpn == NCMPS) continue;
|
|
||||||
BoolTest::mask btest = bol->as_Bool()->_test._test;
|
|
||||||
if (ifproj->is_IfFalse()) btest = BoolTest(btest).negate();
|
|
||||||
if (cmp->in(1) == ykey) btest = BoolTest(btest).commute();
|
|
||||||
// At this point, we know that 'x btest y' is true.
|
|
||||||
switch (btest) {
|
|
||||||
case BoolTest::eq:
|
|
||||||
// They are proven equal, so we can collapse the min/max.
|
|
||||||
// Either value is the answer. Choose the simpler.
|
|
||||||
if (is_simple_name(yvalue) && !is_simple_name(xvalue))
|
|
||||||
return yvalue;
|
|
||||||
return xvalue;
|
|
||||||
case BoolTest::lt: // x < y
|
|
||||||
case BoolTest::le: // x <= y
|
|
||||||
return (want_max ? yvalue : xvalue);
|
|
||||||
case BoolTest::gt: // x > y
|
|
||||||
case BoolTest::ge: // x >= y
|
|
||||||
return (want_max ? xvalue : yvalue);
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// We failed to find a dominating test.
|
|
||||||
// Let's pick a test that might GVN with prior tests.
|
|
||||||
Node* best_bol = NULL;
|
|
||||||
BoolTest::mask best_btest = BoolTest::illegal;
|
|
||||||
for (cmpn = 0; cmpn < NCMPS; cmpn++) {
|
|
||||||
Node* cmp = cmps[cmpn];
|
|
||||||
if (cmp == NULL) continue;
|
|
||||||
for (DUIterator_Fast jmax, j = cmp->fast_outs(jmax); j < jmax; j++) {
|
|
||||||
Node* bol = cmp->fast_out(j);
|
|
||||||
if (!bol->is_Bool()) continue;
|
|
||||||
BoolTest::mask btest = bol->as_Bool()->_test._test;
|
|
||||||
if (btest == BoolTest::eq || btest == BoolTest::ne) continue;
|
|
||||||
if (cmp->in(1) == ykey) btest = BoolTest(btest).commute();
|
|
||||||
if (bol->outcnt() > (best_bol == NULL ? 0 : best_bol->outcnt())) {
|
|
||||||
best_bol = bol->as_Bool();
|
|
||||||
best_btest = btest;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Node* answer_if_true = NULL;
|
|
||||||
Node* answer_if_false = NULL;
|
|
||||||
switch (best_btest) {
|
|
||||||
default:
|
|
||||||
if (cmpxy == NULL)
|
|
||||||
cmpxy = ideal_cmpxy;
|
|
||||||
best_bol = _gvn.transform(new BoolNode(cmpxy, BoolTest::lt));
|
|
||||||
// and fall through:
|
|
||||||
case BoolTest::lt: // x < y
|
|
||||||
case BoolTest::le: // x <= y
|
|
||||||
answer_if_true = (want_max ? yvalue : xvalue);
|
|
||||||
answer_if_false = (want_max ? xvalue : yvalue);
|
|
||||||
break;
|
|
||||||
case BoolTest::gt: // x > y
|
|
||||||
case BoolTest::ge: // x >= y
|
|
||||||
answer_if_true = (want_max ? xvalue : yvalue);
|
|
||||||
answer_if_false = (want_max ? yvalue : xvalue);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
jint hi, lo;
|
|
||||||
if (want_max) {
|
|
||||||
// We can sharpen the minimum.
|
|
||||||
hi = MAX2(txvalue->_hi, tyvalue->_hi);
|
|
||||||
lo = MAX2(txvalue->_lo, tyvalue->_lo);
|
|
||||||
} else {
|
|
||||||
// We can sharpen the maximum.
|
|
||||||
hi = MIN2(txvalue->_hi, tyvalue->_hi);
|
|
||||||
lo = MIN2(txvalue->_lo, tyvalue->_lo);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use a flow-free graph structure, to avoid creating excess control edges
|
|
||||||
// which could hinder other optimizations.
|
|
||||||
// Since Math.min/max is often used with arraycopy, we want
|
|
||||||
// tightly_coupled_allocation to be able to see beyond min/max expressions.
|
|
||||||
Node* cmov = CMoveNode::make(NULL, best_bol,
|
|
||||||
answer_if_false, answer_if_true,
|
|
||||||
TypeInt::make(lo, hi, widen));
|
|
||||||
|
|
||||||
return _gvn.transform(cmov);
|
|
||||||
|
|
||||||
/*
|
|
||||||
// This is not as desirable as it may seem, since Min and Max
|
|
||||||
// nodes do not have a full set of optimizations.
|
|
||||||
// And they would interfere, anyway, with 'if' optimizations
|
|
||||||
// and with CMoveI canonical forms.
|
|
||||||
switch (id) {
|
switch (id) {
|
||||||
case vmIntrinsics::_min:
|
case vmIntrinsics::_min:
|
||||||
result_val = _gvn.transform(new (C, 3) MinINode(x,y)); break;
|
case vmIntrinsics::_min_strict:
|
||||||
|
result_val = _gvn.transform(new MinINode(x0, y0));
|
||||||
|
break;
|
||||||
case vmIntrinsics::_max:
|
case vmIntrinsics::_max:
|
||||||
result_val = _gvn.transform(new (C, 3) MaxINode(x,y)); break;
|
case vmIntrinsics::_max_strict:
|
||||||
|
result_val = _gvn.transform(new MaxINode(x0, y0));
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ShouldNotReachHere();
|
fatal_unexpected_iid(id);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
*/
|
return result_val;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int
|
inline int
|
||||||
|
@ -209,6 +209,9 @@ public class IRNode {
|
|||||||
public static final String VECTOR_UCAST_I2X = START + "VectorUCastI2X" + MID + END;
|
public static final String VECTOR_UCAST_I2X = START + "VectorUCastI2X" + MID + END;
|
||||||
public static final String VECTOR_REINTERPRET = START + "VectorReinterpret" + MID + END;
|
public static final String VECTOR_REINTERPRET = START + "VectorReinterpret" + MID + END;
|
||||||
|
|
||||||
|
public static final String Min_V = START + "MinV" + MID + END;
|
||||||
|
public static final String Max_V = START + "MaxV" + MID + END;
|
||||||
|
|
||||||
public static final String FAST_LOCK = START + "FastLock" + MID + END;
|
public static final String FAST_LOCK = START + "FastLock" + MID + END;
|
||||||
public static final String FAST_UNLOCK = START + "FastUnlock" + MID + END;
|
public static final String FAST_UNLOCK = START + "FastUnlock" + MID + END;
|
||||||
|
|
||||||
|
@ -0,0 +1,114 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package compiler.c2.irTests;
|
||||||
|
|
||||||
|
import compiler.lib.ir_framework.*;
|
||||||
|
import java.util.Random;
|
||||||
|
import jdk.test.lib.Utils;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8288107
|
||||||
|
* @summary Auto-vectorization enhancement for integer Math.max/Math.min operations
|
||||||
|
* @library /test/lib /
|
||||||
|
* @requires vm.compiler2.enabled
|
||||||
|
* @requires (os.simpleArch == "x64" & ((vm.cpu.features ~= ".*avx.*")
|
||||||
|
* | (vm.cpu.features ~= ".*sse4.*"))) | os.arch == "aarch64" | os.arch == "riscv64"
|
||||||
|
* @run driver compiler.c2.irTests.TestAutoVecIntMinMax
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class TestAutoVecIntMinMax {
|
||||||
|
private final static int LENGTH = 2000;
|
||||||
|
private final static Random RANDOM = Utils.getRandomInstance();
|
||||||
|
|
||||||
|
private static int[] a;
|
||||||
|
private static int[] b;
|
||||||
|
private static int[] c;
|
||||||
|
|
||||||
|
static {
|
||||||
|
a = new int[LENGTH];
|
||||||
|
b = new int[LENGTH];
|
||||||
|
c = new int[LENGTH];
|
||||||
|
for(int i = 0; i < LENGTH; i++) {
|
||||||
|
a[i] = RANDOM.nextInt();
|
||||||
|
b[i] = RANDOM.nextInt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
TestFramework.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test for auto-vectorization of Math.min operation on an array of integers
|
||||||
|
@Test
|
||||||
|
@IR(counts = {IRNode.LOAD_VECTOR, " >0 "})
|
||||||
|
@IR(counts = {IRNode.Min_V, " >0 "})
|
||||||
|
@IR(counts = {IRNode.STORE_VECTOR, " >0 "})
|
||||||
|
private static void testIntMin(int[] a, int[] b) {
|
||||||
|
for(int i = 0; i < LENGTH; i++) {
|
||||||
|
c[i] = Math.min(a[i], b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test for auto-vectorization of StrictMath.min operation on an array of integers
|
||||||
|
@Test
|
||||||
|
@IR(counts = {IRNode.LOAD_VECTOR, " >0 "})
|
||||||
|
@IR(counts = {IRNode.Min_V, " >0 "})
|
||||||
|
@IR(counts = {IRNode.STORE_VECTOR, " >0 "})
|
||||||
|
private static void testIntStrictMin(int[] a, int[] b) {
|
||||||
|
for(int i = 0; i < LENGTH; i++) {
|
||||||
|
c[i] = StrictMath.min(a[i], b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test for auto-vectorization of Math.max operation on an array of integers
|
||||||
|
@Test
|
||||||
|
@IR(counts = {IRNode.LOAD_VECTOR, " >0 "})
|
||||||
|
@IR(counts = {IRNode.Max_V, " >0 "})
|
||||||
|
@IR(counts = {IRNode.STORE_VECTOR, " >0 "})
|
||||||
|
private static void testIntMax(int[] a, int[] b) {
|
||||||
|
for(int i = 0; i < LENGTH; i++) {
|
||||||
|
c[i] = Math.max(a[i], b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test for auto-vectorization of StrictMath.max operation on an array of integers
|
||||||
|
@Test
|
||||||
|
@IR(counts = {IRNode.LOAD_VECTOR, " >0 "})
|
||||||
|
@IR(counts = {IRNode.Max_V, " >0 "})
|
||||||
|
@IR(counts = {IRNode.STORE_VECTOR, " >0 "})
|
||||||
|
private static void testIntStrictMax(int[] a, int[] b) {
|
||||||
|
for(int i = 0; i < LENGTH; i++) {
|
||||||
|
c[i] = StrictMath.max(a[i], b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testIntMin", "testIntStrictMin", "testIntMax", "testIntStrictMax"})
|
||||||
|
private void testIntMinMax_runner() {
|
||||||
|
testIntMin(a, b);
|
||||||
|
testIntStrictMin(a, b);
|
||||||
|
testIntMax(a, b);
|
||||||
|
testIntStrictMax(a, b);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,89 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Arm Limited. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
package org.openjdk.bench.vm.compiler;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.*;
|
||||||
|
import org.openjdk.jmh.infra.*;
|
||||||
|
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
@State(Scope.Thread)
|
||||||
|
public class VectorIntMinMax {
|
||||||
|
@Param({"2048"})
|
||||||
|
private int LENGTH;
|
||||||
|
|
||||||
|
private int[] ia;
|
||||||
|
private int[] ib;
|
||||||
|
private int[] ic;
|
||||||
|
|
||||||
|
@Param("0")
|
||||||
|
private int seed;
|
||||||
|
private Random random = new Random(seed);
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void init() {
|
||||||
|
ia = new int[LENGTH];
|
||||||
|
ib = new int[LENGTH];
|
||||||
|
ic = new int[LENGTH];
|
||||||
|
|
||||||
|
for (int i = 0; i < LENGTH; i++) {
|
||||||
|
ia[i] = random.nextInt();
|
||||||
|
ib[i] = random.nextInt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test Math.max for int arrays
|
||||||
|
@Benchmark
|
||||||
|
public void testMaxInt() {
|
||||||
|
for (int i = 0; i < LENGTH; i++) {
|
||||||
|
ic[i] = Math.max(ia[i], ib[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test Math.min for int arrays
|
||||||
|
@Benchmark
|
||||||
|
public void testMinInt() {
|
||||||
|
for (int i = 0; i < LENGTH; i++) {
|
||||||
|
ic[i] = Math.min(ia[i], ib[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test StrictMath.min for int arrays
|
||||||
|
@Benchmark
|
||||||
|
public void testStrictMinInt() {
|
||||||
|
for (int i = 0; i < LENGTH; i++) {
|
||||||
|
ic[i] = StrictMath.min(ia[i], ib[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test StrictMath.max for int arrays
|
||||||
|
@Benchmark
|
||||||
|
public void testStrictMaxInt() {
|
||||||
|
for (int i = 0; i < LENGTH; i++) {
|
||||||
|
ic[i] = StrictMath.max(ia[i], ib[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user