8257531: Super word not applied to a loop of simple Buffer operations

Reviewed-by: roland, psandoz
This commit is contained in:
Vladimir Kozlov 2020-12-04 19:23:52 +00:00
parent d76039d3dc
commit dd0b9454a2
3 changed files with 268 additions and 23 deletions
src/hotspot/share/opto
test/hotspot/jtreg/compiler/vectorization

@ -645,7 +645,7 @@ void SuperWord::find_adjacent_refs() {
create_pack = false;
} else {
SWPointer p2(best_align_to_mem_ref, this, NULL, false);
if (align_to_ref_p.invar() != p2.invar()) {
if (!align_to_ref_p.invar_equals(p2)) {
// Do not vectorize memory accesses with different invariants
// if unaligned memory accesses are not allowed.
create_pack = false;
@ -3526,6 +3526,11 @@ void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
invar = new ConvL2INode(invar);
_igvn.register_new_node_with_optimizer(invar);
}
Node* invar_scale = align_to_ref_p.invar_scale();
if (invar_scale != NULL) {
invar = new LShiftINode(invar, invar_scale);
_igvn.register_new_node_with_optimizer(invar);
}
Node* aref = new URShiftINode(invar, log2_elt);
_igvn.register_new_node_with_optimizer(aref);
_phase->set_ctrl(aref, pre_ctrl);
@ -3711,6 +3716,7 @@ int SWPointer::Tracer::_depth = 0;
SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only) :
_mem(mem), _slp(slp), _base(NULL), _adr(NULL),
_scale(0), _offset(0), _invar(NULL), _negate_invar(false),
_invar_scale(NULL),
_nstack(nstack), _analyze_only(analyze_only),
_stack_idx(0)
#ifndef PRODUCT
@ -3779,6 +3785,7 @@ SWPointer::SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool anal
SWPointer::SWPointer(SWPointer* p) :
_mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
_scale(0), _offset(0), _invar(NULL), _negate_invar(false),
_invar_scale(NULL),
_nstack(p->_nstack), _analyze_only(p->_analyze_only),
_stack_idx(p->_stack_idx)
#ifndef PRODUCT
@ -3896,7 +3903,7 @@ bool SWPointer::scaled_iv(Node* n) {
NOT_PRODUCT(_tracer.scaled_iv_7(n);)
return true;
}
} else if (opc == Op_LShiftL) {
} else if (opc == Op_LShiftL && n->in(2)->is_Con()) {
if (!has_iv() && _invar == NULL) {
// Need to preserve the current _offset value, so
// create a temporary object for this expression subtree.
@ -3906,14 +3913,16 @@ bool SWPointer::scaled_iv(Node* n) {
NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);)
if (tmp.scaled_iv_plus_offset(n->in(1))) {
if (tmp._invar == NULL || _slp->do_vector_loop()) {
int mult = 1 << n->in(2)->get_int();
_scale = tmp._scale * mult;
_offset += tmp._offset * mult;
_invar = tmp._invar;
NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, mult);)
return true;
int scale = n->in(2)->get_int();
_scale = tmp._scale << scale;
_offset += tmp._offset << scale;
_invar = tmp._invar;
if (_invar != NULL) {
_negate_invar = tmp._negate_invar;
_invar_scale = n->in(2);
}
NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar, _negate_invar);)
return true;
}
}
}
@ -4012,12 +4021,14 @@ bool SWPointer::offset_plus_k(Node* n, bool negate) {
//----------------------------print------------------------
void SWPointer::print() {
#ifndef PRODUCT
tty->print("base: %d adr: %d scale: %d offset: %d invar: %c%d\n",
tty->print("base: [%d] adr: [%d] scale: %d offset: %d",
_base != NULL ? _base->_idx : 0,
_adr != NULL ? _adr->_idx : 0,
_scale, _offset,
_negate_invar?'-':'+',
_invar != NULL ? _invar->_idx : 0);
_scale, _offset);
if (_invar != NULL) {
tty->print(" invar: %c[%d] << [%d]", _negate_invar?'-':'+', _invar->_idx, _invar_scale->_idx);
}
tty->cr();
#endif
}
@ -4205,14 +4216,20 @@ void SWPointer::Tracer::scaled_iv_8(Node* n, SWPointer* tmp) {
}
}
void SWPointer::Tracer::scaled_iv_9(Node* n, int scale, int _offset, int mult) {
void SWPointer::Tracer::scaled_iv_9(Node* n, int scale, int offset, Node* invar, bool negate_invar) {
if(_slp->is_trace_alignment()) {
print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = %d", n->_idx, scale, _offset);
print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) %d is scaled_iv_plus_offset, in(2) %d used to get mult = %d: _scale = %d, _offset = %d",
n->in(1)->_idx, n->in(2)->_idx, mult, scale, _offset);
print_depth(); tty->print_cr(" %d SWPointer::scaled_iv: Op_LShiftL PASSED, setting _scale = %d, _offset = %d", n->_idx, scale, offset);
print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: in(1) [%d] is scaled_iv_plus_offset, in(2) [%d] used to scale: _scale = %d, _offset = %d",
n->in(1)->_idx, n->in(2)->_idx, scale, offset);
if (invar != NULL) {
print_depth(); tty->print_cr(" \\ SWPointer::scaled_iv: scaled invariant: %c[%d]", (negate_invar?'-':'+'), invar->_idx);
}
inc_depth(); inc_depth();
print_depth(); n->in(1)->dump();
print_depth(); n->in(2)->dump();
if (invar != NULL) {
print_depth(); invar->dump();
}
dec_depth(); dec_depth();
}
}

@ -579,10 +579,13 @@ class SWPointer {
Node* _base; // NULL if unsafe nonheap reference
Node* _adr; // address pointer
jint _scale; // multiplier for iv (in bytes), 0 if no loop iv
jint _offset; // constant offset (in bytes)
int _scale; // multiplier for iv (in bytes), 0 if no loop iv
int _offset; // constant offset (in bytes)
Node* _invar; // invariant offset (in bytes), NULL if none
bool _negate_invar; // if true then use: (0 - _invar)
Node* _invar_scale; // multiplier for invariant
Node_Stack* _nstack; // stack used to record a swpointer trace of variants
bool _analyze_only; // Used in loop unrolling only for swpointer trace
uint _stack_idx; // Used in loop unrolling only for swpointer trace
@ -624,17 +627,22 @@ class SWPointer {
int scale_in_bytes() { return _scale; }
Node* invar() { return _invar; }
bool negate_invar() { return _negate_invar; }
Node* invar_scale() { return _invar_scale; }
int offset_in_bytes() { return _offset; }
int memory_size() { return _mem->memory_size(); }
Node_Stack* node_stack() { return _nstack; }
// Comparable?
bool invar_equals(SWPointer& q) {
return (_invar == q._invar &&
_invar_scale == q._invar_scale &&
_negate_invar == q._negate_invar);
}
int cmp(SWPointer& q) {
if (valid() && q.valid() &&
(_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
_scale == q._scale &&
_invar == q._invar &&
_negate_invar == q._negate_invar) {
_scale == q._scale && invar_equals(q)) {
bool overlap = q._offset < _offset + memory_size() &&
_offset < q._offset + q.memory_size();
return overlap ? Equal : (_offset < q._offset ? Less : Greater);
@ -704,7 +712,7 @@ class SWPointer {
void scaled_iv_6(Node* n, int scale);
void scaled_iv_7(Node* n);
void scaled_iv_8(Node* n, SWPointer* tmp);
void scaled_iv_9(Node* n, int _scale, int _offset, int mult);
void scaled_iv_9(Node* n, int _scale, int _offset, Node* _invar, bool _negate_invar);
void scaled_iv_10(Node* n);
void offset_plus_k_1(Node* n);

@ -0,0 +1,220 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* @test
* @bug 8257531
* @summary Test vectorization for Buffer operations.
* @library /test/lib /
* @requires vm.compiler2.enabled & vm.debug == true
* @requires os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64" | os.arch=="aarch64"
* @run main compiler.vectorization.TestBufferVectorization array
* @run main compiler.vectorization.TestBufferVectorization arrayOffset
* @run main compiler.vectorization.TestBufferVectorization buffer
* @run main compiler.vectorization.TestBufferVectorization bufferHeap
* @run main compiler.vectorization.TestBufferVectorization bufferDirect
*/
package compiler.vectorization;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
import jdk.test.lib.process.ProcessTools;
import jdk.test.lib.process.OutputAnalyzer;
public class TestBufferVectorization {
// Number of int elements in every array and buffer used by the tests.
static final int N = 500;

// Number of times main() invokes Test.run() before calling verify().
static final int ITER = 1000;

// Heap buffer allocated directly as an IntBuffer.
static final IntBuffer buffer = IntBuffer.allocate(N);

// Array offset reported by 'buffer'; main() hands it to TestArrayOffset.
// Must be initialized after 'buffer'.
static final int offset = buffer.arrayOffset();

// Int view of a heap ByteBuffer that uses the platform's native byte order.
static final IntBuffer heap_buffer_byte_to_int =
    ByteBuffer.allocate(N * Integer.BYTES)
              .order(ByteOrder.nativeOrder())
              .asIntBuffer();

// Int view of a direct ByteBuffer that uses the platform's native byte order.
static final IntBuffer direct_buffer_byte_to_int =
    ByteBuffer.allocateDirect(N * Integer.BYTES)
              .order(ByteOrder.nativeOrder())
              .asIntBuffer();

// Value of the "os.arch" system property; consulted by verify_vectors().
static final String arch = System.getProperty("os.arch");
/**
 * Common shape of one test case: main() calls init() once, then run()
 * ITER times, then verify().
 */
interface Test {
    /** Fills the data under test with its starting values. */
    void init();
    /** Executes the loop under test once over the data. */
    void run();
    /** Checks the data and throws RuntimeException on a wrong value. */
    void verify();
}
/**
 * Test case operating on a plain int[]: every element is incremented by one.
 */
static class TestArray implements Test {
    final int[] array = new int[N];

    /** Stores each index into its own slot: array[i] == i. */
    public void init() {
        for (int i = 0; i < array.length; i++) {
            array[i] = i;
        }
    }

    /** Loop under test: adds one to every element. */
    public void run() {
        // Loop shape intentionally left as in the original; it is the
        // pattern whose compilation the test inspects.
        for (int i = 0; i < array.length; i++) {
            array[i] += 1;
        }
    }

    /**
     * Resets the array, runs the loop once and requires array[i] == i + 1
     * for every element.
     */
    public void verify() {
        init(); // reset
        run();  // run compiled code
        for (int i = 0; i < array.length; i++) {
            final int actual = array[i];
            final int expected = i + 1;
            if (actual == expected) {
                continue;
            }
            throw new RuntimeException(" Invalid result: array[" + i + "]: " + actual + " != " + expected);
        }
    }
}
/**
 * Test case operating on an int[] through an index shifted by a fixed,
 * loop-invariant offset: elements [offset, array.length) are incremented.
 */
static class TestArrayOffset implements Test {
    final int offset;
    final int[] array = new int[N];

    /**
     * @param off index of the first element that run() modifies
     */
    public TestArrayOffset(int off) {
        offset = off;
    }

    /** Stores each index into its own slot: array[k] == k. */
    public void init() {
        for (int k = 0; k < array.length; k++) {
            array[k] = k;
        }
    }

    /** Loop under test: adds one to every element at index >= offset. */
    public void run() {
        int l = array.length - offset;
        for(int k = 0; k < l; k++) {
            array[k + offset] += 1;
        }
    }

    /**
     * Resets the array, runs the loop once and checks every element.
     *
     * run() writes array[k + offset], i.e. indices [offset, array.length),
     * so those must hold k + 1 while indices [0, offset) must still hold k.
     * (The previous check expected the first array.length - offset elements
     * to be incremented, which is only right when offset == 0.)
     */
    public void verify() {
        init(); // reset
        run();  // run compiled code
        for (int k = 0; k < array.length; k++) {
            final int expected = (k < offset) ? k : (k + 1);
            if (array[k] != expected) {
                throw new RuntimeException(" Invalid result: arrayOffset[" + k + "]: " + array[k] + " != " + expected);
            }
        }
    }
}
/**
 * Test case operating on an IntBuffer through absolute get/put: every
 * element below the buffer's limit is incremented by one.
 */
static class TestBuffer implements Test {
    final IntBuffer buffer;

    /**
     * @param buf buffer to operate on
     */
    public TestBuffer(IntBuffer buf) {
        buffer = buf;
    }

    /** Stores each index into its own slot: buffer.get(i) == i. */
    public void init() {
        for (int i = 0; i < buffer.limit(); i++) {
            buffer.put(i, i);
        }
    }

    /** Loop under test: adds one to every element. */
    public void run() {
        // Loop shape intentionally left as in the original; it is the
        // pattern whose compilation the test inspects.
        for (int i = 0; i < buffer.limit(); i++) {
            buffer.put(i, buffer.get(i) + 1);
        }
    }

    /**
     * Resets the buffer, runs the loop once and requires
     * buffer.get(i) == i + 1 for every element.
     */
    public void verify() {
        init(); // reset
        run();  // run compiled code
        for (int i = 0; i < buffer.limit(); i++) {
            final int expected = i + 1;
            if (buffer.get(i) == expected) {
                continue;
            }
            throw new RuntimeException(" Invalid result: buffer.get(" + i + "): " + buffer.get(i) + " != " + expected);
        }
    }
}
/**
 * Entry point. args[0] names the test case to run. The selected case is
 * initialized, run ITER times and verified. When exactly one argument is
 * given, verify_vectors() is called afterwards; any additional argument
 * suppresses that step.
 */
public static void main(String[] args) {
    if (args.length == 0) {
        throw new RuntimeException(" Missing test name: array, arrayOffset, buffer, bufferHeap, bufferDirect");
    }
    final String testName = args[0];
    final Test te = selectTest(testName);

    te.init();
    for (int iteration = 0; iteration < ITER; iteration++) {
        te.run();
    }
    te.verify();

    final boolean checkVectors = (args.length == 1);
    if (checkVectors) {
        verify_vectors(te, testName);
    }
}

// Maps a test name from the command line to a fresh Test instance;
// throws RuntimeException for a name that is not recognized.
private static Test selectTest(String name) {
    switch (name) {
        case "array":
            return new TestArray();
        case "arrayOffset":
            return new TestArrayOffset(offset);
        case "buffer":
            return new TestBuffer(buffer);
        case "bufferHeap":
            return new TestBuffer(heap_buffer_byte_to_int);
        case "bufferDirect":
            return new TestBuffer(direct_buffer_byte_to_int);
        default:
            throw new RuntimeException(" Unknown test: " + name);
    }
}
/**
 * Re-runs the named test case in a child JVM with -XX:+TraceNewVectors and
 * requires the child's output to mention the expected vector nodes and the
 * child to exit with status 0.
 *
 * The child is started with a second program argument ("skip_verify"), so
 * its main() sees two arguments and does not call this method again.
 *
 * @param t        the test case that was just run (not used here)
 * @param testName name of the test case, forwarded to the child JVM
 * @throws RuntimeException if the child JVM cannot be launched; the
 *         original exception is attached as the cause
 */
static void verify_vectors(Test t, String testName) {
    if (testName.equals("bufferDirect")) {
        return; // bufferDirect uses Unsafe memory accesses which are not vectorized currently
    }
    if (testName.equals("bufferHeap") && (arch.equals("x86") || arch.equals("i386"))) {
        return; // bufferHeap uses Long type for memory accesses which are not vectorized in 32-bit VM
    }

    final OutputAnalyzer out;
    try {
        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:-BackgroundCompilation",
                                                                  "-XX:+TraceNewVectors",
                                                                  "compiler.vectorization.TestBufferVectorization",
                                                                  testName,
                                                                  "skip_verify");
        out = new OutputAnalyzer(pb.start());
    } catch (Exception e) {
        // Chain the cause so the launch failure's stack trace is not lost.
        throw new RuntimeException(" Exception launching Java process: " + e, e);
    }

    out.shouldContain("ReplicateI");
    out.shouldContain("LoadVector");
    out.shouldContain("AddVI");
    out.shouldContain("StoreVector");
    out.shouldHaveExitValue(0);
}
}