8328544: Improve handling of vectorization

Co-authored-by: Christian Hagedorn <chagedorn@openjdk.org>
Reviewed-by: mschoene, kvn, chagedorn, rhalade
Emanuel Peter 2024-05-21 10:31:14 +00:00 committed by Jaikiran Pai
parent 03bc6b359f
commit cfa25b71a6
8 changed files with 936 additions and 166 deletions

View File

@ -416,6 +416,10 @@ VPointer::VPointer(MemNode* const mem, const VLoop& vloop,
#ifdef ASSERT
_debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr),
#endif
_has_int_index_after_convI2L(false),
_int_index_after_convI2L_offset(0),
_int_index_after_convI2L_invar(nullptr),
_int_index_after_convI2L_scale(0),
_nstack(nstack), _analyze_only(analyze_only), _stack_idx(0)
#ifndef PRODUCT
, _tracer(vloop.is_trace_pointer_analysis())
@ -495,6 +499,11 @@ VPointer::VPointer(MemNode* const mem, const VLoop& vloop,
return;
}
if (!is_safe_to_use_as_simple_form(base, adr)) {
assert(!valid(), "does not have simple form");
return;
}
_base = base;
_adr = adr;
assert(valid(), "Usable");
@ -508,6 +517,10 @@ VPointer::VPointer(VPointer* p) :
#ifdef ASSERT
_debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr),
#endif
_has_int_index_after_convI2L(false),
_int_index_after_convI2L_offset(0),
_int_index_after_convI2L_invar(nullptr),
_int_index_after_convI2L_scale(0),
_nstack(p->_nstack), _analyze_only(p->_analyze_only), _stack_idx(p->_stack_idx)
#ifndef PRODUCT
, _tracer(p->_tracer._is_trace_alignment)
@ -530,6 +543,354 @@ int VPointer::invar_factor() const {
return 1;
}
// We would like to make decisions about aliasing (i.e. removing memory edges) and adjacency
// (i.e. which loads/stores can be packed) based on the simple form:
//
// s_pointer = adr + offset + invar + scale * ConvI2L(iv)
//
// However, we parse the compound-long-int form:
//
// c_pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_index)
// int_index = int_offset + int_invar + int_scale * iv
//
// In general, the simple form and the compound-long-int form do not compute the same pointer
// at runtime: for example, an overflow in the int_index truncates the compound-long-int
// result, while the simple form is unaffected by it.
//
// Example:
// For both forms, we have:
// iv = 0
// scale = 1
//
// We now attribute the offset and invar once to the long part and once to the int part:
// Pointer 1 (long offset and long invar):
// long_offset = min_int
// long_invar = min_int
// int_offset = 0
// int_invar = 0
//
// Pointer 2 (int offset and int invar):
// long_offset = 0
// long_invar = 0
// int_offset = min_int
// int_invar = min_int
//
// This gives us the following pointers:
// Compound-long-int form pointers:
// Form:
// c_pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_offset + int_invar + int_scale * iv)
//
// Pointers:
// c_pointer1 = adr + min_int + min_int + 1 * ConvI2L(0 + 0 + 1 * 0)
// = adr + min_int + min_int
// = adr - 2^32
//
// c_pointer2 = adr + 0 + 0 + 1 * ConvI2L(min_int + min_int + 1 * 0)
// = adr + ConvI2L(min_int + min_int)
// = adr + 0
// = adr
//
// Simple form pointers:
// Form:
// s_pointer = adr + offset + invar + scale * ConvI2L(iv)
// s_pointer = adr + (long_offset + int_offset) + (long_invar + int_invar) + (long_scale * int_scale) * ConvI2L(iv)
//
// Pointers:
// s_pointer1 = adr + (min_int + 0 ) + (min_int + 0 ) + 1 * 0
// = adr + min_int + min_int
// = adr - 2^32
// s_pointer2 = adr + (0 + min_int ) + (0 + min_int ) + 1 * 0
// = adr + min_int + min_int
// = adr - 2^32
//
// We see that the two addresses are actually 2^32 bytes apart (derived from the c_pointers), but their simple forms look identical.
//
// Hence, we need to determine in which cases it is safe to make decisions based on the simple
// form, rather than the compound-long-int form. If we cannot prove that using the simple form
// is safe (i.e. equivalent to the compound-long-int form), then we do not get a valid VPointer,
// and the associated memop cannot be vectorized.
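//
// For intuition, here is a minimal sketch (hypothetical, not part of the source) of the
// truncation in HotSpot's jint/jlong arithmetic that produces the 2^32 gap above:
//
// jint a = min_jint;
// jlong c1 = (jlong)a + (jlong)a; // convert each term separately (simple form): -2^32
// jlong c2 = (jlong)java_add(a, a); // wrapping int add, then convert (compound form): 0
//
// The two results differ by exactly 2^32, just like s_pointer2 and c_pointer2 above.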
bool VPointer::is_safe_to_use_as_simple_form(Node* base, Node* adr) const {
#ifndef _LP64
// On 32-bit platforms, there is never an explicit int_index with ConvI2L for the iv. Thus, the
// parsed pointer form is always the simple form, with int operations:
//
// pointer = adr + offset + invar + scale * iv
//
assert(!_has_int_index_after_convI2L, "32-bit never has an int_index with ConvI2L for the iv");
return true;
#else
// Array accesses that are not Unsafe always have a RangeCheck which ensures that there is no
// int_index overflow. This implies that the conversion to long can be done separately:
//
// ConvI2L(int_index) = ConvI2L(int_offset) + ConvI2L(int_invar) + ConvI2L(int_scale) * ConvI2L(iv)
//
// And hence, the simple form is guaranteed to be identical to the compound-long-int form at
// runtime and the VPointer is safe/valid to be used.
const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr();
if (ary_ptr_t != nullptr) {
if (!_mem->is_unsafe_access()) {
return true;
}
}
// We did not find the int_index. Without it we cannot prove safety, so reject this VPointer.
if (!_has_int_index_after_convI2L) {
return false;
}
int int_offset = _int_index_after_convI2L_offset;
Node* int_invar = _int_index_after_convI2L_invar;
int int_scale = _int_index_after_convI2L_scale;
int long_scale = _scale / int_scale;
// If "int_index = iv", then the simple form is identical to the compound-long-int form.
//
// int_index = int_offset + int_invar + int_scale * iv
// = 0 0 1 * iv
// = iv
if (int_offset == 0 && int_invar == nullptr && int_scale == 1) {
return true;
}
// Intuition: What happens if the int_index overflows? Let us look at two pointers on the "overflow edge":
//
// pointer1 = adr + ConvI2L(int_index1)
// pointer2 = adr + ConvI2L(int_index2)
//
// int_index1 = max_int + 0 = max_int -> very close to but before the overflow
// int_index2 = max_int + 1 = min_int -> just enough to get the overflow
//
// When looking at the difference of pointer1 and pointer2, we notice that it is very large
// (almost 2^32). Since arrays have at most 2^31 elements, chances are high that pointer2 is
// an actual out-of-bounds access at runtime. Such accesses would normally be prevented by
// range checks. However, if the access is done using Unsafe, where range checks are omitted,
// then an out-of-bounds access constitutes undefined behavior. This means that we are allowed
// to do anything, including changing the behavior.
//
// If we can establish the right conditions, we have a guarantee that an overflow is either
// impossible (no overflow, or range checks prevent it) or undefined behavior. In both cases,
// it is safe to vectorize.
//
// Approach: We want to prove a lower bound for the distance between these two pointers, and an
// upper bound for the size of a memory object. We can derive such an upper bound for
// arrays. We know they have at most 2^31 elements. If we know the size of the elements
// in bytes, we have:
//
// array_element_size_in_bytes * 2^31 >= max_possible_array_size_in_bytes
// >= array_size_in_bytes (ARR)
//
// If some small difference "delta" leads to an int_index overflow, we know that the
// int_index1 before overflow must have been close to max_int, and the int_index2 after
// the overflow must be close to min_int:
//
// pointer1 = adr + long_offset + long_invar + long_scale * ConvI2L(int_index1)
// =approx adr + long_offset + long_invar + long_scale * max_int
//
// pointer2 = adr + long_offset + long_invar + long_scale * ConvI2L(int_index2)
// =approx adr + long_offset + long_invar + long_scale * min_int
//
// We realize that the pointer difference is very large:
//
// difference =approx long_scale * 2^32
//
// Hence, if we set the right condition for long_scale and array_element_size_in_bytes,
// we can prove that an overflow is impossible (or would imply undefined behavior).
//
// We must now take this intuition and develop a rigorous proof. We start by stating the problem
// more precisely, with the help of some definitions and the Statement we are going to prove.
//
// Definition:
// Two VPointers are "comparable" (i.e. VPointer::comparable is true, set with VPointer::cmp()),
// iff all of these conditions apply for the simple form:
// 1) Both VPointers are valid.
// 2) The adr are identical, or both are array bases of different arrays.
// 3) They have identical scale.
// 4) They have identical invar.
// 5) The difference in offsets is limited: abs(offset1 - offset2) < 2^31. (DIFF)
//
// For the Vectorization Optimization, we pair-wise compare VPointers and determine if they are:
// 1) "not comparable":
// We do not optimize them (we assume they may alias, and do not assume adjacency).
//
// Whenever we choose this option based on the simple form, it is also correct based on the
// compound-long-int form, since we make no optimizations based on it.
//
// 2) "comparable" with different array bases at runtime:
// We assume they do not alias (remove memory edges), but do not assume adjacency.
//
// Whenever we have two different array bases for the simple form, we also have different
// array bases for the compound-long-int form. Since the VPointers provably point to different
// memory objects, they can never alias.
//
// 3) "comparable" with the same base address:
// We compute the relative pointer difference, and based on the load/store size we can
// compute aliasing and adjacency.
//
// We must find a condition under which the pointer difference of the simple form is
// identical to the pointer difference of the compound-long-int form. We do this with the
// Statement below, which we then proceed to prove.
//
// Statement:
// If two VPointers satisfy these 3 conditions:
// 1) They are "comparable".
// 2) They have the same base address.
// 3) Their long_scale is a multiple of the array element size in bytes:
//
// abs(long_scale) % array_element_size_in_bytes = 0 (A)
//
// Then their pointer difference of the simple form is identical to the pointer difference
// of the compound-long-int form.
//
// More precisely:
// Two such VPointers by definition have identical adr, invar, and scale.
// Their simple form is:
//
// s_pointer1 = adr + offset1 + invar + scale * ConvI2L(iv) (B1)
// s_pointer2 = adr + offset2 + invar + scale * ConvI2L(iv) (B2)
//
// Thus, the pointer difference of the simple forms collapses to the difference in offsets:
//
// s_difference = s_pointer1 - s_pointer2 = offset1 - offset2 (C)
//
// The compound-long-int forms of these two VPointers are:
//
// c_pointer1 = adr + long_offset1 + long_invar1 + long_scale1 * ConvI2L(int_index1) (D1)
// int_index1 = int_offset1 + int_invar1 + int_scale1 * iv (D2)
//
// c_pointer2 = adr + long_offset2 + long_invar2 + long_scale2 * ConvI2L(int_index2) (D3)
// int_index2 = int_offset2 + int_invar2 + int_scale2 * iv (D4)
//
// These terms relate to the offset1, offset2, invar, and scale of the simple forms (B1) and (B2) as follows:
//
// offset1 = long_offset1 + long_scale1 * ConvI2L(int_offset1) (D5)
// offset2 = long_offset2 + long_scale2 * ConvI2L(int_offset2) (D6)
//
// invar = long_invar1 + long_scale1 * ConvI2L(int_invar1)
// = long_invar2 + long_scale2 * ConvI2L(int_invar2) (D7)
//
// scale = long_scale1 * ConvI2L(int_scale1)
// = long_scale2 * ConvI2L(int_scale2) (D8)
//
// The pointer difference of the compound-long-int form is defined as:
//
// c_difference = c_pointer1 - c_pointer2
//
// Thus, the Statement claims that for the two VPointers we have:
//
// s_difference = c_difference (Statement)
//
// We prove the Statement with the help of a Lemma:
//
// Lemma:
// There is some integer x, such that:
//
// c_difference = s_difference + array_element_size_in_bytes * x * 2^32 (Lemma)
//
// From condition (DIFF), we can derive:
//
// abs(s_difference) < 2^31 (E)
//
// Assuming the Lemma, we prove the Statement:
// If "x = 0" (intuitively: the int_index does not overflow), then:
// c_difference = s_difference
// and hence the simple form computes the same pointer difference as the compound-long-int form.
// If "x != 0" (intuitively: the int_index overflows), then:
// abs(c_difference) = abs(s_difference + array_element_size_in_bytes * x * 2^32)
// -- using abs(x) >= 1 --
// >= array_element_size_in_bytes * 2^32 - abs(s_difference)
// -- apply (E) --
// > array_element_size_in_bytes * 2^32 - 2^31
// >= array_element_size_in_bytes * 2^31
// -- apply (ARR) --
// >= max_possible_array_size_in_bytes
// >= array_size_in_bytes
//
// This shows that c_pointer1 and c_pointer2 have a distance that exceeds the maximum array size.
// Thus, at least one of the two pointers must be outside of the array bounds. But we can assume
// that out-of-bounds accesses do not happen. If they still do, it is undefined behavior. Hence,
// we are allowed to do anything. We can also "safely" use the simple form in this case even though
// it might not match the compound-long-int form at runtime.
// QED Statement.
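//
// Numeric instance (hypothetical values): for a long[] array (array_element_size_in_bytes = 8)
// and x != 0, the chain above gives abs(c_difference) > 8 * 2^32 - 2^31 = 15 * 2^31, which
// exceeds max_possible_array_size_in_bytes = 2^31 * 8 = 8 * 2^31.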
//
// We must now prove the Lemma.
//
// ConvI2L always truncates by some multiple of 2^32, i.e. for any y1 and y2 there is some integer y such that:
//
// ConvI2L(y1 + y2) = ConvI2L(y1) + ConvI2L(y2) + 2^32 * y (F)
//
// It follows that there is an integer y1 such that:
//
// ConvI2L(int_index1) = ConvI2L(int_offset1 + int_invar1 + int_scale1 * iv)
// -- apply (F) --
// = ConvI2L(int_offset1)
// + ConvI2L(int_invar1)
// + ConvI2L(int_scale1) * ConvI2L(iv)
// + y1 * 2^32 (G)
//
// Thus, we can write the compound-long-int form (D1) as:
//
// c_pointer1 = adr + long_offset1 + long_invar1 + long_scale1 * ConvI2L(int_index1)
// -- apply (G) --
// = adr
// + long_offset1
// + long_invar1
// + long_scale1 * ConvI2L(int_offset1)
// + long_scale1 * ConvI2L(int_invar1)
// + long_scale1 * ConvI2L(int_scale1) * ConvI2L(iv)
// + long_scale1 * y1 * 2^32 (H)
//
// And we can write the simple form as:
//
// s_pointer1 = adr + offset1 + invar + scale * ConvI2L(iv)
// -- apply (D5, D7, D8) --
// = adr
// + long_offset1
// + long_scale1 * ConvI2L(int_offset1)
// + long_invar1
// + long_scale1 * ConvI2L(int_invar1)
// + long_scale1 * ConvI2L(int_scale1) * ConvI2L(iv) (K)
//
// We now compute the pointer difference between the simple (K) and compound-long-int form (H).
// Most terms cancel out immediately:
//
// sc_difference1 = c_pointer1 - s_pointer1 = long_scale1 * y1 * 2^32 (L)
//
// Rearranging the equation (L), we get:
//
// c_pointer1 = s_pointer1 + long_scale1 * y1 * 2^32 (M)
//
// And since long_scale1 is a multiple of array_element_size_in_bytes, there is some integer
// x1, such that (M) implies:
//
// c_pointer1 = s_pointer1 + array_element_size_in_bytes * x1 * 2^32 (N)
//
// With an analogous equation for c_pointer2, we can now compute the pointer difference for
// the compound-long-int form:
//
// c_difference = c_pointer1 - c_pointer2
// -- apply (N) --
// = s_pointer1 + array_element_size_in_bytes * x1 * 2^32
// -(s_pointer2 + array_element_size_in_bytes * x2 * 2^32)
// -- where "x = x1 - x2" --
// = s_pointer1 - s_pointer2 + array_element_size_in_bytes * x * 2^32
// -- apply (C) --
// = s_difference + array_element_size_in_bytes * x * 2^32
// QED Lemma.
if (ary_ptr_t != nullptr) {
BasicType array_element_bt = ary_ptr_t->elem()->array_element_basic_type();
if (is_java_primitive(array_element_bt)) {
int array_element_size_in_bytes = type2aelembytes(array_element_bt);
if (abs(long_scale) % array_element_size_in_bytes == 0) {
return true;
}
}
}
// General case: we do not know if it is safe to use the simple form.
return false;
#endif
}
bool VPointer::is_loop_member(Node* n) const {
Node* n_c = phase()->get_ctrl(n);
return lpt()->is_member(phase()->get_loop(n_c));
@ -632,6 +993,37 @@ bool VPointer::scaled_iv(Node* n) {
NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);)
return true;
}
} else if (opc == Op_ConvI2L && !has_iv()) {
// So far we have not found the iv, and are about to enter a ConvI2L subgraph,
// which may be the (potentially overflowing) int index of the memory access, of the form:
//
// int_index = int_offset + int_invar + int_scale * iv
//
// If we simply continued parsing with the current VPointer, the int_offset and
// int_invar would just be added to the long offset and invar. But for the checks in
// VPointer::is_safe_to_use_as_simple_form() we need explicit access to the
// int_index. Thus, we must parse it separately here, using a temporary
// VPointer to pattern-match the int_index sub-expression of the address.
NOT_PRODUCT(Tracer::Depth dddd;)
VPointer tmp(this);
NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);)
if (tmp.scaled_iv_plus_offset(n->in(1)) && tmp.has_iv()) {
// We successfully matched an integer index, of the form:
// int_index = int_offset + int_invar + int_scale * iv
_has_int_index_after_convI2L = true;
_int_index_after_convI2L_offset = tmp._offset;
_int_index_after_convI2L_invar = tmp._invar;
_int_index_after_convI2L_scale = tmp._scale;
}
// Now parse it again for the real VPointer. This makes sure that the int_offset, int_invar,
// and int_scale are properly added to the final VPointer's offset, invar, and scale.
if (scaled_iv_plus_offset(n->in(1))) {
NOT_PRODUCT(_tracer.scaled_iv_7(n);)
return true;
}
} else if (opc == Op_ConvI2L || opc == Op_CastII) {
if (scaled_iv_plus_offset(n->in(1))) {
NOT_PRODUCT(_tracer.scaled_iv_7(n);)
@ -648,8 +1040,17 @@ bool VPointer::scaled_iv(Node* n) {
if (tmp.scaled_iv_plus_offset(n->in(1))) {
int scale = n->in(2)->get_int();
// Accumulate scale.
_scale = tmp._scale << scale;
// Accumulate offset.
int shifted_offset = 0;
if (!try_LShiftI_no_overflow(tmp._offset, scale, shifted_offset)) {
return false; // shift overflow.
}
if (!try_AddI_no_overflow(_offset, shifted_offset, _offset)) {
return false; // add overflow.
}
// Accumulate invar.
if (tmp._invar != nullptr) {
BasicType bt = tmp._invar->bottom_type()->basic_type();
assert(bt == T_INT || bt == T_LONG, "");
@ -658,6 +1059,13 @@ bool VPointer::scaled_iv(Node* n) {
_debug_invar_scale = n->in(2);
#endif
}
// Forward info about the int_index:
_has_int_index_after_convI2L = tmp._has_int_index_after_convI2L;
_int_index_after_convI2L_offset = tmp._int_index_after_convI2L_offset;
_int_index_after_convI2L_invar = tmp._int_index_after_convI2L_invar;
_int_index_after_convI2L_scale = tmp._int_index_after_convI2L_scale;
NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar);)
return true;
}
@ -675,7 +1083,9 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
int opc = n->Opcode();
if (opc == Op_ConI) {
if (!try_AddSubI_no_overflow(_offset, n->get_int(), negate, _offset)) {
return false; // add/sub overflow.
}
NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);)
return true;
} else if (opc == Op_ConL) {
@ -684,7 +1094,9 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
if (t->higher_equal(TypeLong::INT)) {
jlong loff = n->get_long();
jint off = (jint)loff;
if (!try_AddSubI_no_overflow(_offset, off, negate, _offset)) {
return false; // add/sub overflow.
}
NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);)
return true;
}
@ -699,11 +1111,15 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
if (opc == Op_AddI) {
if (n->in(2)->is_Con() && invariant(n->in(1))) {
maybe_add_to_invar(n->in(1), negate);
if (!try_AddSubI_no_overflow(_offset, n->in(2)->get_int(), negate, _offset)) {
return false; // add/sub overflow.
}
NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, negate, _offset);)
return true;
} else if (n->in(1)->is_Con() && invariant(n->in(2))) {
if (!try_AddSubI_no_overflow(_offset, n->in(1)->get_int(), negate, _offset)) {
return false; // add/sub overflow.
}
maybe_add_to_invar(n->in(2), negate);
NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, negate, _offset);)
return true;
@ -712,11 +1128,15 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
if (opc == Op_SubI) {
if (n->in(2)->is_Con() && invariant(n->in(1))) {
maybe_add_to_invar(n->in(1), negate);
if (!try_AddSubI_no_overflow(_offset, n->in(2)->get_int(), !negate, _offset)) {
return false; // add/sub overflow.
}
NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, negate, _offset);)
return true;
} else if (n->in(1)->is_Con() && invariant(n->in(2))) {
if (!try_AddSubI_no_overflow(_offset, n->in(1)->get_int(), negate, _offset)) {
return false; // add/sub overflow.
}
maybe_add_to_invar(n->in(2), !negate);
NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, !negate, _offset);)
return true;
@ -806,6 +1226,44 @@ void VPointer::maybe_add_to_invar(Node* new_invar, bool negate) {
_invar = register_if_new(add);
}
bool VPointer::try_AddI_no_overflow(int offset1, int offset2, int& result) {
jlong long_offset = java_add((jlong)(offset1), (jlong)(offset2));
jint int_offset = java_add( offset1, offset2);
if (long_offset != int_offset) {
return false;
}
result = int_offset;
return true;
}
bool VPointer::try_SubI_no_overflow(int offset1, int offset2, int& result) {
jlong long_offset = java_subtract((jlong)(offset1), (jlong)(offset2));
jint int_offset = java_subtract( offset1, offset2);
if (long_offset != int_offset) {
return false;
}
result = int_offset;
return true;
}
bool VPointer::try_AddSubI_no_overflow(int offset1, int offset2, bool is_sub, int& result) {
if (is_sub) {
return try_SubI_no_overflow(offset1, offset2, result);
} else {
return try_AddI_no_overflow(offset1, offset2, result);
}
}
bool VPointer::try_LShiftI_no_overflow(int offset, int shift, int& result) {
jlong long_offset = java_shift_left((jlong)(offset), shift);
jint int_offset = java_shift_left( offset, shift);
if (long_offset != int_offset) {
return false;
}
result = int_offset;
return true;
}
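// A usage sketch (hypothetical) of the widen-and-compare idiom implemented above: each helper
// performs the operation in both jlong and jint, and the results agree iff the jint operation
// did not overflow.
//
// int result;
// try_AddI_no_overflow(max_jint, 1, result); // false: jlong sum 2^31 != jint sum min_jint
// try_AddI_no_overflow(1, 2, result); // true, result == 3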
// We use two comparisons, because a subtraction could underflow.
#define RETURN_CMP_VALUE_IF_NOT_EQUAL(a, b) \
if (a < b) { return -1; } \

View File

@ -670,13 +670,51 @@ private:
// A vectorization pointer (VPointer) has information about an address for
// dependence checking and vector alignment. It's usually bound to a memory
// operation in a counted loop for vectorization analysis.
//
// We parse and represent pointers of the simple form:
//
// pointer = adr + offset + invar + scale * ConvI2L(iv)
//
// Where:
//
// adr: the base address of an array (base = adr)
// OR
// an address into off-heap memory (base = TOP)
//
// offset: a constant offset
// invar: a runtime variable, which is invariant during the loop
// scale: scaling factor
// iv: loop induction variable
//
// But more precisely, we parse the compound-long-int form:
//
// pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_offset + int_invar + int_scale * iv)
//
// or equivalently, with the int part factored out as int_index:
//
// pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_index)
// int_index = int_offset + int_invar + int_scale * iv
//
// However, for aliasing and adjacency checks (e.g. VPointer::cmp()) we always use the simple form to make
// decisions. Hence, we must make sure to only create a "valid" VPointer if the optimizations based on the
// simple form produce the same result as the compound-long-int form would. Intuitively, this depends on
// whether the int_index overflows; the precise conditions are given in VPointer::is_safe_to_use_as_simple_form().
//
// If the int_index cannot overflow, the conversion to long distributes over it:
//
// ConvI2L(int_index) = ConvI2L(int_offset + int_invar + int_scale * iv)
// = ConvI2L(int_offset) + ConvI2L(int_invar) + ConvI2L(int_scale) * ConvI2L(iv)
//
// and the compound-long-int form collapses to the simple form, with:
//
// scale = long_scale * ConvI2L(int_scale)
// offset = long_offset + long_scale * ConvI2L(int_offset)
// invar = long_invar + long_scale * ConvI2L(int_invar)
//
// pointer = adr + offset + invar + scale * ConvI2L(iv)
//
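// A worked instance (hypothetical values): suppose we parse
//
// pointer = adr + 16 + 8 * ConvI2L(0 + inv + 1 * iv)
//
// i.e. long_offset = 16, long_invar = 0, long_scale = 8, int_offset = 0, int_invar = inv,
// and int_scale = 1. The simple-form components are then:
//
// scale = 8 * 1 = 8, offset = 16 + 8 * 0 = 16, invar = 0 + 8 * ConvI2L(inv)
//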
class VPointer : public ArenaObj {
protected:
MemNode* const _mem; // My memory reference node
const VLoop& _vloop;
// Components of the simple form:
Node* _base; // Base address of an array OR null if some off-heap memory.
Node* _adr; // Same as _base if an array pointer OR some off-heap memory pointer.
int _scale; // multiplier for iv (in bytes), 0 if no loop iv
int _offset; // constant offset (in bytes)
@ -687,6 +725,13 @@ class VPointer : public ArenaObj {
Node* _debug_invar_scale; // multiplier for invariant
#endif
// The int_index components of the compound-long-int form. Used to decide if it is safe to use the
// simple form rather than the compound-long-int form that was parsed.
bool _has_int_index_after_convI2L;
int _int_index_after_convI2L_offset;
Node* _int_index_after_convI2L_invar;
int _int_index_after_convI2L_scale;
Node_Stack* _nstack; // stack used to record a vpointer trace of variants
bool _analyze_only; // Used in loop unrolling only for vpointer trace
uint _stack_idx; // Used in loop unrolling only for vpointer trace
@ -726,6 +771,8 @@ class VPointer : public ArenaObj {
VPointer(VPointer* p);
NONCOPYABLE(VPointer);
bool is_safe_to_use_as_simple_form(Node* base, Node* adr) const;
public:
bool valid() const { return _adr != nullptr; }
bool has_iv() const { return _scale != 0; }
@ -751,10 +798,43 @@ class VPointer : public ArenaObj {
return _invar == q._invar;
}
// We compute if and how two VPointers can alias at runtime, i.e. if the two addressed regions of memory can
// ever overlap. There are essentially 3 relevant return states:
// - NotComparable: Synonymous with "unknown aliasing".
// We have no information about how the two VPointers can alias. They could overlap, refer
// to another location in the same memory object, or point to a completely different object.
// -> Memory edge required. Aliasing unlikely but possible.
//
// - Less / Greater: Synonymous with "never aliasing".
// The two VPointers may point into the same memory object, but be non-aliasing (i.e. we
// know both address regions inside the same memory object, but these regions are non-
// overlapping), or the VPointers point to entirely different objects.
// -> No memory edge required. Aliasing impossible.
//
// - Equal: Synonymous with "overlap, or point to different memory objects".
// The two VPointers either overlap on the same memory object, or point to two different
// memory objects.
// -> Memory edge required. Aliasing likely.
//
// In a future refactoring, we can simplify to two states:
// - NeverAlias: instead of Less / Greater
// - MayAlias: instead of Equal / NotComparable
//
// Two VPointers are "comparable" (Less / Greater / Equal) iff all of these conditions apply:
// 1) Both are valid, i.e. expressible in the compound-long-int or simple form.
// 2) The adr are identical, or both are array bases of different arrays.
// 3) They have identical scale.
// 4) They have identical invar.
// 5) The difference in offsets is limited: abs(offset1 - offset2) < 2^31.
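//
// For example (hypothetical values): two VPointers with identical adr, scale, invar, and
// memory_size() = 8 are adjacent (Less) for offsets 0 and 8, overlapping (Equal) for
// offsets 0 and 4, and NotComparable for offsets 0 and 2^31 (condition 5 fails).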
int cmp(const VPointer& q) const {
if (valid() && q.valid() &&
(_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
_scale == q._scale && invar_equals(q)) {
jlong difference = abs(java_subtract((jlong)_offset, (jlong)q._offset));
jlong max_diff = (jlong)1 << 31;
if (difference >= max_diff) {
return NotComparable;
}
bool overlap = q._offset < _offset + memory_size() &&
_offset < q._offset + q.memory_size();
return overlap ? Equal : (_offset < q._offset ? Less : Greater);
@ -859,6 +939,11 @@ class VPointer : public ArenaObj {
void maybe_add_to_invar(Node* new_invar, bool negate);
static bool try_AddI_no_overflow(int offset1, int offset2, int& result);
static bool try_SubI_no_overflow(int offset1, int offset2, int& result);
static bool try_AddSubI_no_overflow(int offset1, int offset2, bool is_sub, int& result);
static bool try_LShiftI_no_overflow(int offset, int shift, int& result);
Node* register_if_new(Node* n) const;
};

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -35,7 +36,6 @@ import java.nio.ByteOrder;
* @test
* @bug 8300258
* @key randomness
* @summary C2: vectorization fails on simple ByteBuffer loop
* @modules java.base/jdk.internal.misc
* @library /test/lib /
@ -147,193 +147,420 @@ public class TestVectorizationMismatchedAccess {
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
// might get fixed with JDK-8325155.
public static void testByteLong1a(byte[] dest, long[] src) {
for (int i = 0; i < src.length; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, src[i]);
}
}
@Run(test = "testByteLong1")
public static void testByteLong1_runner() {
runAndVerify(() -> testByteLong1(byteArray, longArray), 0);
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: address has ConvL2I for cast of long to address, not supported.
public static void testByteLong1b(byte[] dest, long[] src) {
for (int i = 0; i < src.length; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, src[i]);
}
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
public static void testByteLong1c(byte[] dest, long[] src) {
long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
for (int i = 0; i < src.length - 8; i++) {
UNSAFE.putLongUnaligned(dest, base + 8 * i, src[i]);
}
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: address has ConvL2I for cast of long to address, not supported.
public static void testByteLong1d(byte[] dest, long[] src) {
long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
for (int i = 0; i < src.length - 8; i++) {
UNSAFE.putLongUnaligned(dest, base + 8L * i, src[i]);
}
}
@Run(test = {"testByteLong1a", "testByteLong1b", "testByteLong1c", "testByteLong1d"})
public static void testByteLong1_runner() {
runAndVerify(() -> testByteLong1a(byteArray, longArray), 0);
runAndVerify(() -> testByteLong1b(byteArray, longArray), 0);
testByteLong1c(byteArray, longArray);
testByteLong1d(byteArray, longArray);
}
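// Note on the a/b variants (our reading of the pattern, not a comment from the source): in the
// "a" tests the offset expression "8 * i" is computed in int arithmetic, so C2 parses
// int_scale = 8 and long_scale = 1; in the "b" tests "8L * i" widens i first, giving
// long_scale = 8 and int_index = iv. Both shapes must pass
// VPointer::is_safe_to_use_as_simple_form() to be vectorized.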
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
// might get fixed with JDK-8325155.
public static void testByteLong2a(byte[] dest, long[] src) {
for (int i = 1; i < src.length; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), src[i]);
}
}
@Run(test = "testByteLong2")
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: address has ConvL2I for cast of long to address, not supported.
public static void testByteLong2b(byte[] dest, long[] src) {
for (int i = 1; i < src.length; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), src[i]);
}
}
@Run(test = {"testByteLong2a", "testByteLong2b"})
public static void testByteLong2_runner() {
runAndVerify(() -> testByteLong2a(byteArray, longArray), -8);
runAndVerify(() -> testByteLong2b(byteArray, longArray), -8);
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
// might get fixed with JDK-8325155.
public static void testByteLong3a(byte[] dest, long[] src) {
for (int i = 0; i < src.length - 1; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), src[i]);
}
}
@Run(test = "testByteLong3")
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: address has ConvL2I for cast of long to address, not supported.
public static void testByteLong3b(byte[] dest, long[] src) {
for (int i = 0; i < src.length - 1; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), src[i]);
}
}
@Run(test = {"testByteLong3a", "testByteLong3b"})
public static void testByteLong3_runner() {
runAndVerify(() -> testByteLong3a(byteArray, longArray), 8);
runAndVerify(() -> testByteLong3b(byteArray, longArray), 8);
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"},
applyIf = {"AlignVector", "false"})
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
// might get fixed with JDK-8325155.
// AlignVector cannot guarantee that invar is aligned.
public static void testByteLong4a(byte[] dest, long[] src, int start, int stop) {
for (int i = start; i < stop; i++) {
UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, src[i]);
}
}
@Run(test = "testByteLong4")
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"},
applyIf = {"AlignVector", "false"})
// 32-bit: address has ConvL2I for cast of long to address, not supported.
// AlignVector cannot guarantee that invar is aligned.
public static void testByteLong4b(byte[] dest, long[] src, int start, int stop) {
for (int i = start; i < stop; i++) {
UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, src[i]);
}
}
@Run(test = {"testByteLong4a", "testByteLong4b"})
public static void testByteLong4_runner() {
baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
runAndVerify(() -> testByteLong4a(byteArray, longArray, 0, size), 0);
runAndVerify(() -> testByteLong4b(byteArray, longArray, 0, size), 0);
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
// might get fixed with JDK-8325155.
public static void testByteLong5a(byte[] dest, long[] src, int start, int stop) {
for (int i = start; i < stop; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), src[i]);
}
}
@Run(test = "testByteLong5")
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: address has ConvL2I for cast of long to address, not supported.
public static void testByteLong5b(byte[] dest, long[] src, int start, int stop) {
for (int i = start; i < stop; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), src[i]);
}
}
@Run(test = {"testByteLong5a", "testByteLong5b"})
public static void testByteLong5_runner() {
baseOffset = 1;
runAndVerify(() -> testByteLong5a(byteArray, longArray, 0, size-1), 8);
runAndVerify(() -> testByteLong5b(byteArray, longArray, 0, size-1), 8);
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
// might get fixed with JDK-8325155.
public static void testByteByte1a(byte[] dest, byte[] src) {
for (int i = 0; i < src.length / 8; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
}
}
@Run(test = "testByteByte1")
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: address has ConvL2I for cast of long to address, not supported.
public static void testByteByte1b(byte[] dest, byte[] src) {
for (int i = 0; i < src.length / 8; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
}
}
@Run(test = {"testByteByte1a", "testByteByte1b"})
public static void testByteByte1_runner() {
runAndVerify2(() -> testByteByte1a(byteArray, byteArray), 0);
runAndVerify2(() -> testByteByte1b(byteArray, byteArray), 0);
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
// might get fixed with JDK-8325155.
public static void testByteByte2a(byte[] dest, byte[] src) {
for (int i = 1; i < src.length / 8; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
}
}
@Run(test = "testByteByte2")
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
applyIfPlatform = {"64-bit", "true"})
// 32-bit: address has ConvL2I for cast of long to address, not supported.
public static void testByteByte2b(byte[] dest, byte[] src) {
for (int i = 1; i < src.length / 8; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
}
}
@Run(test = {"testByteByte2a", "testByteByte2b"})
public static void testByteByte2_runner() {
runAndVerify2(() -> testByteByte2a(byteArray, byteArray), -8);
runAndVerify2(() -> testByteByte2b(byteArray, byteArray), -8);
}
@Test
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
public static void testByteByte3a(byte[] dest, byte[] src) {
for (int i = 0; i < src.length / 8 - 1; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
}
}
@Run(test = "testByteByte3")
@Test
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
public static void testByteByte3b(byte[] dest, byte[] src) {
for (int i = 0; i < src.length / 8 - 1; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
}
}
@Run(test = {"testByteByte3a", "testByteByte3b"})
public static void testByteByte3_runner() {
runAndVerify2(() -> testByteByte3a(byteArray, byteArray), 8);
runAndVerify2(() -> testByteByte3b(byteArray, byteArray), 8);
}
@Test
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
public static void testByteByte4a(byte[] dest, byte[] src, int start, int stop) {
for (int i = start; i < stop; i++) {
UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
}
}
@Run(test = "testByteByte4")
@Test
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
public static void testByteByte4b(byte[] dest, byte[] src, int start, int stop) {
for (int i = start; i < stop; i++) {
UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
}
}
@Run(test = {"testByteByte4a", "testByteByte4b"})
public static void testByteByte4_runner() {
baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
runAndVerify2(() -> testByteByte4a(byteArray, byteArray, 0, size), 0);
runAndVerify2(() -> testByteByte4b(byteArray, byteArray, 0, size), 0);
}
@Test
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
public static void testByteByte5a(byte[] dest, byte[] src, int start, int stop) {
for (int i = start; i < stop; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
}
}
@Run(test = "testByteByte5")
@Test
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
public static void testByteByte5b(byte[] dest, byte[] src, int start, int stop) {
for (int i = start; i < stop; i++) {
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
}
}
@Run(test = {"testByteByte5a", "testByteByte5b"})
public static void testByteByte5_runner() {
baseOffset = 1;
runAndVerify2(() -> testByteByte5a(byteArray, byteArray, 0, size-1), 8);
runAndVerify2(() -> testByteByte5b(byteArray, byteArray, 0, size-1), 8);
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
// FAILS: adr is CastX2P(dest + 8 * (i + int_con))
// See: JDK-8331576
public static void testOffHeapLong1a(long dest, long[] src) {
for (int i = 0; i < src.length; i++) {
UNSAFE.putLongUnaligned(null, dest + 8 * i, src[i]);
}
}
@Run(test = "testOffHeapLong1")
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
// FAILS: adr is CastX2P(dest + 8L * (i + int_con))
// See: JDK-8331576
public static void testOffHeapLong1b(long dest, long[] src) {
for (int i = 0; i < src.length; i++) {
UNSAFE.putLongUnaligned(null, dest + 8L * i, src[i]);
}
}
@Run(test = {"testOffHeapLong1a", "testOffHeapLong1b"})
public static void testOffHeapLong1_runner() {
runAndVerify3(() -> testOffHeapLong1a(baseOffHeap, longArray), 0);
runAndVerify3(() -> testOffHeapLong1b(baseOffHeap, longArray), 0);
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
// FAILS: adr is CastX2P
// See: JDK-8331576
public static void testOffHeapLong2a(long dest, long[] src) {
for (int i = 1; i < src.length; i++) {
UNSAFE.putLongUnaligned(null, dest + 8 * (i - 1), src[i]);
}
}
@Run(test = "testOffHeapLong2")
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
// FAILS: adr is CastX2P
// See: JDK-8331576
public static void testOffHeapLong2b(long dest, long[] src) {
for (int i = 1; i < src.length; i++) {
UNSAFE.putLongUnaligned(null, dest + 8L * (i - 1), src[i]);
}
}
@Run(test = {"testOffHeapLong2a", "testOffHeapLong2b"})
public static void testOffHeapLong2_runner() {
runAndVerify3(() -> testOffHeapLong2a(baseOffHeap, longArray), -8);
runAndVerify3(() -> testOffHeapLong2b(baseOffHeap, longArray), -8);
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
// FAILS: adr is CastX2P
// See: JDK-8331576
public static void testOffHeapLong3a(long dest, long[] src) {
for (int i = 0; i < src.length - 1; i++) {
UNSAFE.putLongUnaligned(null, dest + 8 * (i + 1), src[i]);
}
}
@Run(test = "testOffHeapLong3")
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
// FAILS: adr is CastX2P
// See: JDK-8331576
public static void testOffHeapLong3b(long dest, long[] src) {
for (int i = 0; i < src.length - 1; i++) {
UNSAFE.putLongUnaligned(null, dest + 8L * (i + 1), src[i]);
}
}
@Run(test = {"testOffHeapLong3a", "testOffHeapLong3b"})
public static void testOffHeapLong3_runner() {
runAndVerify3(() -> testOffHeapLong3a(baseOffHeap, longArray), 8);
runAndVerify3(() -> testOffHeapLong3b(baseOffHeap, longArray), 8);
}
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
// applyIf = {"AlignVector", "false"})
// FAILS: adr is CastX2P
// See: JDK-8331576
// AlignVector cannot guarantee that invar is aligned.
public static void testOffHeapLong4a(long dest, long[] src, int start, int stop) {
for (int i = start; i < stop; i++) {
UNSAFE.putLongUnaligned(null, dest + 8 * i + baseOffset, src[i]);
}
}
@Run(test = "testOffHeapLong4")
@Test
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
// applyIf = {"AlignVector", "false"})
// FAILS: adr is CastX2P
// See: JDK-8331576
// AlignVector cannot guarantee that invar is aligned.
public static void testOffHeapLong4b(long dest, long[] src, int start, int stop) {
for (int i = start; i < stop; i++) {
UNSAFE.putLongUnaligned(null, dest + 8L * i + baseOffset, src[i]);
}
}
@Run(test = {"testOffHeapLong4a", "testOffHeapLong4b"})
public static void testOffHeapLong4_runner() {
baseOffset = 8;
runAndVerify3(() -> testOffHeapLong4a(baseOffHeap, longArray, 0, size-1), 8);
runAndVerify3(() -> testOffHeapLong4b(baseOffHeap, longArray, 0, size-1), 8);
}
}

View File

@ -1363,7 +1363,7 @@ public class TestAlignVector {
static Object[] test17a(long[] a) {
// Unsafe: vectorizes with profiling (not xcomp)
for (int i = 0; i < RANGE; i++) {
long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
long v = UNSAFE.getLongUnaligned(a, adr);
UNSAFE.putLongUnaligned(a, adr, v + 1);
}
@ -1375,7 +1375,7 @@ public class TestAlignVector {
static Object[] test17b(long[] a) {
// Not alignable
for (int i = 0; i < RANGE-1; i++) {
long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
long v = UNSAFE.getLongUnaligned(a, adr);
UNSAFE.putLongUnaligned(a, adr, v + 1);
}
@ -1392,7 +1392,7 @@ public class TestAlignVector {
static Object[] test17c(long[] a) {
// Unsafe: aligned vectorizes
for (int i = 0; i < RANGE-1; i+=4) {
long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
@ -1422,7 +1422,7 @@ public class TestAlignVector {
static Object[] test17d(long[] a) {
// Not alignable
for (int i = 0; i < RANGE-1; i+=4) {
long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);

View File

@ -1090,11 +1090,11 @@ public class TestAlignVectorFuzzer {
int init = init_con_or_var();
int limit = limit_con_or_var();
int stride = stride_con();
long scale = scale_con();
long offset = offset1_con_or_var();
for (int i = init; i < limit; i += stride) {
long adr = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + i * scale;
int v = UNSAFE.getIntUnaligned(a, adr);
UNSAFE.putIntUnaligned(a, adr, v + 1);
}
@ -1105,19 +1105,19 @@ public class TestAlignVectorFuzzer {
int init = init_con_or_var();
int limit = limit_con_or_var();
int stride = stride_con();
long scale = scale_con();
long offset1 = offset1_con_or_var();
long offset2 = offset2_con_or_var();
long offset3 = offset3_con_or_var();
int h1 = hand_unrolling1_con();
int h2 = hand_unrolling2_con();
int h3 = hand_unrolling3_con();
for (int i = init; i < limit; i += stride) {
long adr1 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset1 + i * scale;
long adr2 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset2 + i * scale;
long adr3 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset3 + i * scale;
if (h1 >= 1) { UNSAFE.putIntUnaligned(a, adr1 + 0*4, UNSAFE.getIntUnaligned(a, adr1 + 0*4) + 1); }
if (h1 >= 2) { UNSAFE.putIntUnaligned(a, adr1 + 1*4, UNSAFE.getIntUnaligned(a, adr1 + 1*4) + 1); }

View File

@ -172,10 +172,10 @@ public class TestIndependentPacksWithCyclicDependency {
static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
for (int i = 0; i < RANGE; i+=2) {
// int and float arrays are two slices. But we pretend both are of type int.
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1);
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1);
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0);
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4);
}
}
@ -248,10 +248,10 @@ public class TestIndependentPacksWithCyclicDependency {
for (int i = 0; i < RANGE; i+=2) {
// same as test2, except that reordering leads to different semantics
// explanation analogue to test4
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); // B
}
}
@ -275,18 +275,18 @@ public class TestIndependentPacksWithCyclicDependency {
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// Chain of parallelizable op and conversion
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
}
}
@ -307,18 +307,18 @@ public class TestIndependentPacksWithCyclicDependency {
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// Cycle involving 3 memory slices
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
}
}
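
The `// moved down` load is the interesting part of this variant: once arrays alias, moving a load past a store to the same memory slice changes the value it observes, which is why the ordering of the packed and scalar memory ops matters here. A tiny illustration with plain arrays (hypothetical demo, assuming aliasing comparable to what the test runs arrange):

// AliasReorderDemo.java -- illustrative only.
public class AliasReorderDemo {
    public static void main(String[] args) {
        int[] a = {1, 2};
        int[] b = a;            // aliased references to the same array
        int before = a[0] + 3;  // load before the store: sees 1, computes 4
        b[0] = 100;             // store through the alias
        int after = a[0] + 3;   // the same load "moved down": sees 100, computes 103
        System.out.println(before + " vs " + after);  // 4 vs 103
    }
}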
@@ -340,19 +340,19 @@ public class TestIndependentPacksWithCyclicDependency {
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// 2-cycle, with more ops after
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
// more stuff after
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
}
}
@@ -373,19 +373,19 @@ public class TestIndependentPacksWithCyclicDependency {
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// 2-cycle, with more stuff before
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
// 2-cycle
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
}
}
@@ -423,18 +423,18 @@ public class TestIndependentPacksWithCyclicDependency {
//
// The cycle thus does not only go via packs, but also scalar ops.
//
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3; // A
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45; // R: constant mismatch
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) + 43; // S
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f; // U
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f; // V
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // B: moved down
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; // A
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; // R: constant mismatch
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43; // S
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; // U
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; // V
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // B: moved down
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
}
}
@@ -463,8 +463,8 @@ public class TestIndependentPacksWithCyclicDependency {
static void verify(String name, float[] data, float[] gold) {
for (int i = 0; i < RANGE; i++) {
int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
if (datav != goldv) {
throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
}
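
Note that verify compares the float arrays by their raw 32-bit patterns rather than with `==`, so purely bitwise differences such as NaN payloads or 0.0f versus -0.0f are detected. An Unsafe-free equivalent (a hypothetical helper, not part of the test) could look like:

// Hypothetical Unsafe-free equivalent of the bitwise verify above.
static void verifyBitwise(String name, float[] data, float[] gold) {
    for (int i = 0; i < data.length; i++) {
        int datav = Float.floatToRawIntBits(data[i]);
        int goldv = Float.floatToRawIntBits(gold[i]);
        if (datav != goldv) {
            throw new RuntimeException("Invalid " + name + " result: dataF[" + i + "]: "
                                       + datav + " != " + goldv);
        }
    }
}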


@@ -58,18 +58,18 @@ public class TestIndependentPacksWithCyclicDependency2 {
long[] dataLa, long[] dataLb) {
for (int i = 0; i < RANGE; i+=2) {
// For explanation, see test 10 in TestIndependentPacksWithCyclicDependency.java
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) + 43;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // moved down
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43;
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
}
}
@@ -83,8 +83,8 @@ public class TestIndependentPacksWithCyclicDependency2 {
static void verify(String name, float[] data, float[] gold) {
for (int i = 0; i < RANGE; i++) {
int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
if (datav != goldv) {
throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
}


@@ -124,10 +124,10 @@ public class TestScheduleReordersScalarMemops {
for (int i = 0; i < RANGE; i+=2) {
// Do the same as test0, but without int-float conversion.
// This should reproduce on machines where conversion is not implemented.
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1); // A +1
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0); // X
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4); // Y
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] * 11); // B *11
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A +1
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] * 11); // B *11
}
}
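
These tests rely on Unsafe to reinterpret the same array memory under a different primitive type, for example storing int bit patterns into a float[]. A self-contained sketch of that reinterpretation outside the test harness (the demo class is hypothetical; it assumes sun.misc.Unsafe is reachable, as it is on standard JDKs via the jdk.unsupported module):

import java.lang.reflect.Field;
import sun.misc.Unsafe;

// MixedAccessDemo.java -- illustrative only: type-punning an int store into a float[].
public class MixedAccessDemo {
    public static void main(String[] args) throws Exception {
        Field f = Unsafe.class.getDeclaredField("theUnsafe");
        f.setAccessible(true);
        Unsafe unsafe = (Unsafe) f.get(null);

        float[] dataF = new float[4];
        // Store the raw bits of the int 42 at element index 2:
        unsafe.putInt(dataF, Unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * 2, 42);
        // Typed access now sees the reinterpreted bit pattern:
        System.out.println(dataF[2]);                           // ~5.9E-44 (denormal)
        System.out.println(Float.floatToRawIntBits(dataF[2]));  // 42
    }
}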