8328544: Improve handling of vectorization
Co-authored-by: Christian Hagedorn <chagedorn@openjdk.org>
Reviewed-by: mschoene, kvn, chagedorn, rhalade

parent 03bc6b359f
commit cfa25b71a6
@@ -416,6 +416,10 @@ VPointer::VPointer(MemNode* const mem, const VLoop& vloop,
#ifdef ASSERT
    _debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr),
#endif
    _has_int_index_after_convI2L(false),
    _int_index_after_convI2L_offset(0),
    _int_index_after_convI2L_invar(nullptr),
    _int_index_after_convI2L_scale(0),
    _nstack(nstack), _analyze_only(analyze_only), _stack_idx(0)
#ifndef PRODUCT
    , _tracer(vloop.is_trace_pointer_analysis())
@@ -495,6 +499,11 @@ VPointer::VPointer(MemNode* const mem, const VLoop& vloop,
    return;
  }

  if (!is_safe_to_use_as_simple_form(base, adr)) {
    assert(!valid(), "does not have simple form");
    return;
  }

  _base = base;
  _adr = adr;
  assert(valid(), "Usable");
@@ -508,6 +517,10 @@ VPointer::VPointer(VPointer* p) :
#ifdef ASSERT
    _debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr),
#endif
    _has_int_index_after_convI2L(false),
    _int_index_after_convI2L_offset(0),
    _int_index_after_convI2L_invar(nullptr),
    _int_index_after_convI2L_scale(0),
    _nstack(p->_nstack), _analyze_only(p->_analyze_only), _stack_idx(p->_stack_idx)
#ifndef PRODUCT
    , _tracer(p->_tracer._is_trace_alignment)
@@ -530,6 +543,354 @@ int VPointer::invar_factor() const {
  return 1;
}

// We would like to make decisions about aliasing (i.e. removing memory edges) and adjacency
// (i.e. which loads/stores can be packed) based on the simple form:
//
//   s_pointer = adr + offset + invar + scale * ConvI2L(iv)
//
// However, we parse the compound-long-int form:
//
//   c_pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_index)
//   int_index = int_offset + int_invar + int_scale * iv
//
// In general, the simple and the compound-long-int form do not always compute the same pointer
// at runtime. For example, the simple form would give a different result due to an overflow
// in the int_index.
//
// Example:
//   For both forms, we have:
//     iv = 0
//     scale = 1
//
//   We now account the offset and invar once to the long part and once to the int part:
//     Pointer 1 (long offset and long invar):
//       long_offset = min_int
//       long_invar  = min_int
//       int_offset  = 0
//       int_invar   = 0
//
//     Pointer 2 (int offset and int invar):
//       long_offset = 0
//       long_invar  = 0
//       int_offset  = min_int
//       int_invar   = min_int
//
//   This gives us the following pointers:
//     Compound-long-int form pointers:
//       Form:
//         c_pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_offset + int_invar + int_scale * iv)
//
//       Pointers:
//         c_pointer1 = adr + min_int + min_int + 1 * ConvI2L(0 + 0 + 1 * 0)
//                    = adr + min_int + min_int
//                    = adr - 2^32
//
//         c_pointer2 = adr + 0 + 0 + 1 * ConvI2L(min_int + min_int + 1 * 0)
//                    = adr + ConvI2L(min_int + min_int)
//                    = adr + 0
//                    = adr
//
//     Simple form pointers:
//       Form:
//         s_pointer = adr + offset + invar + scale * ConvI2L(iv)
//         s_pointer = adr + (long_offset + int_offset) + (long_invar + int_invar) + (long_scale * int_scale) * ConvI2L(iv)
//
//       Pointers:
//         s_pointer1 = adr + (min_int + 0) + (min_int + 0) + 1 * 0
//                    = adr + min_int + min_int
//                    = adr - 2^32
//         s_pointer2 = adr + (0 + min_int) + (0 + min_int) + 1 * 0
//                    = adr + min_int + min_int
//                    = adr - 2^32
//
// We see that the two addresses are actually 2^32 bytes apart (derived from the c_pointers),
// but their simple forms look identical.
//
// Hence, we need to determine in which cases it is safe to make decisions based on the simple
// form, rather than the compound-long-int form. If we cannot prove that using the simple form
// is safe (i.e. equivalent to the compound-long-int form), then we do not get a valid VPointer,
// and the associated memop cannot be vectorized.
bool VPointer::is_safe_to_use_as_simple_form(Node* base, Node* adr) const {
#ifndef _LP64
  // On 32-bit platforms, there is never an explicit int_index with ConvI2L for the iv. Thus, the
  // parsed pointer form is always the simple form, with int operations:
  //
  //   pointer = adr + offset + invar + scale * iv
  //
  assert(!_has_int_index_after_convI2L, "32-bit never has an int_index with ConvI2L for the iv");
  return true;
#else

  // Array accesses that are not Unsafe always have a RangeCheck which ensures that there is no
  // int_index overflow. This implies that the conversion to long can be done separately:
  //
  //   ConvI2L(int_index) = ConvI2L(int_offset) + ConvI2L(int_invar) + ConvI2L(int_scale) * ConvI2L(iv)
  //
  // And hence, the simple form is guaranteed to be identical to the compound-long-int form at
  // runtime and the VPointer is safe/valid to be used.
  const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr();
  if (ary_ptr_t != nullptr) {
    if (!_mem->is_unsafe_access()) {
      return true;
    }
  }

  // We did not find the int_index. Just to be safe, reject this VPointer.
  if (!_has_int_index_after_convI2L) {
    return false;
  }

  int   int_offset = _int_index_after_convI2L_offset;
  Node* int_invar  = _int_index_after_convI2L_invar;
  int   int_scale  = _int_index_after_convI2L_scale;
  int   long_scale = _scale / int_scale;

  // If "int_index = iv", then the simple form is identical to the compound-long-int form.
  //
  //   int_index = int_offset + int_invar + int_scale * iv
  //             = 0          + 0         + 1         * iv
  //             = iv
  if (int_offset == 0 && int_invar == nullptr && int_scale == 1) {
    return true;
  }

  // Intuition: What happens if the int_index overflows? Let us look at two pointers on the
  //            "overflow edge":
  //
  //              pointer1 = adr + ConvI2L(int_index1)
  //              pointer2 = adr + ConvI2L(int_index2)
  //
  //              int_index1 = max_int + 0 = max_int -> very close to but before the overflow
  //              int_index2 = max_int + 1 = min_int -> just enough to get the overflow
  //
  //            When looking at the difference of pointer1 and pointer2, we notice that it is very
  //            large (almost 2^32). Since arrays have at most 2^31 elements, chances are high that
  //            pointer2 is an actual out-of-bounds access at runtime. These would normally be
  //            prevented by range checks at runtime. However, if the access was done by using
  //            Unsafe, where range checks are omitted, then an out-of-bounds access constitutes
  //            undefined behavior. This means that we are allowed to do anything, including
  //            changing the behavior.
  //
  //            If we can set the right conditions, we have a guarantee that an overflow is either
  //            impossible (no overflow or range checks preventing that) or undefined behavior. In
  //            both cases, we are safe to vectorize.
  //
  // Approach:  We want to prove a lower bound for the distance between these two pointers, and an
  //            upper bound for the size of a memory object. We can derive such an upper bound for
  //            arrays. We know they have at most 2^31 elements. If we know the size of the
  //            elements in bytes, we have:
  //
  //              array_element_size_in_bytes * 2^31 >= max_possible_array_size_in_bytes
  //                                                 >= array_size_in_bytes              (ARR)
  //
  //            If some small difference "delta" leads to an int_index overflow, we know that the
  //            int_index1 before the overflow must have been close to max_int, and the int_index2
  //            after the overflow must be close to min_int:
  //
  //              pointer1 =       adr + long_offset + long_invar + long_scale * ConvI2L(int_index1)
  //                       =approx adr + long_offset + long_invar + long_scale * max_int
  //
  //              pointer2 =       adr + long_offset + long_invar + long_scale * ConvI2L(int_index2)
  //                       =approx adr + long_offset + long_invar + long_scale * min_int
  //
  //            We realize that the pointer difference is very large:
  //
  //              difference =approx long_scale * 2^32
  //
  //            Hence, if we set the right condition for long_scale and array_element_size_in_bytes,
  //            we can prove that an overflow is impossible (or would imply undefined behavior).
  //
  // We must now take this intuition, and develop a rigorous proof. We start by stating the problem
  // more precisely, with the help of some definitions and the Statement we are going to prove.
  //
  // Definition:
  //   Two VPointers are "comparable" (i.e. VPointer::comparable is true, set with VPointer::cmp()),
  //   iff all of these conditions apply for the simple form:
  //     1) Both VPointers are valid.
  //     2) The adr are identical, or both are array bases of different arrays.
  //     3) They have identical scale.
  //     4) They have identical invar.
  //     5) The difference in offsets is limited: abs(offset1 - offset2) < 2^31.       (DIFF)
  //
  // For the Vectorization Optimization, we pair-wise compare VPointers and determine if they are:
  //   1) "not comparable":
  //      We do not optimize them (we assume they alias, and do not assume adjacency).
  //
  //      Whenever we choose this option based on the simple form, it is also correct based on the
  //      compound-long-int form, since we make no optimizations based on it.
  //
  //   2) "comparable" with different array bases at runtime:
  //      We assume they do not alias (remove memory edges), but do not assume adjacency.
  //
  //      Whenever we have two different array bases for the simple form, we also have different
  //      array bases for the compound-long-int form. Since VPointers provably point to different
  //      memory objects, they can never alias.
  //
  //   3) "comparable" with the same base address:
  //      We compute the relative pointer difference, and based on the load/store size we can
  //      compute aliasing and adjacency.
  //
  //      We must find a condition under which the pointer difference of the simple form is
  //      identical to the pointer difference of the compound-long-int form. We do this with the
  //      Statement below, which we then proceed to prove.
  //
  // Statement:
  //   If two VPointers satisfy these 3 conditions:
  //     1) They are "comparable".
  //     2) They have the same base address.
  //     3) Their long_scale is a multiple of the array element size in bytes:
  //
  //          abs(long_scale) % array_element_size_in_bytes = 0                        (A)
  //
  //   Then their pointer difference of the simple form is identical to the pointer difference
  //   of the compound-long-int form.
  //
  //   More precisely:
  //     Such two VPointers by definition have identical adr, invar, and scale.
  //     Their simple form is:
  //
  //       s_pointer1 = adr + offset1 + invar + scale * ConvI2L(iv)                    (B1)
  //       s_pointer2 = adr + offset2 + invar + scale * ConvI2L(iv)                    (B2)
  //
  //     Thus, the pointer difference of the simple forms collapses to the difference in offsets:
  //
  //       s_difference = s_pointer1 - s_pointer2 = offset1 - offset2                  (C)
  //
  //     The compound-long-int form for these VPointers is:
  //
  //       c_pointer1 = adr + long_offset1 + long_invar1 + long_scale1 * ConvI2L(int_index1)   (D1)
  //       int_index1 = int_offset1 + int_invar1 + int_scale1 * iv                             (D2)
  //
  //       c_pointer2 = adr + long_offset2 + long_invar2 + long_scale2 * ConvI2L(int_index2)   (D3)
  //       int_index2 = int_offset2 + int_invar2 + int_scale2 * iv                             (D4)
  //
  //     And these are the offset1, offset2, invar and scale from the simple form (B1) and (B2):
  //
  //       offset1 = long_offset1 + long_scale1 * ConvI2L(int_offset1)                 (D5)
  //       offset2 = long_offset2 + long_scale2 * ConvI2L(int_offset2)                 (D6)
  //
  //       invar   = long_invar1 + long_scale1 * ConvI2L(int_invar1)
  //               = long_invar2 + long_scale2 * ConvI2L(int_invar2)                   (D7)
  //
  //       scale   = long_scale1 * ConvI2L(int_scale1)
  //               = long_scale2 * ConvI2L(int_scale2)                                 (D8)
  //
  //     The pointer difference of the compound-long-int form is defined as:
  //
  //       c_difference = c_pointer1 - c_pointer2
  //
  //     Thus, the statement claims that for the two VPointers we have:
  //
  //       s_difference = c_difference                                                 (Statement)
  //
  // We prove the Statement with the help of a Lemma:
  //
  // Lemma:
  //   There is some integer x, such that:
  //
  //     c_difference = s_difference + array_element_size_in_bytes * x * 2^32          (Lemma)
  //
  // From condition (DIFF), we can derive:
  //
  //   abs(s_difference) < 2^31                                                        (E)
  //
  // Assuming the Lemma, we prove the Statement:
  //   If "x = 0" (intuitively: the int_index does not overflow), then:
  //     c_difference = s_difference
  //   and hence the simple form computes the same pointer difference as the compound-long-int form.
  //   If "x != 0" (intuitively: the int_index overflows), then:
  //     abs(c_difference) =  abs(s_difference + array_element_size_in_bytes * x * 2^32)
  //                       >= array_element_size_in_bytes * 2^32 - abs(s_difference)
  //                          -- apply (E) --
  //                       >  array_element_size_in_bytes * 2^32 - 2^31
  //                       >= array_element_size_in_bytes * 2^31
  //                          -- apply (ARR) --
  //                       >= max_possible_array_size_in_bytes
  //                       >= array_size_in_bytes
  //
  //   This shows that c_pointer1 and c_pointer2 have a distance that exceeds the maximum array size.
  //   Thus, at least one of the two pointers must be outside of the array bounds. But we can assume
  //   that out-of-bounds accesses do not happen. If they still do, it is undefined behavior. Hence,
  //   we are allowed to do anything. We can also "safely" use the simple form in this case even though
  //   it might not match the compound-long-int form at runtime.
  // QED Statement.
  //
  // We must now prove the Lemma.
  //
  // ConvI2L always truncates by some multiple of 2^32, i.e. there is some integer y such that:
  //
  //   ConvI2L(y1 + y2) = ConvI2L(y1) + ConvI2L(y2) + 2^32 * y                         (F)
  //
  // It follows that there is an integer y1 such that:
  //
  //   ConvI2L(int_index1) =  ConvI2L(int_offset1 + int_invar1 + int_scale1 * iv)
  //                          -- apply (F) --
  //                       =  ConvI2L(int_offset1)
  //                        + ConvI2L(int_invar1)
  //                        + ConvI2L(int_scale1) * ConvI2L(iv)
  //                        + y1 * 2^32                                                (G)
  //
  // Thus, we can write the compound-long-int form (D1) as:
  //
  //   c_pointer1 =  adr + long_offset1 + long_invar1 + long_scale1 * ConvI2L(int_index1)
  //                 -- apply (G) --
  //              =  adr
  //               + long_offset1
  //               + long_invar1
  //               + long_scale1 * ConvI2L(int_offset1)
  //               + long_scale1 * ConvI2L(int_invar1)
  //               + long_scale1 * ConvI2L(int_scale1) * ConvI2L(iv)
  //               + long_scale1 * y1 * 2^32                                           (H)
  //
  // And we can write the simple form as:
  //
  //   s_pointer1 =  adr + offset1 + invar + scale * ConvI2L(iv)
  //                 -- apply (D5, D7, D8) --
  //              =  adr
  //               + long_offset1
  //               + long_scale1 * ConvI2L(int_offset1)
  //               + long_invar1
  //               + long_scale1 * ConvI2L(int_invar1)
  //               + long_scale1 * ConvI2L(int_scale1) * ConvI2L(iv)                   (K)
  //
  // We now compute the pointer difference between the simple (K) and compound-long-int form (H).
  // Most terms cancel out immediately:
  //
  //   sc_difference1 = c_pointer1 - s_pointer1 = long_scale1 * y1 * 2^32              (L)
  //
  // Rearranging the equation (L), we get:
  //
  //   c_pointer1 = s_pointer1 + long_scale1 * y1 * 2^32                               (M)
  //
  // And since long_scale1 is a multiple of array_element_size_in_bytes, there is some integer
  // x1, such that (M) implies:
  //
  //   c_pointer1 = s_pointer1 + array_element_size_in_bytes * x1 * 2^32               (N)
  //
  // With an analogous equation for c_pointer2, we can now compute the pointer difference for
  // the compound-long-int form:
  //
  //   c_difference =  c_pointer1 - c_pointer2
  //                   -- apply (N) --
  //                =  s_pointer1 + array_element_size_in_bytes * x1 * 2^32
  //                 -(s_pointer2 + array_element_size_in_bytes * x2 * 2^32)
  //                   -- where "x = x1 - x2" --
  //                =  s_pointer1 - s_pointer2 + array_element_size_in_bytes * x * 2^32
  //                   -- apply (C) --
  //                =  s_difference + array_element_size_in_bytes * x * 2^32
  // QED Lemma.
  if (ary_ptr_t != nullptr) {
    BasicType array_element_bt = ary_ptr_t->elem()->array_element_basic_type();
    if (is_java_primitive(array_element_bt)) {
      int array_element_size_in_bytes = type2aelembytes(array_element_bt);
      if (abs(long_scale) % array_element_size_in_bytes == 0) {
        return true;
      }
    }
  }

  // General case: we do not know if it is safe to use the simple form.
  return false;
#endif
}
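To make the worked example from the comment above concrete, the following small Java program (an illustrative sketch, not part of the patch; the base address value is made up) models ConvI2L as Java's (long)(int) cast. It shows that the two compound-long-int pointers really are 2^32 bytes apart even though both simple forms collapse to adr + min_int + min_int, and it checks the truncation property (F) used in the Lemma:

    public class CompoundVsSimpleForm {
        // Model of ConvI2L: truncate to 32 bits, then sign-extend.
        static long convI2L(long intIndex) {
            return (long)(int) intIndex;
        }

        public static void main(String[] args) {
            long adr = 1L << 40;              // made-up base address
            long minInt = Integer.MIN_VALUE;

            // Pointer 1: min_int offset and invar accounted to the long part.
            long c1 = adr + minInt + minInt + 1L * convI2L(0);
            // Pointer 2: min_int offset and invar accounted to the int part.
            long c2 = adr + 0 + 0 + 1L * convI2L(minInt + minInt);

            System.out.println(c2 - c1);                     // 4294967296 = 2^32
            System.out.println(c1 == adr + minInt + minInt); // true: both simple forms give this value

            // Property (F): ConvI2L(y1 + y2) differs from ConvI2L(y1) + ConvI2L(y2)
            // by some multiple of 2^32.
            long f = convI2L(minInt + minInt) - (convI2L(minInt) + convI2L(minInt));
            System.out.println(f == (1L << 32));             // true: here the multiple is one
        }
    }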

bool VPointer::is_loop_member(Node* n) const {
  Node* n_c = phase()->get_ctrl(n);
  return lpt()->is_member(phase()->get_loop(n_c));
@@ -632,6 +993,37 @@ bool VPointer::scaled_iv(Node* n) {
      NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);)
      return true;
    }
  } else if (opc == Op_ConvI2L && !has_iv()) {
    // So far we have not found the iv yet, and are about to enter a ConvI2L subgraph,
    // which may be the int index (that might overflow) for the memory access, of the form:
    //
    //   int_index = int_offset + int_invar + int_scale * iv
    //
    // If we simply continue parsing with the current VPointer, then the int_offset and
    // int_invar simply get added to the long offset and invar. But for the checks in
    // VPointer::is_safe_to_use_as_simple_form() we need to have explicit access to the
    // int_index. Thus, we must parse it explicitly here. For this, we use a temporary
    // VPointer, to pattern match the int_index sub-expression of the address.

    NOT_PRODUCT(Tracer::Depth dddd;)
    VPointer tmp(this);
    NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);)

    if (tmp.scaled_iv_plus_offset(n->in(1)) && tmp.has_iv()) {
      // We successfully matched an integer index, of the form:
      //   int_index = int_offset + int_invar + int_scale * iv
      _has_int_index_after_convI2L = true;
      _int_index_after_convI2L_offset = tmp._offset;
      _int_index_after_convI2L_invar  = tmp._invar;
      _int_index_after_convI2L_scale  = tmp._scale;
    }

    // Now parse it again for the real VPointer. This makes sure that the int_offset, int_invar,
    // and int_scale are properly added to the final VPointer's offset, invar, and scale.
    if (scaled_iv_plus_offset(n->in(1))) {
      NOT_PRODUCT(_tracer.scaled_iv_7(n);)
      return true;
    }
  } else if (opc == Op_ConvI2L || opc == Op_CastII) {
    if (scaled_iv_plus_offset(n->in(1))) {
      NOT_PRODUCT(_tracer.scaled_iv_7(n);)
@@ -648,8 +1040,17 @@ bool VPointer::scaled_iv(Node* n) {

    if (tmp.scaled_iv_plus_offset(n->in(1))) {
      int scale = n->in(2)->get_int();
      // Accumulate scale.
      _scale = tmp._scale << scale;
      _offset += tmp._offset << scale;
      // Accumulate offset.
      int shifted_offset = 0;
      if (!try_LShiftI_no_overflow(tmp._offset, scale, shifted_offset)) {
        return false; // shift overflow.
      }
      if (!try_AddI_no_overflow(_offset, shifted_offset, _offset)) {
        return false; // add overflow.
      }
      // Accumulate invar.
      if (tmp._invar != nullptr) {
        BasicType bt = tmp._invar->bottom_type()->basic_type();
        assert(bt == T_INT || bt == T_LONG, "");
@@ -658,6 +1059,13 @@ bool VPointer::scaled_iv(Node* n) {
        _debug_invar_scale = n->in(2);
#endif
      }

      // Forward info about the int_index:
      _has_int_index_after_convI2L = tmp._has_int_index_after_convI2L;
      _int_index_after_convI2L_offset = tmp._int_index_after_convI2L_offset;
      _int_index_after_convI2L_invar  = tmp._int_index_after_convI2L_invar;
      _int_index_after_convI2L_scale  = tmp._int_index_after_convI2L_scale;

      NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar);)
      return true;
    }
@@ -675,7 +1083,9 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {

  int opc = n->Opcode();
  if (opc == Op_ConI) {
    _offset += negate ? -(n->get_int()) : n->get_int();
    if (!try_AddSubI_no_overflow(_offset, n->get_int(), negate, _offset)) {
      return false; // add/sub overflow.
    }
    NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);)
    return true;
  } else if (opc == Op_ConL) {
@@ -684,7 +1094,9 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
    if (t->higher_equal(TypeLong::INT)) {
      jlong loff = n->get_long();
      jint off = (jint)loff;
      _offset += negate ? -off : loff;
      if (!try_AddSubI_no_overflow(_offset, off, negate, _offset)) {
        return false; // add/sub overflow.
      }
      NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);)
      return true;
    }
@@ -699,11 +1111,15 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
  if (opc == Op_AddI) {
    if (n->in(2)->is_Con() && invariant(n->in(1))) {
      maybe_add_to_invar(n->in(1), negate);
      _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
      if (!try_AddSubI_no_overflow(_offset, n->in(2)->get_int(), negate, _offset)) {
        return false; // add/sub overflow.
      }
      NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, negate, _offset);)
      return true;
    } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
      _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
      if (!try_AddSubI_no_overflow(_offset, n->in(1)->get_int(), negate, _offset)) {
        return false; // add/sub overflow.
      }
      maybe_add_to_invar(n->in(2), negate);
      NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, negate, _offset);)
      return true;
@@ -712,11 +1128,15 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
  if (opc == Op_SubI) {
    if (n->in(2)->is_Con() && invariant(n->in(1))) {
      maybe_add_to_invar(n->in(1), negate);
      _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
      if (!try_AddSubI_no_overflow(_offset, n->in(2)->get_int(), !negate, _offset)) {
        return false; // add/sub overflow.
      }
      NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, negate, _offset);)
      return true;
    } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
      _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
      if (!try_AddSubI_no_overflow(_offset, n->in(1)->get_int(), negate, _offset)) {
        return false; // add/sub overflow.
      }
      maybe_add_to_invar(n->in(2), !negate);
      NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, !negate, _offset);)
      return true;
@@ -806,6 +1226,44 @@ void VPointer::maybe_add_to_invar(Node* new_invar, bool negate) {
  _invar = register_if_new(add);
}

bool VPointer::try_AddI_no_overflow(int offset1, int offset2, int& result) {
  jlong long_offset = java_add((jlong)(offset1), (jlong)(offset2));
  jint  int_offset  = java_add(        offset1,          offset2);
  if (long_offset != int_offset) {
    return false;
  }
  result = int_offset;
  return true;
}

bool VPointer::try_SubI_no_overflow(int offset1, int offset2, int& result) {
  jlong long_offset = java_subtract((jlong)(offset1), (jlong)(offset2));
  jint  int_offset  = java_subtract(        offset1,          offset2);
  if (long_offset != int_offset) {
    return false;
  }
  result = int_offset;
  return true;
}

bool VPointer::try_AddSubI_no_overflow(int offset1, int offset2, bool is_sub, int& result) {
  if (is_sub) {
    return try_SubI_no_overflow(offset1, offset2, result);
  } else {
    return try_AddI_no_overflow(offset1, offset2, result);
  }
}

bool VPointer::try_LShiftI_no_overflow(int offset, int shift, int& result) {
  jlong long_offset = java_shift_left((jlong)(offset), shift);
  jint  int_offset  = java_shift_left(        offset,  shift);
  if (long_offset != int_offset) {
    return false;
  }
  result = int_offset;
  return true;
}
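All four try_*_no_overflow helpers above use the same idiom: perform the operation once in 64-bit arithmetic and once in wrap-around 32-bit arithmetic, and accept the result only if the two agree. A minimal Java analogue of the idiom (hypothetical names, for illustration only; plain Java code would more likely use Math.addExact, which throws on overflow instead of returning a boolean):

    public class NoOverflow {
        // Mirrors VPointer::try_AddI_no_overflow; result[0] plays the role of 'int& result'.
        static boolean tryAddINoOverflow(int a, int b, int[] result) {
            long wide = (long) a + (long) b; // exact: cannot overflow in 64 bits
            int narrow = a + b;              // Java int addition wraps around
            if (wide != narrow) {
                return false;                // the 32-bit addition overflowed
            }
            result[0] = narrow;
            return true;
        }

        public static void main(String[] args) {
            int[] r = new int[1];
            System.out.println(tryAddINoOverflow(Integer.MAX_VALUE, 1, r)); // false
            System.out.println(tryAddINoOverflow(1 << 30, 1, r));           // true, r[0] = 2^30 + 1
        }
    }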

// We use two comparisons, because a subtraction could underflow.
#define RETURN_CMP_VALUE_IF_NOT_EQUAL(a, b) \
  if (a < b) { return -1; }                 \

@@ -670,13 +670,51 @@ private:

// A vectorization pointer (VPointer) has information about an address for
// dependence checking and vector alignment. It's usually bound to a memory
// operation in a counted loop for vectorization analysis.
//
// We parse and represent pointers of the simple form:
//
//   pointer = adr + offset + invar + scale * ConvI2L(iv)
//
// Where:
//
//   adr: the base address of an array (base = adr)
//        OR
//        some address to off-heap memory (base = TOP)
//
//   offset: a constant offset
//   invar:  a runtime variable, which is invariant during the loop
//   scale:  scaling factor
//   iv:     loop induction variable
//
// But more precisely, we parse the compound-long-int form:
//
//   pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_offset + int_invar + int_scale * iv)
//
//   pointer   = adr + long_offset + long_invar + long_scale * ConvI2L(int_index)
//   int_index = int_offset + int_invar + int_scale * iv
//
// However, for aliasing and adjacency checks (e.g. VPointer::cmp()) we always use the simple form to make
// decisions. Hence, we must make sure to only create a "valid" VPointer if the optimizations based on the
// simple form produce the same result as the compound-long-int form would. Intuitively, this depends on
// whether the int_index overflows, but the precise conditions are given in VPointer::is_safe_to_use_as_simple_form().
//
//   ConvI2L(int_index) = ConvI2L(int_offset + int_invar + int_scale * iv)
//                      = ConvI2L(int_offset) + ConvI2L(int_invar) + ConvI2L(int_scale) * ConvI2L(iv)
//
//   scale  = long_scale * ConvI2L(int_scale)
//   offset = long_offset + long_scale * ConvI2L(int_offset)
//   invar  = long_invar + long_scale * ConvI2L(int_invar)
//
//   pointer = adr + offset + invar + scale * ConvI2L(iv)
//
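As a concrete (hypothetical) instance of this decomposition, consider a byte-array access of the form UNSAFE.putLongUnaligned(dest, ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), v), and assume ARRAY_BYTE_BASE_OFFSET = 16. One possible parse is:

    long_offset = 16, long_invar = 0, long_scale = 8
    int_offset  = 1,  int_invar  = 0, int_scale  = 1

which folds into the simple form as:

    offset  = long_offset + long_scale * ConvI2L(int_offset) = 16 + 8 * 1 = 24
    invar   = 0
    scale   = long_scale * ConvI2L(int_scale)                = 8 * 1      = 8

    pointer = adr + 24 + 8 * ConvI2L(iv)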
class VPointer : public ArenaObj {
 protected:
  MemNode* const _mem; // My memory reference node
  const VLoop&   _vloop;

  Node* _base; // null if unsafe nonheap reference
  Node* _adr;  // address pointer
  // Components of the simple form:
  Node* _base; // Base address of an array OR null if some off-heap memory.
  Node* _adr;  // Same as _base if an array pointer OR some off-heap memory pointer.
  int   _scale;  // multiplier for iv (in bytes), 0 if no loop iv
  int   _offset; // constant offset (in bytes)

@@ -687,6 +725,13 @@ class VPointer : public ArenaObj {
  Node* _debug_invar_scale; // multiplier for invariant
#endif

  // The int_index components of the compound-long-int form. Used to decide if it is safe to use the
  // simple form rather than the compound-long-int form that was parsed.
  bool  _has_int_index_after_convI2L;
  int   _int_index_after_convI2L_offset;
  Node* _int_index_after_convI2L_invar;
  int   _int_index_after_convI2L_scale;

  Node_Stack* _nstack;       // stack used to record a vpointer trace of variants
  bool        _analyze_only; // Used in loop unrolling only for vpointer trace
  uint        _stack_idx;    // Used in loop unrolling only for vpointer trace
@@ -726,6 +771,8 @@ class VPointer : public ArenaObj {
  VPointer(VPointer* p);
  NONCOPYABLE(VPointer);

  bool is_safe_to_use_as_simple_form(Node* base, Node* adr) const;

 public:
  bool valid()  const { return _adr != nullptr; }
  bool has_iv() const { return _scale != 0; }
@@ -751,10 +798,43 @@ class VPointer : public ArenaObj {
    return _invar == q._invar;
  }

  // We compute if and how two VPointers can alias at runtime, i.e. if the two addressed regions of memory can
  // ever overlap. There are essentially 3 relevant return states:
  // - NotComparable:  Synonymous to "unknown aliasing".
  //                   We have no information about how the two VPointers can alias. They could overlap, refer
  //                   to another location in the same memory object, or point to a completely different object.
  //                   -> Memory edge required. Aliasing unlikely but possible.
  //
  // - Less / Greater: Synonymous to "never aliasing".
  //                   The two VPointers may point into the same memory object, but be non-aliasing (i.e. we
  //                   know both address regions inside the same memory object, but these regions are non-
  //                   overlapping), or the VPointers point to entirely different objects.
  //                   -> No memory edge required. Aliasing impossible.
  //
  // - Equal:          Synonymous to "overlap, or point to different memory objects".
  //                   The two VPointers either overlap on the same memory object, or point to two different
  //                   memory objects.
  //                   -> Memory edge required. Aliasing likely.
  //
  // In a future refactoring, we can simplify to two states:
  // - NeverAlias: instead of Less / Greater
  // - MayAlias:   instead of Equal / NotComparable
  //
  // Two VPointers are "comparable" (Less / Greater / Equal), iff all of these conditions apply:
  //   1) Both are valid, i.e. expressible in the compound-long-int or simple form.
  //   2) The adr are identical, or both are array bases of different arrays.
  //   3) They have identical scale.
  //   4) They have identical invar.
  //   5) The difference in offsets is limited: abs(offset0 - offset1) < 2^31.
  int cmp(const VPointer& q) const {
    if (valid() && q.valid() &&
        (_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
        _scale == q._scale && invar_equals(q)) {
      jlong difference = abs(java_subtract((jlong)_offset, (jlong)q._offset));
      jlong max_diff   = (jlong)1 << 31;
      if (difference >= max_diff) {
        return NotComparable;
      }
      bool overlap = q._offset <   _offset + memory_size() &&
                       _offset < q._offset + q.memory_size();
return overlap ? Equal : (_offset < q._offset ? Less : Greater);
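A worked example of the three states (hypothetical values; two 8-byte accesses on the same array, so memory_size() = 8):

    offset = 0 vs offset = 8:  regions [0..8) and [8..16) do not overlap -> Less
                               (exactly memory_size() apart: provably non-aliasing and
                               adjacent, so the accesses are candidates for packing)
    offset = 0 vs offset = 4:  regions [0..8) and [4..12) overlap        -> Equal
                               (a memory edge is required)

If instead the scales or invars differ, or the offsets are at least 2^31 apart, cmp() returns NotComparable and no optimization is attempted.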
@@ -859,6 +939,11 @@ class VPointer : public ArenaObj {

  void maybe_add_to_invar(Node* new_invar, bool negate);

  static bool try_AddI_no_overflow(int offset1, int offset2, int& result);
  static bool try_SubI_no_overflow(int offset1, int offset2, int& result);
  static bool try_AddSubI_no_overflow(int offset1, int offset2, bool is_sub, int& result);
  static bool try_LShiftI_no_overflow(int offset, int shift, int& result);

  Node* register_if_new(Node* n) const;
};

@@ -1,5 +1,6 @@
/*
 * Copyright (c) 2023, Red Hat, Inc. All rights reserved.
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +36,6 @@ import java.nio.ByteOrder;
 * @test
 * @bug 8300258
 * @key randomness
 * @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64")
 * @summary C2: vectorization fails on simple ByteBuffer loop
 * @modules java.base/jdk.internal.misc
 * @library /test/lib /
@@ -147,193 +147,420 @@ public class TestVectorizationMismatchedAccess {
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteLong1(byte[] dest, long[] src) {
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    // might get fixed with JDK-8325155.
    public static void testByteLong1a(byte[] dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, src[i]);
        }
    }

    @Run(test = "testByteLong1")
    public static void testByteLong1_runner() {
        runAndVerify(() -> testByteLong1(byteArray, longArray), 0);
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong1b(byte[] dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, src[i]);
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteLong2(byte[] dest, long[] src) {
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
    public static void testByteLong1c(byte[] dest, long[] src) {
        long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
        for (int i = 0; i < src.length - 8; i++) {
            UNSAFE.putLongUnaligned(dest, base + 8 * i, src[i]);
        }
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong1d(byte[] dest, long[] src) {
        long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
        for (int i = 0; i < src.length - 8; i++) {
            UNSAFE.putLongUnaligned(dest, base + 8L * i, src[i]);
        }
    }

    @Run(test = {"testByteLong1a", "testByteLong1b", "testByteLong1c", "testByteLong1d"})
    public static void testByteLong1_runner() {
        runAndVerify(() -> testByteLong1a(byteArray, longArray), 0);
        runAndVerify(() -> testByteLong1b(byteArray, longArray), 0);
        testByteLong1c(byteArray, longArray);
        testByteLong1d(byteArray, longArray);
}
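The a/b test variants above differ only in 8 * i (a 32-bit multiply, widened to long afterwards) versus 8L * i (widened first, then a 64-bit multiply). The first shape contains an int expression under ConvI2L that C2 must prove non-overflowing via the compound-long-int analysis; the second widens only the loop variable itself (int_index = iv), which is the trivially safe case in is_safe_to_use_as_simple_form(). A standalone sketch of the underlying semantic difference (illustrative values only, not part of the patch):

    public class IntVsLongScaling {
        public static void main(String[] args) {
            int i = 300_000_000;
            long a = 8 * i;  // 32-bit multiply wraps: prints -1894967296
            long b = 8L * i; // 64-bit multiply:       prints  2400000000
            System.out.println(a);
            System.out.println(b);
        }
    }

In the tests themselves the loop bounds keep 8 * i in range, so both variants compute the same address at runtime; the point is that the compiler must prove this before it may vectorize the int-scaled variant.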

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    // might get fixed with JDK-8325155.
    public static void testByteLong2a(byte[] dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), src[i]);
        }
    }

    @Run(test = "testByteLong2")
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong2b(byte[] dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), src[i]);
        }
    }

    @Run(test = {"testByteLong2a", "testByteLong2b"})
    public static void testByteLong2_runner() {
        runAndVerify(() -> testByteLong2(byteArray, longArray), -8);
        runAndVerify(() -> testByteLong2a(byteArray, longArray), -8);
        runAndVerify(() -> testByteLong2b(byteArray, longArray), -8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteLong3(byte[] dest, long[] src) {
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    // might get fixed with JDK-8325155.
    public static void testByteLong3a(byte[] dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), src[i]);
        }
    }

    @Run(test = "testByteLong3")
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong3b(byte[] dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), src[i]);
        }
    }

    @Run(test = {"testByteLong3a", "testByteLong3b"})
    public static void testByteLong3_runner() {
        runAndVerify(() -> testByteLong3(byteArray, longArray), 8);
        runAndVerify(() -> testByteLong3a(byteArray, longArray), 8);
        runAndVerify(() -> testByteLong3b(byteArray, longArray), 8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    // might get fixed with JDK-8325155.
    // AlignVector cannot guarantee that invar is aligned.
    public static void testByteLong4(byte[] dest, long[] src, int start, int stop) {
    public static void testByteLong4a(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, src[i]);
        }
    }

    @Run(test = "testByteLong4")
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"},
        applyIf = {"AlignVector", "false"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    // AlignVector cannot guarantee that invar is aligned.
    public static void testByteLong4b(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, src[i]);
        }
    }

    @Run(test = {"testByteLong4a", "testByteLong4b"})
    public static void testByteLong4_runner() {
        baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
        runAndVerify(() -> testByteLong4(byteArray, longArray, 0, size), 0);
        runAndVerify(() -> testByteLong4a(byteArray, longArray, 0, size), 0);
        runAndVerify(() -> testByteLong4b(byteArray, longArray, 0, size), 0);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteLong5(byte[] dest, long[] src, int start, int stop) {
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    // might get fixed with JDK-8325155.
    public static void testByteLong5a(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), src[i]);
        }
    }

    @Run(test = "testByteLong5")
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteLong5b(byte[] dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), src[i]);
        }
    }

    @Run(test = {"testByteLong5a", "testByteLong5b"})
    public static void testByteLong5_runner() {
        baseOffset = 1;
        runAndVerify(() -> testByteLong5(byteArray, longArray, 0, size-1), 8);
        runAndVerify(() -> testByteLong5a(byteArray, longArray, 0, size-1), 8);
        runAndVerify(() -> testByteLong5b(byteArray, longArray, 0, size-1), 8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteByte1(byte[] dest, byte[] src) {
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    // might get fixed with JDK-8325155.
    public static void testByteByte1a(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Run(test = "testByteByte1")
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteByte1b(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
        }
    }

    @Run(test = {"testByteByte1a", "testByteByte1b"})
    public static void testByteByte1_runner() {
        runAndVerify2(() -> testByteByte1(byteArray, byteArray), 0);
        runAndVerify2(() -> testByteByte1a(byteArray, byteArray), 0);
        runAndVerify2(() -> testByteByte1b(byteArray, byteArray), 0);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testByteByte2(byte[] dest, byte[] src) {
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
    // might get fixed with JDK-8325155.
    public static void testByteByte2a(byte[] dest, byte[] src) {
        for (int i = 1; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Run(test = "testByteByte2")
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
        applyIfPlatform = {"64-bit", "true"})
    // 32-bit: address has ConvL2I for cast of long to address, not supported.
    public static void testByteByte2b(byte[] dest, byte[] src) {
        for (int i = 1; i < src.length / 8; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
        }
    }

    @Run(test = {"testByteByte2a", "testByteByte2b"})
    public static void testByteByte2_runner() {
        runAndVerify2(() -> testByteByte2(byteArray, byteArray), -8);
        runAndVerify2(() -> testByteByte2a(byteArray, byteArray), -8);
        runAndVerify2(() -> testByteByte2b(byteArray, byteArray), -8);
    }

    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte3(byte[] dest, byte[] src) {
    public static void testByteByte3a(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8 - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Run(test = "testByteByte3")
    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte3b(byte[] dest, byte[] src) {
        for (int i = 0; i < src.length / 8 - 1; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
        }
    }

    @Run(test = {"testByteByte3a", "testByteByte3b"})
    public static void testByteByte3_runner() {
        runAndVerify2(() -> testByteByte3(byteArray, byteArray), 8);
        runAndVerify2(() -> testByteByte3a(byteArray, byteArray), 8);
        runAndVerify2(() -> testByteByte3b(byteArray, byteArray), 8);
    }

    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte4(byte[] dest, byte[] src, int start, int stop) {
    public static void testByteByte4a(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Run(test = "testByteByte4")
    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte4b(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
        }
    }

    @Run(test = {"testByteByte4a", "testByteByte4b"})
    public static void testByteByte4_runner() {
        baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
        runAndVerify2(() -> testByteByte4(byteArray, byteArray, 0, size), 0);
        runAndVerify2(() -> testByteByte4a(byteArray, byteArray, 0, size), 0);
        runAndVerify2(() -> testByteByte4b(byteArray, byteArray, 0, size), 0);
    }

    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte5(byte[] dest, byte[] src, int start, int stop) {
    public static void testByteByte5a(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
        }
    }

    @Run(test = "testByteByte5")
    @Test
    @IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
    public static void testByteByte5b(byte[] dest, byte[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
        }
    }

    @Run(test = {"testByteByte5a", "testByteByte5b"})
    public static void testByteByte5_runner() {
        baseOffset = 1;
        runAndVerify2(() -> testByteByte5(byteArray, byteArray, 0, size-1), 8);
        runAndVerify2(() -> testByteByte5a(byteArray, byteArray, 0, size-1), 8);
        runAndVerify2(() -> testByteByte5b(byteArray, byteArray, 0, size-1), 8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testOffHeapLong1(long dest, long[] src) {
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P(dest + 8 * (i + int_con))
    // See: JDK-8331576
    public static void testOffHeapLong1a(long dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * i, src[i]);
        }
    }

    @Run(test = "testOffHeapLong1")
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P(dest + 8L * (i + int_con))
    // See: JDK-8331576
    public static void testOffHeapLong1b(long dest, long[] src) {
        for (int i = 0; i < src.length; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8L * i, src[i]);
        }
    }

    @Run(test = {"testOffHeapLong1a", "testOffHeapLong1b"})
    public static void testOffHeapLong1_runner() {
        runAndVerify3(() -> testOffHeapLong1(baseOffHeap, longArray), 0);
        runAndVerify3(() -> testOffHeapLong1a(baseOffHeap, longArray), 0);
        runAndVerify3(() -> testOffHeapLong1b(baseOffHeap, longArray), 0);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testOffHeapLong2(long dest, long[] src) {
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    public static void testOffHeapLong2a(long dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * (i - 1), src[i]);
        }
    }

    @Run(test = "testOffHeapLong2")
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    public static void testOffHeapLong2b(long dest, long[] src) {
        for (int i = 1; i < src.length; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8L * (i - 1), src[i]);
        }
    }

    @Run(test = {"testOffHeapLong2a", "testOffHeapLong2b"})
    public static void testOffHeapLong2_runner() {
        runAndVerify3(() -> testOffHeapLong2(baseOffHeap, longArray), -8);
        runAndVerify3(() -> testOffHeapLong2a(baseOffHeap, longArray), -8);
        runAndVerify3(() -> testOffHeapLong2b(baseOffHeap, longArray), -8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    public static void testOffHeapLong3(long dest, long[] src) {
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    public static void testOffHeapLong3a(long dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * (i + 1), src[i]);
        }
    }

    @Run(test = "testOffHeapLong3")
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    public static void testOffHeapLong3b(long dest, long[] src) {
        for (int i = 0; i < src.length - 1; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8L * (i + 1), src[i]);
        }
    }

    @Run(test = {"testOffHeapLong3a", "testOffHeapLong3b"})
    public static void testOffHeapLong3_runner() {
        runAndVerify3(() -> testOffHeapLong3(baseOffHeap, longArray), 8);
        runAndVerify3(() -> testOffHeapLong3a(baseOffHeap, longArray), 8);
        runAndVerify3(() -> testOffHeapLong3b(baseOffHeap, longArray), 8);
    }

    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
        applyIf = {"AlignVector", "false"})
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
    //     applyIf = {"AlignVector", "false"})
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    // AlignVector cannot guarantee that invar is aligned.
    public static void testOffHeapLong4(long dest, long[] src, int start, int stop) {
    public static void testOffHeapLong4a(long dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8 * i + baseOffset, src[i]);
        }
    }

    @Run(test = "testOffHeapLong4")
    @Test
    @IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
    // @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
    //     applyIf = {"AlignVector", "false"})
    // FAILS: adr is CastX2P
    // See: JDK-8331576
    // AlignVector cannot guarantee that invar is aligned.
    public static void testOffHeapLong4b(long dest, long[] src, int start, int stop) {
        for (int i = start; i < stop; i++) {
            UNSAFE.putLongUnaligned(null, dest + 8L * i + baseOffset, src[i]);
        }
    }

    @Run(test = {"testOffHeapLong4a", "testOffHeapLong4b"})
    public static void testOffHeapLong4_runner() {
        baseOffset = 8;
        runAndVerify3(() -> testOffHeapLong4(baseOffHeap, longArray, 0, size-1), 8);
        runAndVerify3(() -> testOffHeapLong4a(baseOffHeap, longArray, 0, size-1), 8);
        runAndVerify3(() -> testOffHeapLong4b(baseOffHeap, longArray, 0, size-1), 8);
    }
}

@ -1363,7 +1363,7 @@ public class TestAlignVector {
    static Object[] test17a(long[] a) {
        // Unsafe: vectorizes with profiling (not xcomp)
        for (int i = 0; i < RANGE; i++) {
-           int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
+           long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
@ -1375,7 +1375,7 @@ public class TestAlignVector {
    static Object[] test17b(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i++) {
-           int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
+           long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
            long v = UNSAFE.getLongUnaligned(a, adr);
            UNSAFE.putLongUnaligned(a, adr, v + 1);
        }
@ -1392,7 +1392,7 @@ public class TestAlignVector {
    static Object[] test17c(long[] a) {
        // Unsafe: aligned vectorizes
        for (int i = 0; i < RANGE-1; i+=4) {
-           int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
+           long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
@ -1422,7 +1422,7 @@ public class TestAlignVector {
    static Object[] test17d(long[] a) {
        // Not alignable
        for (int i = 0; i < RANGE-1; i+=4) {
-           int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
+           long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
            long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
            long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
            UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);

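Note: in test17b and test17d the "+ 1" in the address means every 8-byte access straddles an 8-byte boundary regardless of i, which is why those loops are marked "Not alignable". A small sketch (the base value 16 is an assumption standing in for a typical 64-bit ARRAY_LONG_BASE_OFFSET; it is not taken from the test):

public class Alignment {
    public static void main(String[] args) {
        long base = 16; // assumed stand-in for UNSAFE.ARRAY_LONG_BASE_OFFSET
        for (int i = 0; i < 4; i++) {
            long adr = base + 8L * i + 1;
            // The remainder is always 1, so no alignment adjustment can fix it.
            System.out.println("adr = " + adr + ", adr % 8 = " + (adr % 8));
        }
    }
}
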
@ -1090,11 +1090,11 @@ public class TestAlignVectorFuzzer {
        int init = init_con_or_var();
        int limit = limit_con_or_var();
        int stride = stride_con();
-       int scale = scale_con();
-       int offset = offset1_con_or_var();
+       long scale = scale_con();
+       long offset = offset1_con_or_var();

        for (int i = init; i < limit; i += stride) {
-           int adr = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + i * scale;
+           long adr = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + i * scale;
            int v = UNSAFE.getIntUnaligned(a, adr);
            UNSAFE.putIntUnaligned(a, adr, v + 1);
        }
@ -1105,19 +1105,19 @@ public class TestAlignVectorFuzzer {
        int init = init_con_or_var();
        int limit = limit_con_or_var();
        int stride = stride_con();
-       int scale = scale_con();
-       int offset1 = offset1_con_or_var();
-       int offset2 = offset2_con_or_var();
-       int offset3 = offset3_con_or_var();
+       long scale = scale_con();
+       long offset1 = offset1_con_or_var();
+       long offset2 = offset2_con_or_var();
+       long offset3 = offset3_con_or_var();

        int h1 = hand_unrolling1_con();
        int h2 = hand_unrolling2_con();
        int h3 = hand_unrolling3_con();

        for (int i = init; i < limit; i += stride) {
-           int adr1 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset1 + i * scale;
-           int adr2 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset2 + i * scale;
-           int adr3 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset3 + i * scale;
+           long adr1 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset1 + i * scale;
+           long adr2 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset2 + i * scale;
+           long adr3 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset3 + i * scale;

            if (h1 >= 1) { UNSAFE.putIntUnaligned(a, adr1 + 0*4, UNSAFE.getIntUnaligned(a, adr1 + 0*4) + 1); }
            if (h1 >= 2) { UNSAFE.putIntUnaligned(a, adr1 + 1*4, UNSAFE.getIntUnaligned(a, adr1 + 1*4) + 1); }

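Note: declaring scale and the offsets as long relies on Java's binary numeric promotion: once one operand of * or + is a long, the whole address expression is evaluated in 64 bits, so no intermediate 32-bit wrap can occur. Sketch (illustrative only):

public class Promotion {
    public static void main(String[] args) {
        int i = Integer.MAX_VALUE;
        int scaleI = 4;
        long scaleL = 4;
        System.out.println(i * scaleI); // int multiply wraps: prints -4
        System.out.println(i * scaleL); // promoted to long: prints 8589934588
    }
}
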
@ -172,10 +172,10 @@ public class TestIndependentPacksWithCyclicDependency {
    static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
        for (int i = 0; i < RANGE; i+=2) {
            // int and float arrays are two slices. But we pretend both are of type int.
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1);
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] + 1);
-           dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0);
-           dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4);
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1);
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1);
+           dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0);
+           dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4);
        }
    }

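Note: "two slices" refers to C2 tracking int[] and float[] memory as separate alias classes; storing int values into a float[] via Unsafe is a plain reinterpretation of the 32-bit pattern. The same reinterpretation can be shown without Unsafe (illustrative sketch, not from the test):

public class Punning {
    public static void main(String[] args) {
        int bits = 0x3F800000; // IEEE-754 bit pattern of 1.0f
        float f = Float.intBitsToFloat(bits);
        System.out.println(f); // prints 1.0
        System.out.println(Float.floatToRawIntBits(f) == bits); // prints true
    }
}
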
@ -248,10 +248,10 @@ public class TestIndependentPacksWithCyclicDependency {
        for (int i = 0; i < RANGE; i+=2) {
            // same as test2, except that reordering leads to different semantics
            // explanation analogue to test4
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1); // A
-           dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0); // X
-           dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4); // Y
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] + 1); // B
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A
+           dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X
+           dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); // B
        }
    }

@ -275,18 +275,18 @@ public class TestIndependentPacksWithCyclicDependency {
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Chain of parallelizable op and conversion
-           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
-           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
-           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
-           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
-           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
-           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
+           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
+           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
+           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
+           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
+           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
+           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
        }
    }

@ -307,18 +307,18 @@ public class TestIndependentPacksWithCyclicDependency {
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // Cycle involving 3 memory slices
-           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
-           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
-           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
-           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
-           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
-           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // moved down
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
+           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
+           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
+           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
+           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
+           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
+           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
        }
    }

@ -340,19 +340,19 @@ public class TestIndependentPacksWithCyclicDependency {
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // 2-cycle, with more ops after
-           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
-           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
-           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
-           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
+           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
+           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
+           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
+           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
            // more stuff after
-           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
-           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
+           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
+           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
        }
    }

@ -373,19 +373,19 @@ public class TestIndependentPacksWithCyclicDependency {
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // 2-cycle, with more stuff before
-           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
-           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
+           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
+           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
            // 2-cycle
-           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
-           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
-           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
-           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
+           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
+           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
+           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
+           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
        }
    }

@ -423,18 +423,18 @@ public class TestIndependentPacksWithCyclicDependency {
            //
            // The cycle thus does not only go via packs, but also scalar ops.
            //
-           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3; // A
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
-           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45; // R: constant mismatch
-           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) + 43; // S
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
-           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f; // U
-           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f; // V
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
-           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // B: moved down
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
+           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; // A
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
+           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; // R: constant mismatch
+           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43; // S
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
+           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; // U
+           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; // V
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
+           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // B: moved down
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
        }
    }

@ -463,8 +463,8 @@ public class TestIndependentPacksWithCyclicDependency {

    static void verify(String name, float[] data, float[] gold) {
        for (int i = 0; i < RANGE; i++) {
-           int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
-           int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
+           int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
+           int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
            if (datav != goldv) {
                throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
            }

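Note: verify() deliberately compares the raw int bit patterns of the float arrays rather than the float values. With ==, any NaN compares unequal even to itself, and 0.0f == -0.0f is true although the bit patterns differ; comparing raw bits gives an exact, total comparison. Sketch (illustrative only):

public class BitwiseCompare {
    public static void main(String[] args) {
        float a = Float.intBitsToFloat(0x7FC00000); // quiet NaN
        System.out.println(a == a);            // false: NaN never compares equal
        System.out.println(0.0f == -0.0f);     // true, although the bits differ
        System.out.println(Float.floatToRawIntBits(0.0f)
                        == Float.floatToRawIntBits(-0.0f)); // false: bits distinguish them
    }
}
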
@ -58,18 +58,18 @@ public class TestIndependentPacksWithCyclicDependency2 {
                      long[] dataLa, long[] dataLb) {
        for (int i = 0; i < RANGE; i+=2) {
            // For explanation, see test 10 in TestIndependentPacksWithCyclicDependency.java
-           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
-           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
-           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) + 43;
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
-           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
-           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
-           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
-           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
-           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // moved down
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
+           int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
+           int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
+           int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43;
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
+           unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
+           float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
+           float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
+           unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
+           int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
        }
    }

@ -83,8 +83,8 @@ public class TestIndependentPacksWithCyclicDependency2 {

    static void verify(String name, float[] data, float[] gold) {
        for (int i = 0; i < RANGE; i++) {
-           int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
-           int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
+           int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
+           int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
            if (datav != goldv) {
                throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
            }

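Note: the "moved down" comments above mark a store that the source order places after loads of a slice it may alias; if the arrays overlap at runtime, reordering those accesses changes the loaded values, which is exactly what these tests guard against. A simplified single-array sketch of the ordering hazard (illustrative only, the real tests alias two arrays through the runner):

public class OrderMatters {
    public static void main(String[] args) {
        int[] data = new int[2];
        data[0] = 42;              // A: store
        int x = 11 * data[0];      // X: load must observe the store
        System.out.println(x);     // prints 462; hoisting X above A would print 0
    }
}
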
@ -124,10 +124,10 @@ public class TestScheduleReordersScalarMemops {
        for (int i = 0; i < RANGE; i+=2) {
            // Do the same as test0, but without int-float conversion.
            // This should reproduce on machines where conversion is not implemented.
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1);  // A +1
-           dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0);  // X
-           dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4);  // Y
-           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] * 11); // B *11
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1);  // A +1
+           dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0);  // X
+           dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4);  // Y
+           unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] * 11); // B *11
        }
    }