8328544: Improve handling of vectorization
Co-authored-by: Christian Hagedorn <chagedorn@openjdk.org> Reviewed-by: mschoene, kvn, chagedorn, rhalade
This commit is contained in:
parent
03bc6b359f
commit
cfa25b71a6
@ -416,6 +416,10 @@ VPointer::VPointer(MemNode* const mem, const VLoop& vloop,
|
|||||||
#ifdef ASSERT
|
#ifdef ASSERT
|
||||||
_debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr),
|
_debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr),
|
||||||
#endif
|
#endif
|
||||||
|
_has_int_index_after_convI2L(false),
|
||||||
|
_int_index_after_convI2L_offset(0),
|
||||||
|
_int_index_after_convI2L_invar(nullptr),
|
||||||
|
_int_index_after_convI2L_scale(0),
|
||||||
_nstack(nstack), _analyze_only(analyze_only), _stack_idx(0)
|
_nstack(nstack), _analyze_only(analyze_only), _stack_idx(0)
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
, _tracer(vloop.is_trace_pointer_analysis())
|
, _tracer(vloop.is_trace_pointer_analysis())
|
||||||
@ -495,6 +499,11 @@ VPointer::VPointer(MemNode* const mem, const VLoop& vloop,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!is_safe_to_use_as_simple_form(base, adr)) {
|
||||||
|
assert(!valid(), "does not have simple form");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
_base = base;
|
_base = base;
|
||||||
_adr = adr;
|
_adr = adr;
|
||||||
assert(valid(), "Usable");
|
assert(valid(), "Usable");
|
||||||
@ -508,6 +517,10 @@ VPointer::VPointer(VPointer* p) :
|
|||||||
#ifdef ASSERT
|
#ifdef ASSERT
|
||||||
_debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr),
|
_debug_invar(nullptr), _debug_negate_invar(false), _debug_invar_scale(nullptr),
|
||||||
#endif
|
#endif
|
||||||
|
_has_int_index_after_convI2L(false),
|
||||||
|
_int_index_after_convI2L_offset(0),
|
||||||
|
_int_index_after_convI2L_invar(nullptr),
|
||||||
|
_int_index_after_convI2L_scale(0),
|
||||||
_nstack(p->_nstack), _analyze_only(p->_analyze_only), _stack_idx(p->_stack_idx)
|
_nstack(p->_nstack), _analyze_only(p->_analyze_only), _stack_idx(p->_stack_idx)
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
, _tracer(p->_tracer._is_trace_alignment)
|
, _tracer(p->_tracer._is_trace_alignment)
|
||||||
@ -530,6 +543,354 @@ int VPointer::invar_factor() const {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We would like to make decisions about aliasing (i.e. removing memory edges) and adjacency
|
||||||
|
// (i.e. which loads/stores can be packed) based on the simple form:
|
||||||
|
//
|
||||||
|
// s_pointer = adr + offset + invar + scale * ConvI2L(iv)
|
||||||
|
//
|
||||||
|
// However, we parse the compound-long-int form:
|
||||||
|
//
|
||||||
|
// c_pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_index)
|
||||||
|
// int_index = int_offset + int_invar + int_scale * iv
|
||||||
|
//
|
||||||
|
// In general, the simple and the compound-long-int form do not always compute the same pointer
|
||||||
|
// at runtime. For example, the simple form would give a different result due to an overflow
|
||||||
|
// in the int_index.
|
||||||
|
//
|
||||||
|
// Example:
|
||||||
|
// For both forms, we have:
|
||||||
|
// iv = 0
|
||||||
|
// scale = 1
|
||||||
|
//
|
||||||
|
// We now account the offset and invar once to the long part and once to the int part:
|
||||||
|
// Pointer 1 (long offset and long invar):
|
||||||
|
// long_offset = min_int
|
||||||
|
// long_invar = min_int
|
||||||
|
// int_offset = 0
|
||||||
|
// int_invar = 0
|
||||||
|
//
|
||||||
|
// Pointer 2 (int offset and int invar):
|
||||||
|
// long_offset = 0
|
||||||
|
// long_invar = 0
|
||||||
|
// int_offset = min_int
|
||||||
|
// int_invar = min_int
|
||||||
|
//
|
||||||
|
// This gives us the following pointers:
|
||||||
|
// Compound-long-int form pointers:
|
||||||
|
// Form:
|
||||||
|
// c_pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_offset + int_invar + int_scale * iv)
|
||||||
|
//
|
||||||
|
// Pointers:
|
||||||
|
// c_pointer1 = adr + min_int + min_int + 1 * ConvI2L(0 + 0 + 1 * 0)
|
||||||
|
// = adr + min_int + min_int
|
||||||
|
// = adr - 2^32
|
||||||
|
//
|
||||||
|
// c_pointer2 = adr + 0 + 0 + 1 * ConvI2L(min_int + min_int + 1 * 0)
|
||||||
|
// = adr + ConvI2L(min_int + min_int)
|
||||||
|
// = adr + 0
|
||||||
|
// = adr
|
||||||
|
//
|
||||||
|
// Simple form pointers:
|
||||||
|
// Form:
|
||||||
|
// s_pointer = adr + offset + invar + scale * ConvI2L(iv)
|
||||||
|
// s_pointer = adr + (long_offset + int_offset) + (long_invar + int_invar) + (long_scale * int_scale) * ConvI2L(iv)
|
||||||
|
//
|
||||||
|
// Pointers:
|
||||||
|
// s_pointer1 = adr + (min_int + 0 ) + (min_int + 0 ) + 1 * 0
|
||||||
|
// = adr + min_int + min_int
|
||||||
|
// = adr - 2^32
|
||||||
|
// s_pointer2 = adr + (0 + min_int ) + (0 + min_int ) + 1 * 0
|
||||||
|
// = adr + min_int + min_int
|
||||||
|
// = adr - 2^32
|
||||||
|
//
|
||||||
|
// We see that the two addresses are actually 2^32 bytes apart (derived from the c_pointers), but their simple forms look identical.
|
||||||
|
//
|
||||||
|
// Hence, we need to determine in which cases it is safe to make decisions based on the simple
|
||||||
|
// form, rather than the compound-long-int form. If we cannot prove that using the simple form
|
||||||
|
// is safe (i.e. equivalent to the compound-long-int form), then we do not get a valid VPointer,
|
||||||
|
// and the associated memop cannot be vectorized.
|
||||||
|
bool VPointer::is_safe_to_use_as_simple_form(Node* base, Node* adr) const {
  // Returns true if one of the sufficient conditions checked below holds, i.e. if decisions made
  // on the simple form are also correct for the parsed compound-long-int form. Returns false in
  // every other case. The parameters base and adr are not consulted by the checks below.
#ifndef _LP64
  // On 32-bit platforms, there is never an explicit int_index with ConvI2L for the iv. Thus, the
  // parsed pointer form is always the simple form, with int operations:
  //
  //   pointer = adr + offset + invar + scale * iv
  //
  assert(!_has_int_index_after_convI2L, "32-bit never has an int_index with ConvI2L for the iv");
  return true;
#else

  // Array accesses that are not Unsafe always have a RangeCheck which ensures that there is no
  // int_index overflow. This implies that the conversion to long can be done separately:
  //
  //   ConvI2L(int_index) = ConvI2L(int_offset) + ConvI2L(int_invar) + ConvI2L(scale) * ConvI2L(iv)
  //
  // And hence, the simple form is guaranteed to be identical to the compound-long-int form at
  // runtime and the VPointer is safe/valid to be used.
  const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr();
  if (ary_ptr_t != nullptr && !_mem->is_unsafe_access()) {
    return true;
  }

  // We did not find the int_index. Just to be safe, reject this VPointer.
  if (!_has_int_index_after_convI2L) {
    return false;
  }

  // If "int_index = iv", then the simple form is identical to the compound-long-int form.
  //
  //   int_index = int_offset + int_invar + int_scale * iv
  //             = 0          + 0         + 1         * iv
  //             = iv
  const bool int_index_is_plain_iv = _int_index_after_convI2L_offset == 0 &&
                                     _int_index_after_convI2L_invar == nullptr &&
                                     _int_index_after_convI2L_scale == 1;
  if (int_index_is_plain_iv) {
    return true;
  }

  // The total scale is the product of the long and the int scale (see (D8) below), so the long
  // part is recovered by division.
  // NOTE(review): this relies on the recorded int scale being non-zero; the int_index is only
  // recorded after tmp.has_iv() succeeded in scaled_iv() - confirm that has_iv() implies that.
  const int long_scale = _scale / _int_index_after_convI2L_scale;

  // Intuition: What happens if the int_index overflows? Let us look at two pointers on the "overflow edge":
  //
  //   pointer1 = adr + ConvI2L(int_index1)
  //   pointer2 = adr + ConvI2L(int_index2)
  //
  //   int_index1 = max_int + 0 = max_int   -> very close to but before the overflow
  //   int_index2 = max_int + 1 = min_int   -> just enough to get the overflow
  //
  // When looking at the difference of pointer1 and pointer2, we notice that it is very large
  // (almost 2^32). Since arrays have at most 2^31 elements, chances are high that pointer2 is
  // an actual out-of-bounds access at runtime. These would normally be prevented by range checks
  // at runtime. However, if the access was done by using Unsafe, where range checks are omitted,
  // then an out-of-bounds access constitutes undefined behavior. This means that we are allowed to
  // do anything, including changing the behavior.
  //
  // If we can set the right conditions, we have a guarantee that an overflow is either impossible
  // (no overflow or range checks preventing that) or undefined behavior. In both cases, we are
  // safe to do a vectorization.
  //
  // Approach: We want to prove a lower bound for the distance between these two pointers, and an
  //           upper bound for the size of a memory object. We can derive such an upper bound for
  //           arrays. We know they have at most 2^31 elements. If we know the size of the elements
  //           in bytes, we have:
  //
  //             array_element_size_in_bytes * 2^31 >= max_possible_array_size_in_bytes
  //                                                >= array_size_in_bytes                      (ARR)
  //
  //           If some small difference "delta" leads to an int_index overflow, we know that the
  //           int_index1 before overflow must have been close to max_int, and the int_index2 after
  //           the overflow must be close to min_int:
  //
  //             pointer1 =        adr + long_offset + long_invar + long_scale * ConvI2L(int_index1)
  //                      =approx  adr + long_offset + long_invar + long_scale * max_int
  //
  //             pointer2 =        adr + long_offset + long_invar + long_scale * ConvI2L(int_index2)
  //                      =approx  adr + long_offset + long_invar + long_scale * min_int
  //
  //           We realize that the pointer difference is very large:
  //
  //             difference =approx  long_scale * 2^32
  //
  //           Hence, if we set the right condition for long_scale and array_element_size_in_bytes,
  //           we can prove that an overflow is impossible (or would imply undefined behaviour).
  //
  // We must now take this intuition, and develop a rigorous proof. We start by stating the problem
  // more precisely, with the help of some definitions and the Statement we are going to prove.
  //
  // Definition:
  //   Two VPointers are "comparable" (i.e. VPointer::comparable is true, set with VPointer::cmp()),
  //   iff all of these conditions apply for the simple form:
  //     1) Both VPointers are valid.
  //     2) The adr are identical, or both are array bases of different arrays.
  //     3) They have identical scale.
  //     4) They have identical invar.
  //     5) The difference in offsets is limited: abs(offset1 - offset2) < 2^31.               (DIFF)
  //
  // For the Vectorization Optimization, we pair-wise compare VPointers and determine if they are:
  //   1) "not comparable":
  //        We do not optimize them (assume they alias, not assume adjacency).
  //
  //        Whenever we choose this option based on the simple form, it is also correct based on the
  //        compound-long-int form, since we make no optimizations based on it.
  //
  //   2) "comparable" with different array bases at runtime:
  //        We assume they do not alias (remove memory edges), but not assume adjacency.
  //
  //        Whenever we have two different array bases for the simple form, we also have different
  //        array bases for the compound-long-int form. Since VPointers provably point to different
  //        memory objects, they can never alias.
  //
  //   3) "comparable" with the same base address:
  //        We compute the relative pointer difference, and based on the load/store size we can
  //        compute aliasing and adjacency.
  //
  //        We must find a condition under which the pointer difference of the simple form is
  //        identical to the pointer difference of the compound-long-int form. We do this with the
  //        Statement below, which we then proceed to prove.
  //
  // Statement:
  //   If two VPointers satisfy these 3 conditions:
  //     1) They are "comparable".
  //     2) They have the same base address.
  //     3) Their long_scale is a multiple of the array element size in bytes:
  //
  //          abs(long_scale) % array_element_size_in_bytes = 0                                 (A)
  //
  //   Then their pointer difference of the simple form is identical to the pointer difference
  //   of the compound-long-int form.
  //
  //   More precisely:
  //     Such two VPointers by definition have identical adr, invar, and scale.
  //     Their simple form is:
  //
  //       s_pointer1 = adr + offset1 + invar + scale * ConvI2L(iv)                             (B1)
  //       s_pointer2 = adr + offset2 + invar + scale * ConvI2L(iv)                             (B2)
  //
  //     Thus, the pointer difference of the simple forms collapses to the difference in offsets:
  //
  //       s_difference = s_pointer1 - s_pointer2 = offset1 - offset2                           (C)
  //
  //     The compound-long-int forms of these VPointers are:
  //
  //       c_pointer1 = adr + long_offset1 + long_invar1 + long_scale1 * ConvI2L(int_index1)    (D1)
  //       int_index1 = int_offset1 + int_invar1 + int_scale1 * iv                              (D2)
  //
  //       c_pointer2 = adr + long_offset2 + long_invar2 + long_scale2 * ConvI2L(int_index2)    (D3)
  //       int_index2 = int_offset2 + int_invar2 + int_scale2 * iv                              (D4)
  //
  //     And these are the offset1, offset2, invar and scale from the simple form (B1) and (B2):
  //
  //       offset1 = long_offset1 + long_scale1 * ConvI2L(int_offset1)                          (D5)
  //       offset2 = long_offset2 + long_scale2 * ConvI2L(int_offset2)                          (D6)
  //
  //       invar   = long_invar1 + long_scale1 * ConvI2L(int_invar1)
  //               = long_invar2 + long_scale2 * ConvI2L(int_invar2)                            (D7)
  //
  //       scale   = long_scale1 * ConvI2L(int_scale1)
  //               = long_scale2 * ConvI2L(int_scale2)                                          (D8)
  //
  //     The pointer difference of the compound-long-int form is defined as:
  //
  //       c_difference = c_pointer1 - c_pointer2
  //
  //   Thus, the statement claims that for the two VPointers we have:
  //
  //     s_difference = c_difference                                                            (Statement)
  //
  // We prove the Statement with the help of a Lemma:
  //
  // Lemma:
  //   There is some integer x, such that:
  //
  //     c_difference = s_difference + array_element_size_in_bytes * x * 2^32                   (Lemma)
  //
  // From condition (DIFF), we can derive:
  //
  //   abs(s_difference) < 2^31                                                                 (E)
  //
  // Assuming the Lemma, we prove the Statement:
  //   If "x = 0" (intuitively: the int_index does not overflow), then:
  //     c_difference = s_difference
  //     and hence the simple form computes the same pointer difference as the compound-long-int form.
  //   If "x != 0" (intuitively: the int_index overflows), then:
  //     abs(c_difference) =  abs(s_difference + array_element_size_in_bytes * x * 2^32)
  //                       >= array_element_size_in_bytes * 2^32 - abs(s_difference)
  //                          -- apply (E) --
  //                       >  array_element_size_in_bytes * 2^32 - 2^31
  //                       >= array_element_size_in_bytes * 2^31
  //                          -- apply (ARR) --
  //                       >= max_possible_array_size_in_bytes
  //                       >= array_size_in_bytes
  //
  //     This shows that c_pointer1 and c_pointer2 have a distance that exceeds the maximum array size.
  //     Thus, at least one of the two pointers must be outside of the array bounds. But we can assume
  //     that out-of-bounds accesses do not happen. If they still do, it is undefined behavior. Hence,
  //     we are allowed to do anything. We can also "safely" use the simple form in this case even though
  //     it might not match the compound-long-int form at runtime.
  // QED Statement.
  //
  // We must now prove the Lemma.
  //
  // ConvI2L always truncates by some multiple of 2^32, i.e. there is some integer y such that:
  //
  //   ConvI2L(y1 + y2) = ConvI2L(y1) + ConvI2L(y2) + 2^32 * y                                  (F)
  //
  // It follows, that there is an integer y1 such that:
  //
  //   ConvI2L(int_index1) =  ConvI2L(int_offset1 + int_invar1 + int_scale1 * iv)
  //                          -- apply (F) --
  //                       =  ConvI2L(int_offset1)
  //                        + ConvI2L(int_invar1)
  //                        + ConvI2L(int_scale1) * ConvI2L(iv)
  //                        + y1 * 2^32                                                         (G)
  //
  // Thus, we can write the compound-long-int form (D1) as:
  //
  //   c_pointer1 =   adr + long_offset1 + long_invar1 + long_scale1 * ConvI2L(int_index1)
  //                  -- apply (G) --
  //              =   adr
  //                + long_offset1
  //                + long_invar1
  //                + long_scale1 * ConvI2L(int_offset1)
  //                + long_scale1 * ConvI2L(int_invar1)
  //                + long_scale1 * ConvI2L(int_scale1) * ConvI2L(iv)
  //                + long_scale1 * y1 * 2^32                                                   (H)
  //
  // And we can write the simple form as:
  //
  //   s_pointer1 =   adr + offset1 + invar + scale * ConvI2L(iv)
  //                  -- apply (D5, D7, D8) --
  //              =   adr
  //                + long_offset1
  //                + long_scale1 * ConvI2L(int_offset1)
  //                + long_invar1
  //                + long_scale1 * ConvI2L(int_invar1)
  //                + long_scale1 * ConvI2L(int_scale1) * ConvI2L(iv)                           (K)
  //
  // We now compute the pointer difference between the simple (K) and compound-long-int form (H).
  // Most terms cancel out immediately:
  //
  //   sc_difference1 = c_pointer1 - s_pointer1 = long_scale1 * y1 * 2^32                       (L)
  //
  // Rearranging the equation (L), we get:
  //
  //   c_pointer1 = s_pointer1 + long_scale1 * y1 * 2^32                                        (M)
  //
  // And since long_scale1 is a multiple of array_element_size_in_bytes, there is some integer
  // x1, such that (M) implies:
  //
  //   c_pointer1 = s_pointer1 + array_element_size_in_bytes * x1 * 2^32                        (N)
  //
  // With an analogous equation for c_pointer2, we can now compute the pointer difference for
  // the compound-long-int form:
  //
  //   c_difference =  c_pointer1 - c_pointer2
  //                   -- apply (N) --
  //                =  s_pointer1 + array_element_size_in_bytes * x1 * 2^32
  //                 -(s_pointer2 + array_element_size_in_bytes * x2 * 2^32)
  //                   -- where "x = x1 - x2" --
  //                =  s_pointer1 - s_pointer2 + array_element_size_in_bytes * x * 2^32
  //                   -- apply (C) --
  //                =  s_difference            + array_element_size_in_bytes * x * 2^32
  // QED Lemma.

  // General case: without an array type we do not know if it is safe to use the simple form.
  if (ary_ptr_t == nullptr) {
    return false;
  }

  // Condition (A) is only evaluated for arrays with primitive elements.
  const BasicType elem_bt = ary_ptr_t->elem()->array_element_basic_type();
  if (!is_java_primitive(elem_bt)) {
    return false;
  }

  // Condition (A): the long_scale must be a multiple of the array element size in bytes.
  const int array_element_size_in_bytes = type2aelembytes(elem_bt);
  return abs(long_scale) % array_element_size_in_bytes == 0;
#endif
}
|
||||||
|
|
||||||
bool VPointer::is_loop_member(Node* n) const {
|
bool VPointer::is_loop_member(Node* n) const {
|
||||||
Node* n_c = phase()->get_ctrl(n);
|
Node* n_c = phase()->get_ctrl(n);
|
||||||
return lpt()->is_member(phase()->get_loop(n_c));
|
return lpt()->is_member(phase()->get_loop(n_c));
|
||||||
@ -632,6 +993,37 @@ bool VPointer::scaled_iv(Node* n) {
|
|||||||
NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);)
|
NOT_PRODUCT(_tracer.scaled_iv_6(n, _scale);)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
} else if (opc == Op_ConvI2L && !has_iv()) {
|
||||||
|
// So far we have not found the iv yet, and are about to enter a ConvI2L subgraph,
|
||||||
|
// which may be the int index (that might overflow) for the memory access, of the form:
|
||||||
|
//
|
||||||
|
// int_index = int_offset + int_invar + int_scale * iv
|
||||||
|
//
|
||||||
|
// If we simply continue parsing with the current VPointer, then the int_offset and
|
||||||
|
// int_invar simply get added to the long offset and invar. But for the checks in
|
||||||
|
// VPointer::is_safe_to_use_as_simple_form() we need to have explicit access to the
|
||||||
|
// int_index. Thus, we must parse it explicitly here. For this, we use a temporary
|
||||||
|
// VPointer, to pattern match the int_index sub-expression of the address.
|
||||||
|
|
||||||
|
NOT_PRODUCT(Tracer::Depth dddd;)
|
||||||
|
VPointer tmp(this);
|
||||||
|
NOT_PRODUCT(_tracer.scaled_iv_8(n, &tmp);)
|
||||||
|
|
||||||
|
if (tmp.scaled_iv_plus_offset(n->in(1)) && tmp.has_iv()) {
|
||||||
|
// We successfully matched an integer index, of the form:
|
||||||
|
// int_index = int_offset + int_invar + int_scale * iv
|
||||||
|
_has_int_index_after_convI2L = true;
|
||||||
|
_int_index_after_convI2L_offset = tmp._offset;
|
||||||
|
_int_index_after_convI2L_invar = tmp._invar;
|
||||||
|
_int_index_after_convI2L_scale = tmp._scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now parse it again for the real VPointer. This makes sure that the int_offset, int_invar,
|
||||||
|
// and int_scale are properly added to the final VPointer's offset, invar, and scale.
|
||||||
|
if (scaled_iv_plus_offset(n->in(1))) {
|
||||||
|
NOT_PRODUCT(_tracer.scaled_iv_7(n);)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
} else if (opc == Op_ConvI2L || opc == Op_CastII) {
|
} else if (opc == Op_ConvI2L || opc == Op_CastII) {
|
||||||
if (scaled_iv_plus_offset(n->in(1))) {
|
if (scaled_iv_plus_offset(n->in(1))) {
|
||||||
NOT_PRODUCT(_tracer.scaled_iv_7(n);)
|
NOT_PRODUCT(_tracer.scaled_iv_7(n);)
|
||||||
@ -648,8 +1040,17 @@ bool VPointer::scaled_iv(Node* n) {
|
|||||||
|
|
||||||
if (tmp.scaled_iv_plus_offset(n->in(1))) {
|
if (tmp.scaled_iv_plus_offset(n->in(1))) {
|
||||||
int scale = n->in(2)->get_int();
|
int scale = n->in(2)->get_int();
|
||||||
|
// Accumulate scale.
|
||||||
_scale = tmp._scale << scale;
|
_scale = tmp._scale << scale;
|
||||||
_offset += tmp._offset << scale;
|
// Accumulate offset.
|
||||||
|
int shifted_offset = 0;
|
||||||
|
if (!try_LShiftI_no_overflow(tmp._offset, scale, shifted_offset)) {
|
||||||
|
return false; // shift overflow.
|
||||||
|
}
|
||||||
|
if (!try_AddI_no_overflow(_offset, shifted_offset, _offset)) {
|
||||||
|
return false; // add overflow.
|
||||||
|
}
|
||||||
|
// Accumulate invar.
|
||||||
if (tmp._invar != nullptr) {
|
if (tmp._invar != nullptr) {
|
||||||
BasicType bt = tmp._invar->bottom_type()->basic_type();
|
BasicType bt = tmp._invar->bottom_type()->basic_type();
|
||||||
assert(bt == T_INT || bt == T_LONG, "");
|
assert(bt == T_INT || bt == T_LONG, "");
|
||||||
@ -658,6 +1059,13 @@ bool VPointer::scaled_iv(Node* n) {
|
|||||||
_debug_invar_scale = n->in(2);
|
_debug_invar_scale = n->in(2);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Forward info about the int_index:
|
||||||
|
_has_int_index_after_convI2L = tmp._has_int_index_after_convI2L;
|
||||||
|
_int_index_after_convI2L_offset = tmp._int_index_after_convI2L_offset;
|
||||||
|
_int_index_after_convI2L_invar = tmp._int_index_after_convI2L_invar;
|
||||||
|
_int_index_after_convI2L_scale = tmp._int_index_after_convI2L_scale;
|
||||||
|
|
||||||
NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar);)
|
NOT_PRODUCT(_tracer.scaled_iv_9(n, _scale, _offset, _invar);)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -675,7 +1083,9 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
|
|||||||
|
|
||||||
int opc = n->Opcode();
|
int opc = n->Opcode();
|
||||||
if (opc == Op_ConI) {
|
if (opc == Op_ConI) {
|
||||||
_offset += negate ? -(n->get_int()) : n->get_int();
|
if (!try_AddSubI_no_overflow(_offset, n->get_int(), negate, _offset)) {
|
||||||
|
return false; // add/sub overflow.
|
||||||
|
}
|
||||||
NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);)
|
NOT_PRODUCT(_tracer.offset_plus_k_2(n, _offset);)
|
||||||
return true;
|
return true;
|
||||||
} else if (opc == Op_ConL) {
|
} else if (opc == Op_ConL) {
|
||||||
@ -684,7 +1094,9 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
|
|||||||
if (t->higher_equal(TypeLong::INT)) {
|
if (t->higher_equal(TypeLong::INT)) {
|
||||||
jlong loff = n->get_long();
|
jlong loff = n->get_long();
|
||||||
jint off = (jint)loff;
|
jint off = (jint)loff;
|
||||||
_offset += negate ? -off : loff;
|
if (!try_AddSubI_no_overflow(_offset, off, negate, _offset)) {
|
||||||
|
return false; // add/sub overflow.
|
||||||
|
}
|
||||||
NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);)
|
NOT_PRODUCT(_tracer.offset_plus_k_3(n, _offset);)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -699,11 +1111,15 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
|
|||||||
if (opc == Op_AddI) {
|
if (opc == Op_AddI) {
|
||||||
if (n->in(2)->is_Con() && invariant(n->in(1))) {
|
if (n->in(2)->is_Con() && invariant(n->in(1))) {
|
||||||
maybe_add_to_invar(n->in(1), negate);
|
maybe_add_to_invar(n->in(1), negate);
|
||||||
_offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
|
if (!try_AddSubI_no_overflow(_offset, n->in(2)->get_int(), negate, _offset)) {
|
||||||
|
return false; // add/sub overflow.
|
||||||
|
}
|
||||||
NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, negate, _offset);)
|
NOT_PRODUCT(_tracer.offset_plus_k_6(n, _invar, negate, _offset);)
|
||||||
return true;
|
return true;
|
||||||
} else if (n->in(1)->is_Con() && invariant(n->in(2))) {
|
} else if (n->in(1)->is_Con() && invariant(n->in(2))) {
|
||||||
_offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
|
if (!try_AddSubI_no_overflow(_offset, n->in(1)->get_int(), negate, _offset)) {
|
||||||
|
return false; // add/sub overflow.
|
||||||
|
}
|
||||||
maybe_add_to_invar(n->in(2), negate);
|
maybe_add_to_invar(n->in(2), negate);
|
||||||
NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, negate, _offset);)
|
NOT_PRODUCT(_tracer.offset_plus_k_7(n, _invar, negate, _offset);)
|
||||||
return true;
|
return true;
|
||||||
@ -712,11 +1128,15 @@ bool VPointer::offset_plus_k(Node* n, bool negate) {
|
|||||||
if (opc == Op_SubI) {
|
if (opc == Op_SubI) {
|
||||||
if (n->in(2)->is_Con() && invariant(n->in(1))) {
|
if (n->in(2)->is_Con() && invariant(n->in(1))) {
|
||||||
maybe_add_to_invar(n->in(1), negate);
|
maybe_add_to_invar(n->in(1), negate);
|
||||||
_offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
|
if (!try_AddSubI_no_overflow(_offset, n->in(2)->get_int(), !negate, _offset)) {
|
||||||
|
return false; // add/sub overflow.
|
||||||
|
}
|
||||||
NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, negate, _offset);)
|
NOT_PRODUCT(_tracer.offset_plus_k_8(n, _invar, negate, _offset);)
|
||||||
return true;
|
return true;
|
||||||
} else if (n->in(1)->is_Con() && invariant(n->in(2))) {
|
} else if (n->in(1)->is_Con() && invariant(n->in(2))) {
|
||||||
_offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
|
if (!try_AddSubI_no_overflow(_offset, n->in(1)->get_int(), negate, _offset)) {
|
||||||
|
return false; // add/sub overflow.
|
||||||
|
}
|
||||||
maybe_add_to_invar(n->in(2), !negate);
|
maybe_add_to_invar(n->in(2), !negate);
|
||||||
NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, !negate, _offset);)
|
NOT_PRODUCT(_tracer.offset_plus_k_9(n, _invar, !negate, _offset);)
|
||||||
return true;
|
return true;
|
||||||
@ -806,6 +1226,44 @@ void VPointer::maybe_add_to_invar(Node* new_invar, bool negate) {
|
|||||||
_invar = register_if_new(add);
|
_invar = register_if_new(add);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool VPointer::try_AddI_no_overflow(int offset1, int offset2, int& result) {
|
||||||
|
jlong long_offset = java_add((jlong)(offset1), (jlong)(offset2));
|
||||||
|
jint int_offset = java_add( offset1, offset2);
|
||||||
|
if (long_offset != int_offset) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
result = int_offset;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VPointer::try_SubI_no_overflow(int offset1, int offset2, int& result) {
|
||||||
|
jlong long_offset = java_subtract((jlong)(offset1), (jlong)(offset2));
|
||||||
|
jint int_offset = java_subtract( offset1, offset2);
|
||||||
|
if (long_offset != int_offset) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
result = int_offset;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VPointer::try_AddSubI_no_overflow(int offset1, int offset2, bool is_sub, int& result) {
|
||||||
|
if (is_sub) {
|
||||||
|
return try_SubI_no_overflow(offset1, offset2, result);
|
||||||
|
} else {
|
||||||
|
return try_AddI_no_overflow(offset1, offset2, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VPointer::try_LShiftI_no_overflow(int offset, int shift, int& result) {
|
||||||
|
jlong long_offset = java_shift_left((jlong)(offset), shift);
|
||||||
|
jint int_offset = java_shift_left( offset, shift);
|
||||||
|
if (long_offset != int_offset) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
result = int_offset;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// We use two comparisons, because a subtraction could underflow.
|
// We use two comparisons, because a subtraction could underflow.
|
||||||
#define RETURN_CMP_VALUE_IF_NOT_EQUAL(a, b) \
|
#define RETURN_CMP_VALUE_IF_NOT_EQUAL(a, b) \
|
||||||
if (a < b) { return -1; } \
|
if (a < b) { return -1; } \
|
||||||
|
@ -670,13 +670,51 @@ private:
|
|||||||
// A vectorization pointer (VPointer) has information about an address for
|
// A vectorization pointer (VPointer) has information about an address for
|
||||||
// dependence checking and vector alignment. It's usually bound to a memory
|
// dependence checking and vector alignment. It's usually bound to a memory
|
||||||
// operation in a counted loop for vectorizable analysis.
|
// operation in a counted loop for vectorizable analysis.
|
||||||
|
//
|
||||||
|
// We parse and represent pointers of the simple form:
|
||||||
|
//
|
||||||
|
// pointer = adr + offset + invar + scale * ConvI2L(iv)
|
||||||
|
//
|
||||||
|
// Where:
|
||||||
|
//
|
||||||
|
// adr: the base address of an array (base = adr)
|
||||||
|
// OR
|
||||||
|
// some address to off-heap memory (base = TOP)
|
||||||
|
//
|
||||||
|
// offset: a constant offset
|
||||||
|
// invar: a runtime variable, which is invariant during the loop
|
||||||
|
// scale: scaling factor
|
||||||
|
// iv: loop induction variable
|
||||||
|
//
|
||||||
|
// But more precisely, we parse the compound-long-int form:
|
||||||
|
//
|
||||||
|
// pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_offset + int_invar + int_scale * iv)
|
||||||
|
//
|
||||||
|
// pointer = adr + long_offset + long_invar + long_scale * ConvI2L(int_index)
|
||||||
|
// int_index = int_offset + int_invar + int_scale * iv
|
||||||
|
//
|
||||||
|
// However, for aliasing and adjacency checks (e.g. VPointer::cmp()) we always use the simple form to make
|
||||||
|
// decisions. Hence, we must make sure to only create a "valid" VPointer if the optimisations based on the
|
||||||
|
// simple form produce the same result as the compound-long-int form would. Intuitively, this depends on
|
||||||
|
// whether the int_index overflows, but the precise conditions are given in VPointer::is_safe_to_use_as_simple_form().
|
||||||
|
//
|
||||||
|
// ConvI2L(int_index) = ConvI2L(int_offset + int_invar + int_scale * iv)
|
||||||
|
// = ConvI2L(int_offset) + ConvI2L(int_invar) + ConvI2L(int_scale) * ConvI2L(iv)
|
||||||
|
//
|
||||||
|
// scale = long_scale * ConvI2L(int_scale)
|
||||||
|
// offset = long_offset + long_scale * ConvI2L(int_offset)
|
||||||
|
// invar = long_invar + long_scale * ConvI2L(int_invar)
|
||||||
|
//
|
||||||
|
// pointer = adr + offset + invar + scale * ConvI2L(iv)
|
||||||
|
//
|
||||||
class VPointer : public ArenaObj {
|
class VPointer : public ArenaObj {
|
||||||
protected:
|
protected:
|
||||||
MemNode* const _mem; // My memory reference node
|
MemNode* const _mem; // My memory reference node
|
||||||
const VLoop& _vloop;
|
const VLoop& _vloop;
|
||||||
|
|
||||||
Node* _base; // null if unsafe nonheap reference
|
// Components of the simple form:
|
||||||
Node* _adr; // address pointer
|
Node* _base; // Base address of an array OR null if some off-heap memory.
|
||||||
|
Node* _adr; // Same as _base if an array pointer OR some off-heap memory pointer.
|
||||||
int _scale; // multiplier for iv (in bytes), 0 if no loop iv
|
int _scale; // multiplier for iv (in bytes), 0 if no loop iv
|
||||||
int _offset; // constant offset (in bytes)
|
int _offset; // constant offset (in bytes)
|
||||||
|
|
||||||
@ -687,6 +725,13 @@ class VPointer : public ArenaObj {
|
|||||||
Node* _debug_invar_scale; // multiplier for invariant
|
Node* _debug_invar_scale; // multiplier for invariant
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// The int_index components of the compound-long-int form. Used to decide if it is safe to use the
|
||||||
|
// simple form rather than the compound-long-int form that was parsed.
|
||||||
|
bool _has_int_index_after_convI2L;
|
||||||
|
int _int_index_after_convI2L_offset;
|
||||||
|
Node* _int_index_after_convI2L_invar;
|
||||||
|
int _int_index_after_convI2L_scale;
|
||||||
|
|
||||||
Node_Stack* _nstack; // stack used to record a vpointer trace of variants
|
Node_Stack* _nstack; // stack used to record a vpointer trace of variants
|
||||||
bool _analyze_only; // Used in loop unrolling only for vpointer trace
|
bool _analyze_only; // Used in loop unrolling only for vpointer trace
|
||||||
uint _stack_idx; // Used in loop unrolling only for vpointer trace
|
uint _stack_idx; // Used in loop unrolling only for vpointer trace
|
||||||
@ -726,6 +771,8 @@ class VPointer : public ArenaObj {
|
|||||||
VPointer(VPointer* p);
|
VPointer(VPointer* p);
|
||||||
NONCOPYABLE(VPointer);
|
NONCOPYABLE(VPointer);
|
||||||
|
|
||||||
|
bool is_safe_to_use_as_simple_form(Node* base, Node* adr) const;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
bool valid() const { return _adr != nullptr; }
|
bool valid() const { return _adr != nullptr; }
|
||||||
bool has_iv() const { return _scale != 0; }
|
bool has_iv() const { return _scale != 0; }
|
||||||
@ -751,10 +798,43 @@ class VPointer : public ArenaObj {
|
|||||||
return _invar == q._invar;
|
return _invar == q._invar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We compute if and how two VPointers can alias at runtime, i.e. if the two addressed regions of memory can
|
||||||
|
// ever overlap. There are essentially 3 relevant return states:
|
||||||
|
// - NotComparable: Synonymous to "unknown aliasing".
|
||||||
|
// We have no information about how the two VPointers can alias. They could overlap, refer
|
||||||
|
// to another location in the same memory object, or point to a completely different object.
|
||||||
|
// -> Memory edge required. Aliasing unlikely but possible.
|
||||||
|
//
|
||||||
|
// - Less / Greater: Synonymous to "never aliasing".
|
||||||
|
// The two VPointers may point into the same memory object, but be non-aliasing (i.e. we
|
||||||
|
// know both address regions inside the same memory object, but these regions are non-
|
||||||
|
// overlapping), or the VPointers point to entirely different objects.
|
||||||
|
// -> No memory edge required. Aliasing impossible.
|
||||||
|
//
|
||||||
|
// - Equal: Synonymous to "overlap, or point to different memory objects".
|
||||||
|
// The two VPointers either overlap on the same memory object, or point to two different
|
||||||
|
// memory objects.
|
||||||
|
// -> Memory edge required. Aliasing likely.
|
||||||
|
//
|
||||||
|
// In a future refactoring, we can simplify to two states:
|
||||||
|
// - NeverAlias: instead of Less / Greater
|
||||||
|
// - MayAlias: instead of Equal / NotComparable
|
||||||
|
//
|
||||||
|
// Two VPointers are "comparable" (Less / Greater / Equal), iff all of these conditions apply:
|
||||||
|
// 1) Both are valid, i.e. expressible in the compound-long-int or simple form.
|
||||||
|
// 2) The adr are identical, or both are array bases of different arrays.
|
||||||
|
// 3) They have identical scale.
|
||||||
|
// 4) They have identical invar.
|
||||||
|
// 5) The difference in offsets is limited: abs(offset0 - offset1) < 2^31.
|
||||||
int cmp(const VPointer& q) const {
|
int cmp(const VPointer& q) const {
|
||||||
if (valid() && q.valid() &&
|
if (valid() && q.valid() &&
|
||||||
(_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
|
(_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
|
||||||
_scale == q._scale && invar_equals(q)) {
|
_scale == q._scale && invar_equals(q)) {
|
||||||
|
jlong difference = abs(java_subtract((jlong)_offset, (jlong)q._offset));
|
||||||
|
jlong max_diff = (jlong)1 << 31;
|
||||||
|
if (difference >= max_diff) {
|
||||||
|
return NotComparable;
|
||||||
|
}
|
||||||
bool overlap = q._offset < _offset + memory_size() &&
|
bool overlap = q._offset < _offset + memory_size() &&
|
||||||
_offset < q._offset + q.memory_size();
|
_offset < q._offset + q.memory_size();
|
||||||
return overlap ? Equal : (_offset < q._offset ? Less : Greater);
|
return overlap ? Equal : (_offset < q._offset ? Less : Greater);
|
||||||
@ -859,6 +939,11 @@ class VPointer : public ArenaObj {
|
|||||||
|
|
||||||
void maybe_add_to_invar(Node* new_invar, bool negate);
|
void maybe_add_to_invar(Node* new_invar, bool negate);
|
||||||
|
|
||||||
|
static bool try_AddI_no_overflow(int offset1, int offset2, int& result);
|
||||||
|
static bool try_SubI_no_overflow(int offset1, int offset2, int& result);
|
||||||
|
static bool try_AddSubI_no_overflow(int offset1, int offset2, bool is_sub, int& result);
|
||||||
|
static bool try_LShiftI_no_overflow(int offset1, int offset2, int& result);
|
||||||
|
|
||||||
Node* register_if_new(Node* n) const;
|
Node* register_if_new(Node* n) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
|
* Copyright (c) 2023, Red Hat, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -35,7 +36,6 @@ import java.nio.ByteOrder;
|
|||||||
* @test
|
* @test
|
||||||
* @bug 8300258
|
* @bug 8300258
|
||||||
* @key randomness
|
* @key randomness
|
||||||
* @requires (os.simpleArch == "x64") | (os.simpleArch == "aarch64")
|
|
||||||
* @summary C2: vectorization fails on simple ByteBuffer loop
|
* @summary C2: vectorization fails on simple ByteBuffer loop
|
||||||
* @modules java.base/jdk.internal.misc
|
* @modules java.base/jdk.internal.misc
|
||||||
* @library /test/lib /
|
* @library /test/lib /
|
||||||
@ -147,193 +147,420 @@ public class TestVectorizationMismatchedAccess {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
public static void testByteLong1(byte[] dest, long[] src) {
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
|
||||||
|
// might get fixed with JDK-8325155.
|
||||||
|
public static void testByteLong1a(byte[] dest, long[] src) {
|
||||||
for (int i = 0; i < src.length; i++) {
|
for (int i = 0; i < src.length; i++) {
|
||||||
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, src[i]);
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, src[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testByteLong1")
|
@Test
|
||||||
public static void testByteLong1_runner() {
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
runAndVerify(() -> testByteLong1(byteArray, longArray), 0);
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: address has ConvL2I for cast of long to address, not supported.
|
||||||
|
public static void testByteLong1b(byte[] dest, long[] src) {
|
||||||
|
for (int i = 0; i < src.length; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, src[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
public static void testByteLong2(byte[] dest, long[] src) {
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"})
|
||||||
|
public static void testByteLong1c(byte[] dest, long[] src) {
|
||||||
|
long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
|
||||||
|
for (int i = 0; i < src.length - 8; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, base + 8 * i, src[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: address has ConvL2I for cast of long to address, not supported.
|
||||||
|
public static void testByteLong1d(byte[] dest, long[] src) {
|
||||||
|
long base = 64; // make sure it is big enough and 8 byte aligned (required for 32-bit)
|
||||||
|
for (int i = 0; i < src.length - 8; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, base + 8L * i, src[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testByteLong1a", "testByteLong1b", "testByteLong1c", "testByteLong1d"})
|
||||||
|
public static void testByteLong1_runner() {
|
||||||
|
runAndVerify(() -> testByteLong1a(byteArray, longArray), 0);
|
||||||
|
runAndVerify(() -> testByteLong1b(byteArray, longArray), 0);
|
||||||
|
testByteLong1c(byteArray, longArray);
|
||||||
|
testByteLong1d(byteArray, longArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
|
||||||
|
// might get fixed with JDK-8325155.
|
||||||
|
public static void testByteLong2a(byte[] dest, long[] src) {
|
||||||
for (int i = 1; i < src.length; i++) {
|
for (int i = 1; i < src.length; i++) {
|
||||||
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), src[i]);
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), src[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testByteLong2")
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: address has ConvL2I for cast of long to address, not supported.
|
||||||
|
public static void testByteLong2b(byte[] dest, long[] src) {
|
||||||
|
for (int i = 1; i < src.length; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), src[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testByteLong2a", "testByteLong2b"})
|
||||||
public static void testByteLong2_runner() {
|
public static void testByteLong2_runner() {
|
||||||
runAndVerify(() -> testByteLong2(byteArray, longArray), -8);
|
runAndVerify(() -> testByteLong2a(byteArray, longArray), -8);
|
||||||
|
runAndVerify(() -> testByteLong2b(byteArray, longArray), -8);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
public static void testByteLong3(byte[] dest, long[] src) {
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
|
||||||
|
// might get fixed with JDK-8325155.
|
||||||
|
public static void testByteLong3a(byte[] dest, long[] src) {
|
||||||
for (int i = 0; i < src.length - 1; i++) {
|
for (int i = 0; i < src.length - 1; i++) {
|
||||||
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), src[i]);
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), src[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testByteLong3")
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: address has ConvL2I for cast of long to address, not supported.
|
||||||
|
public static void testByteLong3b(byte[] dest, long[] src) {
|
||||||
|
for (int i = 0; i < src.length - 1; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), src[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testByteLong3a", "testByteLong3b"})
|
||||||
public static void testByteLong3_runner() {
|
public static void testByteLong3_runner() {
|
||||||
runAndVerify(() -> testByteLong3(byteArray, longArray), 8);
|
runAndVerify(() -> testByteLong3a(byteArray, longArray), 8);
|
||||||
|
runAndVerify(() -> testByteLong3b(byteArray, longArray), 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"},
|
||||||
applyIf = {"AlignVector", "false"})
|
applyIf = {"AlignVector", "false"})
|
||||||
|
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
|
||||||
|
// might get fixed with JDK-8325155.
|
||||||
// AlignVector cannot guarantee that invar is aligned.
|
// AlignVector cannot guarantee that invar is aligned.
|
||||||
public static void testByteLong4(byte[] dest, long[] src, int start, int stop) {
|
public static void testByteLong4a(byte[] dest, long[] src, int start, int stop) {
|
||||||
for (int i = start; i < stop; i++) {
|
for (int i = start; i < stop; i++) {
|
||||||
UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, src[i]);
|
UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, src[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testByteLong4")
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"},
|
||||||
|
applyIf = {"AlignVector", "false"})
|
||||||
|
// 32-bit: address has ConvL2I for cast of long to address, not supported.
|
||||||
|
// AlignVector cannot guarantee that invar is aligned.
|
||||||
|
public static void testByteLong4b(byte[] dest, long[] src, int start, int stop) {
|
||||||
|
for (int i = start; i < stop; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, src[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testByteLong4a", "testByteLong4b"})
|
||||||
public static void testByteLong4_runner() {
|
public static void testByteLong4_runner() {
|
||||||
baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
||||||
runAndVerify(() -> testByteLong4(byteArray, longArray, 0, size), 0);
|
runAndVerify(() -> testByteLong4a(byteArray, longArray, 0, size), 0);
|
||||||
|
runAndVerify(() -> testByteLong4b(byteArray, longArray, 0, size), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
public static void testByteLong5(byte[] dest, long[] src, int start, int stop) {
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
|
||||||
|
// might get fixed with JDK-8325155.
|
||||||
|
public static void testByteLong5a(byte[] dest, long[] src, int start, int stop) {
|
||||||
for (int i = start; i < stop; i++) {
|
for (int i = start; i < stop; i++) {
|
||||||
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), src[i]);
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), src[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testByteLong5")
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: address has ConvL2I for cast of long to address, not supported.
|
||||||
|
public static void testByteLong5b(byte[] dest, long[] src, int start, int stop) {
|
||||||
|
for (int i = start; i < stop; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), src[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testByteLong5a", "testByteLong5b"})
|
||||||
public static void testByteLong5_runner() {
|
public static void testByteLong5_runner() {
|
||||||
baseOffset = 1;
|
baseOffset = 1;
|
||||||
runAndVerify(() -> testByteLong5(byteArray, longArray, 0, size-1), 8);
|
runAndVerify(() -> testByteLong5a(byteArray, longArray, 0, size-1), 8);
|
||||||
|
runAndVerify(() -> testByteLong5b(byteArray, longArray, 0, size-1), 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
public static void testByteByte1(byte[] dest, byte[] src) {
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
|
||||||
|
// might get fixed with JDK-8325155.
|
||||||
|
public static void testByteByte1a(byte[] dest, byte[] src) {
|
||||||
for (int i = 0; i < src.length / 8; i++) {
|
for (int i = 0; i < src.length / 8; i++) {
|
||||||
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testByteByte1")
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: address has ConvL2I for cast of long to address, not supported.
|
||||||
|
public static void testByteByte1b(byte[] dest, byte[] src) {
|
||||||
|
for (int i = 0; i < src.length / 8; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testByteByte1a", "testByteByte1b"})
|
||||||
public static void testByteByte1_runner() {
|
public static void testByteByte1_runner() {
|
||||||
runAndVerify2(() -> testByteByte1(byteArray, byteArray), 0);
|
runAndVerify2(() -> testByteByte1a(byteArray, byteArray), 0);
|
||||||
|
runAndVerify2(() -> testByteByte1b(byteArray, byteArray), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
public static void testByteByte2(byte[] dest, byte[] src) {
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: offsets are badly aligned (UNSAFE.ARRAY_BYTE_BASE_OFFSET is 4 byte aligned, but not 8 byte aligned).
|
||||||
|
// might get fixed with JDK-8325155.
|
||||||
|
public static void testByteByte2a(byte[] dest, byte[] src) {
|
||||||
for (int i = 1; i < src.length / 8; i++) {
|
for (int i = 1; i < src.length / 8; i++) {
|
||||||
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testByteByte2")
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
applyIfCPUFeatureOr = {"sse2", "true", "asimd", "true"},
|
||||||
|
applyIfPlatform = {"64-bit", "true"})
|
||||||
|
// 32-bit: address has ConvL2I for cast of long to address, not supported.
|
||||||
|
public static void testByteByte2b(byte[] dest, byte[] src) {
|
||||||
|
for (int i = 1; i < src.length / 8; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i - 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testByteByte2a", "testByteByte2b"})
|
||||||
public static void testByteByte2_runner() {
|
public static void testByteByte2_runner() {
|
||||||
runAndVerify2(() -> testByteByte2(byteArray, byteArray), -8);
|
runAndVerify2(() -> testByteByte2a(byteArray, byteArray), -8);
|
||||||
|
runAndVerify2(() -> testByteByte2b(byteArray, byteArray), -8);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
|
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
|
||||||
public static void testByteByte3(byte[] dest, byte[] src) {
|
public static void testByteByte3a(byte[] dest, byte[] src) {
|
||||||
for (int i = 0; i < src.length / 8 - 1; i++) {
|
for (int i = 0; i < src.length / 8 - 1; i++) {
|
||||||
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testByteByte3")
|
@Test
|
||||||
|
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
|
||||||
|
public static void testByteByte3b(byte[] dest, byte[] src) {
|
||||||
|
for (int i = 0; i < src.length / 8 - 1; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + 1), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testByteByte3a", "testByteByte3b"})
|
||||||
public static void testByteByte3_runner() {
|
public static void testByteByte3_runner() {
|
||||||
runAndVerify2(() -> testByteByte3(byteArray, byteArray), 8);
|
runAndVerify2(() -> testByteByte3a(byteArray, byteArray), 8);
|
||||||
|
runAndVerify2(() -> testByteByte3b(byteArray, byteArray), 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
|
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
|
||||||
public static void testByteByte4(byte[] dest, byte[] src, int start, int stop) {
|
public static void testByteByte4a(byte[] dest, byte[] src, int start, int stop) {
|
||||||
for (int i = start; i < stop; i++) {
|
for (int i = start; i < stop; i++) {
|
||||||
UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
|
UNSAFE.putLongUnaligned(dest, 8 * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testByteByte4")
|
@Test
|
||||||
|
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
|
||||||
|
public static void testByteByte4b(byte[] dest, byte[] src, int start, int stop) {
|
||||||
|
for (int i = start; i < stop; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, 8L * i + baseOffset, UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testByteByte4a", "testByteByte4b"})
|
||||||
public static void testByteByte4_runner() {
|
public static void testByteByte4_runner() {
|
||||||
baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
baseOffset = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
|
||||||
runAndVerify2(() -> testByteByte4(byteArray, byteArray, 0, size), 0);
|
runAndVerify2(() -> testByteByte4a(byteArray, byteArray, 0, size), 0);
|
||||||
|
runAndVerify2(() -> testByteByte4b(byteArray, byteArray, 0, size), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
|
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
|
||||||
public static void testByteByte5(byte[] dest, byte[] src, int start, int stop) {
|
public static void testByteByte5a(byte[] dest, byte[] src, int start, int stop) {
|
||||||
for (int i = start; i < stop; i++) {
|
for (int i = start; i < stop; i++) {
|
||||||
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8 * i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testByteByte5")
|
@Test
|
||||||
|
@IR(failOn = { IRNode.LOAD_VECTOR_L, IRNode.STORE_VECTOR })
|
||||||
|
public static void testByteByte5b(byte[] dest, byte[] src, int start, int stop) {
|
||||||
|
for (int i = start; i < stop; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(dest, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * (i + baseOffset), UNSAFE.getLongUnaligned(src, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 8L * i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testByteByte5a", "testByteByte5b"})
|
||||||
public static void testByteByte5_runner() {
|
public static void testByteByte5_runner() {
|
||||||
baseOffset = 1;
|
baseOffset = 1;
|
||||||
runAndVerify2(() -> testByteByte5(byteArray, byteArray, 0, size-1), 8);
|
runAndVerify2(() -> testByteByte5a(byteArray, byteArray, 0, size-1), 8);
|
||||||
|
runAndVerify2(() -> testByteByte5b(byteArray, byteArray, 0, size-1), 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
|
||||||
public static void testOffHeapLong1(long dest, long[] src) {
|
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
||||||
|
// FAILS: adr is CastX2P(dest + 8 * (i + int_con))
|
||||||
|
// See: JDK-8331576
|
||||||
|
public static void testOffHeapLong1a(long dest, long[] src) {
|
||||||
for (int i = 0; i < src.length; i++) {
|
for (int i = 0; i < src.length; i++) {
|
||||||
UNSAFE.putLongUnaligned(null, dest + 8 * i, src[i]);
|
UNSAFE.putLongUnaligned(null, dest + 8 * i, src[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testOffHeapLong1")
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
|
||||||
|
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
||||||
|
// FAILS: adr is CastX2P(dest + 8L * (i + int_con))
|
||||||
|
// See: JDK-8331576
|
||||||
|
public static void testOffHeapLong1b(long dest, long[] src) {
|
||||||
|
for (int i = 0; i < src.length; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(null, dest + 8L * i, src[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testOffHeapLong1a", "testOffHeapLong1b"})
|
||||||
public static void testOffHeapLong1_runner() {
|
public static void testOffHeapLong1_runner() {
|
||||||
runAndVerify3(() -> testOffHeapLong1(baseOffHeap, longArray), 0);
|
runAndVerify3(() -> testOffHeapLong1a(baseOffHeap, longArray), 0);
|
||||||
|
runAndVerify3(() -> testOffHeapLong1b(baseOffHeap, longArray), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
|
||||||
public static void testOffHeapLong2(long dest, long[] src) {
|
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
||||||
|
// FAILS: adr is CastX2P
|
||||||
|
// See: JDK-8331576
|
||||||
|
public static void testOffHeapLong2a(long dest, long[] src) {
|
||||||
for (int i = 1; i < src.length; i++) {
|
for (int i = 1; i < src.length; i++) {
|
||||||
UNSAFE.putLongUnaligned(null, dest + 8 * (i - 1), src[i]);
|
UNSAFE.putLongUnaligned(null, dest + 8 * (i - 1), src[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testOffHeapLong2")
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
|
||||||
|
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
||||||
|
// FAILS: adr is CastX2P
|
||||||
|
// See: JDK-8331576
|
||||||
|
public static void testOffHeapLong2b(long dest, long[] src) {
|
||||||
|
for (int i = 1; i < src.length; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(null, dest + 8L * (i - 1), src[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testOffHeapLong2a", "testOffHeapLong2b"})
|
||||||
public static void testOffHeapLong2_runner() {
|
public static void testOffHeapLong2_runner() {
|
||||||
runAndVerify3(() -> testOffHeapLong2(baseOffHeap, longArray), -8);
|
runAndVerify3(() -> testOffHeapLong2a(baseOffHeap, longArray), -8);
|
||||||
|
runAndVerify3(() -> testOffHeapLong2b(baseOffHeap, longArray), -8);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
|
||||||
public static void testOffHeapLong3(long dest, long[] src) {
|
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
||||||
|
// FAILS: adr is CastX2P
|
||||||
|
// See: JDK-8331576
|
||||||
|
public static void testOffHeapLong3a(long dest, long[] src) {
|
||||||
for (int i = 0; i < src.length - 1; i++) {
|
for (int i = 0; i < src.length - 1; i++) {
|
||||||
UNSAFE.putLongUnaligned(null, dest + 8 * (i + 1), src[i]);
|
UNSAFE.putLongUnaligned(null, dest + 8 * (i + 1), src[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testOffHeapLong3")
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
|
||||||
|
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" })
|
||||||
|
// FAILS: adr is CastX2P
|
||||||
|
// See: JDK-8331576
|
||||||
|
public static void testOffHeapLong3b(long dest, long[] src) {
|
||||||
|
for (int i = 0; i < src.length - 1; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(null, dest + 8L * (i + 1), src[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testOffHeapLong3a", "testOffHeapLong3b"})
|
||||||
public static void testOffHeapLong3_runner() {
|
public static void testOffHeapLong3_runner() {
|
||||||
runAndVerify3(() -> testOffHeapLong3(baseOffHeap, longArray), 8);
|
runAndVerify3(() -> testOffHeapLong3a(baseOffHeap, longArray), 8);
|
||||||
|
runAndVerify3(() -> testOffHeapLong3b(baseOffHeap, longArray), 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
|
||||||
applyIf = {"AlignVector", "false"})
|
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
// applyIf = {"AlignVector", "false"})
|
||||||
|
// FAILS: adr is CastX2P
|
||||||
|
// See: JDK-8331576
|
||||||
// AlignVector cannot guarantee that invar is aligned.
|
// AlignVector cannot guarantee that invar is aligned.
|
||||||
public static void testOffHeapLong4(long dest, long[] src, int start, int stop) {
|
public static void testOffHeapLong4a(long dest, long[] src, int start, int stop) {
|
||||||
for (int i = start; i < stop; i++) {
|
for (int i = start; i < stop; i++) {
|
||||||
UNSAFE.putLongUnaligned(null, dest + 8 * i + baseOffset, src[i]);
|
UNSAFE.putLongUnaligned(null, dest + 8 * i + baseOffset, src[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Run(test = "testOffHeapLong4")
|
@Test
|
||||||
|
@IR(counts = { IRNode.LOAD_VECTOR_L, "=0", IRNode.STORE_VECTOR, "=0" }) // temporary
|
||||||
|
// @IR(counts = { IRNode.LOAD_VECTOR_L, ">=1", IRNode.STORE_VECTOR, ">=1" },
|
||||||
|
// applyIf = {"AlignVector", "false"})
|
||||||
|
// FAILS: adr is CastX2P
|
||||||
|
// See: JDK-8331576
|
||||||
|
// AlignVector cannot guarantee that invar is aligned.
|
||||||
|
public static void testOffHeapLong4b(long dest, long[] src, int start, int stop) {
|
||||||
|
for (int i = start; i < stop; i++) {
|
||||||
|
UNSAFE.putLongUnaligned(null, dest + 8L * i + baseOffset, src[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Run(test = {"testOffHeapLong4a", "testOffHeapLong4b"})
|
||||||
public static void testOffHeapLong4_runner() {
|
public static void testOffHeapLong4_runner() {
|
||||||
baseOffset = 8;
|
baseOffset = 8;
|
||||||
runAndVerify3(() -> testOffHeapLong4(baseOffHeap, longArray, 0, size-1), 8);
|
runAndVerify3(() -> testOffHeapLong4a(baseOffHeap, longArray, 0, size-1), 8);
|
||||||
|
runAndVerify3(() -> testOffHeapLong4b(baseOffHeap, longArray, 0, size-1), 8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1363,7 +1363,7 @@ public class TestAlignVector {
|
|||||||
static Object[] test17a(long[] a) {
|
static Object[] test17a(long[] a) {
|
||||||
// Unsafe: vectorizes with profiling (not xcomp)
|
// Unsafe: vectorizes with profiling (not xcomp)
|
||||||
for (int i = 0; i < RANGE; i++) {
|
for (int i = 0; i < RANGE; i++) {
|
||||||
int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
|
long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
|
||||||
long v = UNSAFE.getLongUnaligned(a, adr);
|
long v = UNSAFE.getLongUnaligned(a, adr);
|
||||||
UNSAFE.putLongUnaligned(a, adr, v + 1);
|
UNSAFE.putLongUnaligned(a, adr, v + 1);
|
||||||
}
|
}
|
||||||
@ -1375,7 +1375,7 @@ public class TestAlignVector {
|
|||||||
static Object[] test17b(long[] a) {
|
static Object[] test17b(long[] a) {
|
||||||
// Not alignable
|
// Not alignable
|
||||||
for (int i = 0; i < RANGE-1; i++) {
|
for (int i = 0; i < RANGE-1; i++) {
|
||||||
int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
|
long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
|
||||||
long v = UNSAFE.getLongUnaligned(a, adr);
|
long v = UNSAFE.getLongUnaligned(a, adr);
|
||||||
UNSAFE.putLongUnaligned(a, adr, v + 1);
|
UNSAFE.putLongUnaligned(a, adr, v + 1);
|
||||||
}
|
}
|
||||||
@ -1392,7 +1392,7 @@ public class TestAlignVector {
|
|||||||
static Object[] test17c(long[] a) {
|
static Object[] test17c(long[] a) {
|
||||||
// Unsafe: aligned vectorizes
|
// Unsafe: aligned vectorizes
|
||||||
for (int i = 0; i < RANGE-1; i+=4) {
|
for (int i = 0; i < RANGE-1; i+=4) {
|
||||||
int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i;
|
long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i;
|
||||||
long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
|
long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
|
||||||
long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
|
long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
|
||||||
UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
|
UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
|
||||||
@ -1422,7 +1422,7 @@ public class TestAlignVector {
|
|||||||
static Object[] test17d(long[] a) {
|
static Object[] test17d(long[] a) {
|
||||||
// Not alignable
|
// Not alignable
|
||||||
for (int i = 0; i < RANGE-1; i+=4) {
|
for (int i = 0; i < RANGE-1; i+=4) {
|
||||||
int adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8 * i + 1;
|
long adr = UNSAFE.ARRAY_LONG_BASE_OFFSET + 8L * i + 1;
|
||||||
long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
|
long v0 = UNSAFE.getLongUnaligned(a, adr + 0);
|
||||||
long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
|
long v1 = UNSAFE.getLongUnaligned(a, adr + 8);
|
||||||
UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
|
UNSAFE.putLongUnaligned(a, adr + 0, v0 + 1);
|
||||||
|
@ -1090,11 +1090,11 @@ public class TestAlignVectorFuzzer {
|
|||||||
int init = init_con_or_var();
|
int init = init_con_or_var();
|
||||||
int limit = limit_con_or_var();
|
int limit = limit_con_or_var();
|
||||||
int stride = stride_con();
|
int stride = stride_con();
|
||||||
int scale = scale_con();
|
long scale = scale_con();
|
||||||
int offset = offset1_con_or_var();
|
long offset = offset1_con_or_var();
|
||||||
|
|
||||||
for (int i = init; i < limit; i += stride) {
|
for (int i = init; i < limit; i += stride) {
|
||||||
int adr = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + i * scale;
|
long adr = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + i * scale;
|
||||||
int v = UNSAFE.getIntUnaligned(a, adr);
|
int v = UNSAFE.getIntUnaligned(a, adr);
|
||||||
UNSAFE.putIntUnaligned(a, adr, v + 1);
|
UNSAFE.putIntUnaligned(a, adr, v + 1);
|
||||||
}
|
}
|
||||||
@ -1105,19 +1105,19 @@ public class TestAlignVectorFuzzer {
|
|||||||
int init = init_con_or_var();
|
int init = init_con_or_var();
|
||||||
int limit = limit_con_or_var();
|
int limit = limit_con_or_var();
|
||||||
int stride = stride_con();
|
int stride = stride_con();
|
||||||
int scale = scale_con();
|
long scale = scale_con();
|
||||||
int offset1 = offset1_con_or_var();
|
long offset1 = offset1_con_or_var();
|
||||||
int offset2 = offset2_con_or_var();
|
long offset2 = offset2_con_or_var();
|
||||||
int offset3 = offset3_con_or_var();
|
long offset3 = offset3_con_or_var();
|
||||||
|
|
||||||
int h1 = hand_unrolling1_con();
|
int h1 = hand_unrolling1_con();
|
||||||
int h2 = hand_unrolling2_con();
|
int h2 = hand_unrolling2_con();
|
||||||
int h3 = hand_unrolling3_con();
|
int h3 = hand_unrolling3_con();
|
||||||
|
|
||||||
for (int i = init; i < limit; i += stride) {
|
for (int i = init; i < limit; i += stride) {
|
||||||
int adr1 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset1 + i * scale;
|
long adr1 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset1 + i * scale;
|
||||||
int adr2 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset2 + i * scale;
|
long adr2 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset2 + i * scale;
|
||||||
int adr3 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset3 + i * scale;
|
long adr3 = UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset3 + i * scale;
|
||||||
|
|
||||||
if (h1 >= 1) { UNSAFE.putIntUnaligned(a, adr1 + 0*4, UNSAFE.getIntUnaligned(a, adr1 + 0*4) + 1); }
|
if (h1 >= 1) { UNSAFE.putIntUnaligned(a, adr1 + 0*4, UNSAFE.getIntUnaligned(a, adr1 + 0*4) + 1); }
|
||||||
if (h1 >= 2) { UNSAFE.putIntUnaligned(a, adr1 + 1*4, UNSAFE.getIntUnaligned(a, adr1 + 1*4) + 1); }
|
if (h1 >= 2) { UNSAFE.putIntUnaligned(a, adr1 + 1*4, UNSAFE.getIntUnaligned(a, adr1 + 1*4) + 1); }
|
||||||
|
@ -172,10 +172,10 @@ public class TestIndependentPacksWithCyclicDependency {
|
|||||||
static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
|
static void test2(int[] dataIa, int[] dataIb, float[] dataFa, float[] dataFb) {
|
||||||
for (int i = 0; i < RANGE; i+=2) {
|
for (int i = 0; i < RANGE; i+=2) {
|
||||||
// int and float arrays are two slices. But we pretend both are of type int.
|
// int and float arrays are two slices. But we pretend both are of type int.
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1);
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] + 1);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1);
|
||||||
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0);
|
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0);
|
||||||
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4);
|
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -248,10 +248,10 @@ public class TestIndependentPacksWithCyclicDependency {
|
|||||||
for (int i = 0; i < RANGE; i+=2) {
|
for (int i = 0; i < RANGE; i+=2) {
|
||||||
// same as test2, except that reordering leads to different semantics
|
// same as test2, except that reordering leads to different semantics
|
||||||
// explanation analogue to test4
|
// explanation analogue to test4
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1); // A
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A
|
||||||
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0); // X
|
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X
|
||||||
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4); // Y
|
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] + 1); // B
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] + 1); // B
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -275,18 +275,18 @@ public class TestIndependentPacksWithCyclicDependency {
|
|||||||
long[] dataLa, long[] dataLb) {
|
long[] dataLa, long[] dataLb) {
|
||||||
for (int i = 0; i < RANGE; i+=2) {
|
for (int i = 0; i < RANGE; i+=2) {
|
||||||
// Chain of parallelizable op and conversion
|
// Chain of parallelizable op and conversion
|
||||||
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
|
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
|
||||||
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
|
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
|
||||||
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
|
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
|
||||||
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
|
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
|
||||||
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
|
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
|
||||||
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
|
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -307,18 +307,18 @@ public class TestIndependentPacksWithCyclicDependency {
|
|||||||
long[] dataLa, long[] dataLb) {
|
long[] dataLa, long[] dataLb) {
|
||||||
for (int i = 0; i < RANGE; i+=2) {
|
for (int i = 0; i < RANGE; i+=2) {
|
||||||
// Cycle involving 3 memory slices
|
// Cycle involving 3 memory slices
|
||||||
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
|
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
|
||||||
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
|
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
|
||||||
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
|
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
|
||||||
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
|
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
|
||||||
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
|
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
|
||||||
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // moved down
|
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -340,19 +340,19 @@ public class TestIndependentPacksWithCyclicDependency {
|
|||||||
long[] dataLa, long[] dataLb) {
|
long[] dataLa, long[] dataLb) {
|
||||||
for (int i = 0; i < RANGE; i+=2) {
|
for (int i = 0; i < RANGE; i+=2) {
|
||||||
// 2-cycle, with more ops after
|
// 2-cycle, with more ops after
|
||||||
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
|
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
|
||||||
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
|
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
|
||||||
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
|
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
|
||||||
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
|
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
|
||||||
// more stuff after
|
// more stuff after
|
||||||
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
|
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
|
||||||
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
|
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -373,19 +373,19 @@ public class TestIndependentPacksWithCyclicDependency {
|
|||||||
long[] dataLa, long[] dataLb) {
|
long[] dataLa, long[] dataLb) {
|
||||||
for (int i = 0; i < RANGE; i+=2) {
|
for (int i = 0; i < RANGE; i+=2) {
|
||||||
// 2-cycle, with more stuff before
|
// 2-cycle, with more stuff before
|
||||||
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
|
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
|
||||||
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
|
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
|
||||||
// 2-cycle
|
// 2-cycle
|
||||||
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
|
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
|
||||||
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
|
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
|
||||||
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) * 45;
|
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) * 45;
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
|
||||||
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3;
|
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3;
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -423,18 +423,18 @@ public class TestIndependentPacksWithCyclicDependency {
|
|||||||
//
|
//
|
||||||
// The cycle thus does not only go via packs, but also scalar ops.
|
// The cycle thus does not only go via packs, but also scalar ops.
|
||||||
//
|
//
|
||||||
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3; // A
|
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3; // A
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
|
||||||
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45; // R: constant mismatch
|
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45; // R: constant mismatch
|
||||||
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) + 43; // S
|
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43; // S
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
|
||||||
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f; // U
|
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f; // U
|
||||||
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f; // V
|
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f; // V
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
|
||||||
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // B: moved down
|
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // B: moved down
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -463,8 +463,8 @@ public class TestIndependentPacksWithCyclicDependency {
|
|||||||
|
|
||||||
static void verify(String name, float[] data, float[] gold) {
|
static void verify(String name, float[] data, float[] gold) {
|
||||||
for (int i = 0; i < RANGE; i++) {
|
for (int i = 0; i < RANGE; i++) {
|
||||||
int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
|
int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
|
||||||
int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
|
int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
|
||||||
if (datav != goldv) {
|
if (datav != goldv) {
|
||||||
throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
|
throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
|
||||||
}
|
}
|
||||||
|
@ -58,18 +58,18 @@ public class TestIndependentPacksWithCyclicDependency2 {
|
|||||||
long[] dataLa, long[] dataLb) {
|
long[] dataLa, long[] dataLb) {
|
||||||
for (int i = 0; i < RANGE; i+=2) {
|
for (int i = 0; i < RANGE; i+=2) {
|
||||||
// For explanation, see test 10 in TestIndependentPacksWithCyclicDependency.java
|
// For explanation, see test 10 in TestIndependentPacksWithCyclicDependency.java
|
||||||
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0) + 3;
|
int v00 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0) + 3;
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, v00);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, v00);
|
||||||
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0) * 45;
|
int v10 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0) * 45;
|
||||||
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4) + 43;
|
int v11 = unsafe.getInt(dataFb, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4) + 43;
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0, v10);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0, v10);
|
||||||
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4, v11);
|
unsafe.putInt(dataLa, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4, v11);
|
||||||
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 0) + 0.55f;
|
float v20 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 0) + 0.55f;
|
||||||
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4 * i + 4) + 0.55f;
|
float v21 = unsafe.getFloat(dataLb, unsafe.ARRAY_LONG_BASE_OFFSET + 4L * i + 4) + 0.55f;
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0, v20);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0, v20);
|
||||||
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4, v21);
|
unsafe.putFloat(dataIb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4, v21);
|
||||||
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4) + 3; // moved down
|
int v01 = unsafe.getInt(dataIa, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4) + 3; // moved down
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, v01);
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, v01);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,8 +83,8 @@ public class TestIndependentPacksWithCyclicDependency2 {
|
|||||||
|
|
||||||
static void verify(String name, float[] data, float[] gold) {
|
static void verify(String name, float[] data, float[] gold) {
|
||||||
for (int i = 0; i < RANGE; i++) {
|
for (int i = 0; i < RANGE; i++) {
|
||||||
int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
|
int datav = unsafe.getInt(data, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
|
||||||
int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i);
|
int goldv = unsafe.getInt(gold, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i);
|
||||||
if (datav != goldv) {
|
if (datav != goldv) {
|
||||||
throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
|
throw new RuntimeException(" Invalid " + name + " result: dataF[" + i + "]: " + datav + " != " + goldv);
|
||||||
}
|
}
|
||||||
|
@ -124,10 +124,10 @@ public class TestScheduleReordersScalarMemops {
|
|||||||
for (int i = 0; i < RANGE; i+=2) {
|
for (int i = 0; i < RANGE; i+=2) {
|
||||||
// Do the same as test0, but without int-float conversion.
|
// Do the same as test0, but without int-float conversion.
|
||||||
// This should reproduce on machines where conversion is not implemented.
|
// This should reproduce on machines where conversion is not implemented.
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 0, dataIa[i+0] + 1); // A +1
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 0, dataIa[i+0] + 1); // A +1
|
||||||
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 0); // X
|
dataIb[i+0] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 0); // X
|
||||||
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4 * i + 4); // Y
|
dataIb[i+1] = 11 * unsafe.getInt(dataFb, unsafe.ARRAY_INT_BASE_OFFSET + 4L * i + 4); // Y
|
||||||
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4 * i + 4, dataIa[i+1] * 11); // B *11
|
unsafe.putInt(dataFa, unsafe.ARRAY_FLOAT_BASE_OFFSET + 4L * i + 4, dataIa[i+1] * 11); // B *11
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user