8318446: C2: optimize stores into primitive arrays by combining values into larger store

Reviewed-by: kvn, thartmann
This commit is contained in:
Emanuel Peter 2024-04-24 06:44:14 +00:00
parent 5c3838605d
commit 3ccb64c021
9 changed files with 2652 additions and 4 deletions

View File

@ -103,6 +103,7 @@ $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \
--add-exports java.base/jdk.internal.event=ALL-UNNAMED \ --add-exports java.base/jdk.internal.event=ALL-UNNAMED \
--add-exports java.base/jdk.internal.foreign=ALL-UNNAMED \ --add-exports java.base/jdk.internal.foreign=ALL-UNNAMED \
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED \ --add-exports java.base/jdk.internal.misc=ALL-UNNAMED \
--add-exports java.base/jdk.internal.util=ALL-UNNAMED \
--add-exports java.base/jdk.internal.org.objectweb.asm.tree=ALL-UNNAMED \ --add-exports java.base/jdk.internal.org.objectweb.asm.tree=ALL-UNNAMED \
--add-exports java.base/jdk.internal.org.objectweb.asm=ALL-UNNAMED \ --add-exports java.base/jdk.internal.org.objectweb.asm=ALL-UNNAMED \
--add-exports java.base/jdk.internal.vm=ALL-UNNAMED \ --add-exports java.base/jdk.internal.vm=ALL-UNNAMED \

View File

@ -724,9 +724,9 @@ Node* AddPNode::Ideal_base_and_offset(Node* ptr, PhaseValues* phase,
//------------------------------unpack_offsets---------------------------------- //------------------------------unpack_offsets----------------------------------
// Collect the AddP offset values into the elements array, giving up // Collect the AddP offset values into the elements array, giving up
// if there are more than length. // if there are more than length.
int AddPNode::unpack_offsets(Node* elements[], int length) { int AddPNode::unpack_offsets(Node* elements[], int length) const {
int count = 0; int count = 0;
Node* addr = this; Node const* addr = this;
Node* base = addr->in(AddPNode::Base); Node* base = addr->in(AddPNode::Base);
while (addr->is_AddP()) { while (addr->is_AddP()) {
if (addr->in(AddPNode::Base) != base) { if (addr->in(AddPNode::Base) != base) {

View File

@ -181,7 +181,7 @@ public:
// Collect the AddP offset values into the elements array, giving up // Collect the AddP offset values into the elements array, giving up
// if there are more than length. // if there are more than length.
int unpack_offsets(Node* elements[], int length); int unpack_offsets(Node* elements[], int length) const;
// Do not match base-ptr edge // Do not match base-ptr edge
virtual uint match_edge(uint idx) const; virtual uint match_edge(uint idx) const;

View File

@ -353,6 +353,12 @@
develop(bool, TraceNewVectors, false, \ develop(bool, TraceNewVectors, false, \
"Trace creation of Vector nodes") \ "Trace creation of Vector nodes") \
\ \
product(bool, MergeStores, true, DIAGNOSTIC, \
"Optimize stores by combining values into larger store") \
\
develop(bool, TraceMergeStores, false, \
"Trace creation of merged stores") \
\
product_pd(bool, OptoBundling, \ product_pd(bool, OptoBundling, \
"Generate nops to fill i-cache lines") \ "Generate nops to fill i-cache lines") \
\ \

View File

@ -931,6 +931,7 @@ Compile::Compile( ciEnv* ci_env,
_directive(directive), _directive(directive),
_log(ci_env->log()), _log(ci_env->log()),
_first_failure_details(nullptr), _first_failure_details(nullptr),
_for_post_loop_igvn(comp_arena(), 8, 0, nullptr),
_congraph(nullptr), _congraph(nullptr),
NOT_PRODUCT(_igv_printer(nullptr) COMMA) NOT_PRODUCT(_igv_printer(nullptr) COMMA)
_unique(0), _unique(0),

View File

@ -2697,6 +2697,683 @@ uint StoreNode::hash() const {
return NO_HASH; return NO_HASH;
} }
// Class to parse array pointers, and determine if they are adjacent. We parse the form:
//
// pointer = base
// + constant_offset
// + LShiftL( ConvI2L(int_offset + int_con), int_offset_shift)
// + sum(other_offsets)
//
//
// Note: we accumulate all constant offsets into constant_offset, even the int constant behind
// the "LShiftL(ConvI2L(...))" pattern. We convert "ConvI2L(int_offset + int_con)" to
// "ConvI2L(int_offset) + int_con", which is only safe if we can assume that either all
// compared addresses have an overflow for "int_offset + int_con" or none.
// For loads and stores on arrays, we know that if one overflows and the other does not, then
// the two addresses lie almost max_int indices apart, but the maximal array size is
// only about half of that. Therefore, the RangeCheck on at least one of them must have
// failed.
//
// constant_offset += LShiftL( ConvI2L(int_con), int_offset_shift)
//
// pointer = base
// + constant_offset
// + LShiftL( ConvI2L(int_offset), int_offset_shift)
// + sum(other_offsets)
//
// Decomposed form of an array address, used to decide whether two memory
// accesses touch directly adjacent memory. An instance is either "valid"
// (the address was successfully split into base + constant + optional scaled
// int index + remaining offsets) or "invalid" (only the original pointer is
// remembered). Instances are immutable after construction.
class ArrayPointer {
private:
  const bool _is_valid;                       // The parsing succeeded
  const Node* _pointer;                       // The final pointer to the position in the array
  const Node* _base;                          // Base address of the array
  const jlong _constant_offset;               // Sum of collected constant offsets
  const Node* _int_offset;                    // (optional) Offset behind LShiftL and ConvI2L
  const jint  _int_offset_shift;              // (optional) Shift value for int_offset
  const GrowableArray<Node*>* _other_offsets; // List of other AddP offsets

  // Only reachable through make() / make_invalid(). The asserts pin the
  // invariant that base and other_offsets are present exactly for valid
  // instances, and that the pointer is always present.
  ArrayPointer(const bool is_valid,
               const Node* pointer,
               const Node* base,
               const jlong constant_offset,
               const Node* int_offset,
               const jint int_offset_shift,
               const GrowableArray<Node*>* other_offsets) :
      _is_valid(is_valid),
      _pointer(pointer),
      _base(base),
      _constant_offset(constant_offset),
      _int_offset(int_offset),
      _int_offset_shift(int_offset_shift),
      _other_offsets(other_offsets)
  {
    assert(_pointer != nullptr, "must always have pointer");
    assert(is_valid == (_base != nullptr), "have base exactly if valid");
    assert(is_valid == (_other_offsets != nullptr), "have other_offsets exactly if valid");
  }

  // Result for a pointer that does not have the expected shape. Such an
  // instance is never adjacent to anything.
  static ArrayPointer make_invalid(const Node* pointer) {
    return ArrayPointer(false, pointer, nullptr, 0, nullptr, 0, nullptr);
  }

  // Try to match "offset" against the scaled-int-index shape. On success the
  // int index node and the shift are written to the out-parameters and true
  // is returned; on failure the out-parameters are left untouched.
  static bool parse_int_offset(Node* offset, Node*& int_offset, jint& int_offset_shift) {
    // offset = LShiftL( ConvI2L(int_offset), int_offset_shift)
    if (offset->Opcode() == Op_LShiftL &&
        offset->in(1)->Opcode() == Op_ConvI2L &&
        offset->in(2)->Opcode() == Op_ConI) {
      int_offset = offset->in(1)->in(1); // LShiftL -> ConvI2L -> int_offset
      int_offset_shift = offset->in(2)->get_int(); // LShiftL -> int_offset_shift
      return true;
    }

    // offset = ConvI2L(int_offset) = LShiftL( ConvI2L(int_offset), 0)
    if (offset->Opcode() == Op_ConvI2L) {
      int_offset = offset->in(1);
      int_offset_shift = 0;
      return true;
    }

    // parse failed
    return false;
  }

public:
  // Parse the structure above the pointer.
  //
  // Returns an invalid ArrayPointer if the pointer is not an AddP, has no
  // base, or unpack_offsets() reports no offsets (count <= 0). Otherwise all
  // constant offsets (including an int constant found behind the ConvI2L)
  // are folded into one constant, at most one scaled int index is extracted,
  // and every remaining offset is kept as-is in other_offsets.
  //
  // NOTE(review): other_offsets is allocated with "new GrowableArray" and never
  // freed here; the visible caller wraps the call in a ResourceMark, so this is
  // presumably resource-area memory - confirm before calling from elsewhere.
  static ArrayPointer make(PhaseGVN* phase, const Node* pointer) {
    assert(phase->type(pointer)->isa_aryptr() != nullptr, "must be array pointer");
    if (!pointer->is_AddP()) { return ArrayPointer::make_invalid(pointer); }

    const Node* base = pointer->in(AddPNode::Base);
    if (base == nullptr) { return ArrayPointer::make_invalid(pointer); }

    const int search_depth = 5;
    Node* offsets[search_depth];
    int count = pointer->as_AddP()->unpack_offsets(offsets, search_depth);

    // We expect at least a constant each
    if (count <= 0) { return ArrayPointer::make_invalid(pointer); }

    // We extract the form:
    //
    //   pointer =   base
    //             + constant_offset
    //             + LShiftL( ConvI2L(int_offset + int_con), int_offset_shift)
    //             + sum(other_offsets)
    //
    jlong constant_offset = 0;
    Node* int_offset = nullptr;
    jint int_offset_shift = 0;
    GrowableArray<Node*>* other_offsets = new GrowableArray<Node*>(count);

    for (int i = 0; i < count; i++) {
      Node* offset = offsets[i];
      if (offset->Opcode() == Op_ConI) {
        // Constant int offset
        constant_offset += offset->get_int();
      } else if (offset->Opcode() == Op_ConL) {
        // Constant long offset
        constant_offset += offset->get_long();
      } else if (int_offset == nullptr && parse_int_offset(offset, int_offset, int_offset_shift)) {
        // LShiftL( ConvI2L(int_offset), int_offset_shift)
        // Only the first offset of this shape is taken; later ones fall into
        // other_offsets because int_offset is no longer nullptr.
        int_offset = int_offset->uncast();
        if (int_offset->Opcode() == Op_AddI && int_offset->in(2)->Opcode() == Op_ConI) {
          // LShiftL( ConvI2L(int_offset + int_con), int_offset_shift)
          constant_offset += ((jlong)int_offset->in(2)->get_int()) << int_offset_shift;
          int_offset = int_offset->in(1);
        }
      } else {
        // All others
        other_offsets->append(offset);
      }
    }

    return ArrayPointer(true, pointer, base, constant_offset, int_offset, int_offset_shift, other_offsets);
  }

  // True iff both pointers are valid, this pointer's constant offset plus
  // data_size equals the other's constant offset, and every other component
  // (base, int index, shift, other offsets) is the identical node/value.
  // The other_offsets lists are compared position by position, so the same
  // offsets in a different order do not match.
  bool is_adjacent_to_and_before(const ArrayPointer& other, const jlong data_size) const {
    if (!_is_valid || !other._is_valid) { return false; }

    // Offset adjacent?
    if (this->_constant_offset + data_size != other._constant_offset) { return false; }

    // All other components identical?
    if (this->_base != other._base ||
        this->_int_offset != other._int_offset ||
        this->_int_offset_shift != other._int_offset_shift ||
        this->_other_offsets->length() != other._other_offsets->length()) {
      return false;
    }

    for (int i = 0; i < this->_other_offsets->length(); i++) {
      Node* o1 = this->_other_offsets->at(i);
      Node* o2 = other._other_offsets->at(i);
      if (o1 != o2) { return false; }
    }

    return true;
  }

#ifndef PRODUCT
  // Print the decomposed form to tty, terminated by a newline in both the
  // valid and the invalid case. Read-only, hence const.
  void dump() const {
    if (!_is_valid) {
      tty->print_cr("ArrayPointer[%d %s, invalid]", _pointer->_idx, _pointer->Name());
      return;
    }
    tty->print("ArrayPointer[%d %s, base[%d %s] + %lld",
               _pointer->_idx, _pointer->Name(),
               _base->_idx, _base->Name(),
               (long long)_constant_offset);
    // The int index is optional: only print it when one was parsed.
    if (_int_offset != nullptr) {
      tty->print(" + I2L[%d %s] << %d",
                 _int_offset->_idx, _int_offset->Name(), _int_offset_shift);
    }
    for (int i = 0; i < _other_offsets->length(); i++) {
      Node* n = _other_offsets->at(i);
      tty->print(" + [%d %s]", n->_idx, n->Name());
    }
    tty->print_cr("]");
  }
#endif
};
// Link together multiple stores (B/S/C/I) into a longer one.
//
// Example: _store = StoreB[i+3]
//
// RangeCheck[i+0] RangeCheck[i+0]
// StoreB[i+0]
// RangeCheck[i+1] RangeCheck[i+1]
// StoreB[i+1] --> pass: fail:
// StoreB[i+2] StoreI[i+0] StoreB[i+0]
// StoreB[i+3]
//
// The 4 StoreB are merged into a single StoreI node. We have to be careful with RangeCheck[i+1]: before
// the optimization, if this RangeCheck[i+1] fails, then we execute only StoreB[i+0], and then trap. After
// the optimization, the new StoreI[i+0] is on the passing path of RangeCheck[i+1], and StoreB[i+0] on the
// failing path.
//
// Note: For normal array stores, every store at first has a RangeCheck. But they can be removed with:
// - RCE (RangeCheck Elimination): the RangeChecks in the loop are hoisted out and before the loop,
// and possibly no RangeChecks remain between the stores.
// - RangeCheck smearing: the earlier RangeChecks are adjusted such that they cover later RangeChecks,
// and those later RangeChecks can be removed. Example:
//
// RangeCheck[i+0]                     RangeCheck[i+0] <- before first store
// StoreB[i+0]                         StoreB[i+0]     <- first store
// RangeCheck[i+1] --> smeared -->     RangeCheck[i+3] <- only RC between first and last store
// StoreB[i+1]                         StoreB[i+1]     <- second store
// RangeCheck[i+2] --> removed
// StoreB[i+2]                         StoreB[i+2]
// RangeCheck[i+3] --> removed
// StoreB[i+3]                         StoreB[i+3]     <- last store
//
// Thus, it is a common pattern that between the first and last store in a chain
// of adjacent stores there remains exactly one RangeCheck, located between the
// first and the second store (e.g. RangeCheck[i+3]).
//
class MergePrimitiveArrayStores : public StackObj {
private:
  PhaseGVN*  _phase;  // Phase used for type queries and for creating new nodes
  StoreNode* _store;  // The store we start from; must turn out to be the last of a chain

public:
  MergePrimitiveArrayStores(PhaseGVN* phase, StoreNode* store) : _phase(phase), _store(store) {}

  // Entry point: returns the merged store, or nullptr if nothing was merged.
  StoreNode* run();

private:
  // --- Pairwise predicates -------------------------------------------------
  bool is_compatible_store(const StoreNode* other_store) const;
  bool is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const;
  bool is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const;
  static bool is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out);

  // --- Control flow between a def store and its use store ------------------
  enum CFGStatus { SuccessNoRangeCheck, SuccessWithRangeCheck, Failure };
  static CFGStatus cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store);

  // Result of a store search: the store that was found (nullptr on failure)
  // and whether a RangeCheck sits between it and the store we searched from.
  class Status {
  private:
    StoreNode* _found_store;
    bool       _found_range_check;

    Status(StoreNode* found_store, bool found_range_check)
      : _found_store(found_store), _found_range_check(found_range_check) {}

  public:
    StoreNode* found_store() const { return _found_store; }
    bool found_range_check() const { return _found_range_check; }

    // "Nothing found": no store, no RangeCheck.
    static Status make_failure() { return Status(nullptr, false); }

    // A CFG failure discards the store; otherwise the RangeCheck flag is
    // derived from the CFG status.
    static Status make(StoreNode* found_store, const CFGStatus cfg_status) {
      const bool cfg_failed = (cfg_status == CFGStatus::Failure);
      return cfg_failed ? Status::make_failure()
                        : Status(found_store, cfg_status == CFGStatus::SuccessWithRangeCheck);
    }
  };

  // --- Searching along the memory chain ------------------------------------
  Status find_adjacent_use_store(const StoreNode* def_store) const;
  Status find_adjacent_def_store(const StoreNode* use_store) const;
  Status find_use_store(const StoreNode* def_store) const;
  Status find_def_store(const StoreNode* use_store) const;
  Status find_use_store_unidirectional(const StoreNode* def_store) const;
  Status find_def_store_unidirectional(const StoreNode* use_store) const;

  // --- Building the merged store -------------------------------------------
  void collect_merge_list(Node_List& merge_list) const;
  Node* make_merged_input_value(const Node_List& merge_list);
  StoreNode* make_merged_store(const Node_List& merge_list, Node* merged_input_value);

  DEBUG_ONLY( void trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const; )
};
// Try to merge _store with the stores directly before it on the memory chain.
// Returns the new, wider store on success and nullptr if no merge is possible
// (wrong store kind, not a plain primitive array store, _store is not the last
// store of its chain, no adjacent predecessor, or the input values cannot be
// combined).
StoreNode* MergePrimitiveArrayStores::run() {
  // Check for B/S/C/I: only StoreB, StoreC and StoreI are candidates.
  int opc = _store->Opcode();
  if (opc != Op_StoreB && opc != Op_StoreC && opc != Op_StoreI) {
    return nullptr;
  }

  // Only merge stores on arrays. The address type may be missing altogether,
  // so check it before asking whether it is an array pointer.
  const TypePtr* adr_t = _store->adr_type();
  if (adr_t == nullptr) {
    return nullptr;
  }
  const TypeAryPtr* aryptr_t = adr_t->isa_aryptr();
  if (aryptr_t == nullptr) {
    return nullptr;
  }

  // The stores must have the same size as the array elements. The element
  // type is not guaranteed to be a Java primitive; type2aelembytes must
  // only be asked about real primitive types.
  const BasicType elem_bt = aryptr_t->elem()->array_element_basic_type();
  if (!is_java_primitive(elem_bt) ||
      type2aelembytes(elem_bt) != _store->memory_size()) {
    return nullptr;
  }

  // The address parsing folds "ConvI2L(int_offset + int_con)" into
  // "ConvI2L(int_offset) + int_con". That is only justified for accesses that
  // are guarded by a RangeCheck. Unsafe accesses have no such guard, so an
  // int overflow could make two non-adjacent addresses look adjacent.
  if (_store->is_unsafe_access()) {
    return nullptr;
  }

  // The _store must be the "last" store in a chain. If we find a use we could merge with
  // then that use or a store further down is the "last" store.
  Status status_use = find_adjacent_use_store(_store);
  if (status_use.found_store() != nullptr) {
    return nullptr;
  }

  // Check if we can merge with at least one def, so that we have at least 2 stores to merge.
  Status status_def = find_adjacent_def_store(_store);
  if (status_def.found_store() == nullptr) {
    return nullptr;
  }

  ResourceMark rm;
  Node_List merge_list;
  collect_merge_list(merge_list);

  // The values may turn out not to be combinable even though the stores are adjacent.
  Node* merged_input_value = make_merged_input_value(merge_list);
  if (merged_input_value == nullptr) { return nullptr; }

  StoreNode* merged_store = make_merged_store(merge_list, merged_input_value);

  DEBUG_ONLY( if(TraceMergeStores) { trace(merge_list, merged_input_value, merged_store); } )

  return merged_store;
}
// Check compatibility between _store and other_store.
bool MergePrimitiveArrayStores::is_compatible_store(const StoreNode* other_store) const {
int opc = _store->Opcode();
assert(opc == Op_StoreB || opc == Op_StoreC || opc == Op_StoreI, "precondition");
assert(_store->adr_type()->isa_aryptr() != nullptr, "must be array store");
if (other_store == nullptr ||
_store->Opcode() != other_store->Opcode() ||
other_store->adr_type()->isa_aryptr() == nullptr) {
return false;
}
// Check that the size of the stores, and the array elements are all the same.
const TypeAryPtr* aryptr_t1 = _store->adr_type()->is_aryptr();
const TypeAryPtr* aryptr_t2 = other_store->adr_type()->is_aryptr();
int size1 = type2aelembytes(aryptr_t1->elem()->array_element_basic_type());
int size2 = type2aelembytes(aryptr_t2->elem()->array_element_basic_type());
if (size1 != size2 ||
size1 != _store->memory_size() ||
_store->memory_size() != other_store->memory_size()) {
return false;
}
return true;
}
bool MergePrimitiveArrayStores::is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const {
if (!is_adjacent_input_pair(def_store->in(MemNode::ValueIn),
use_store->in(MemNode::ValueIn),
def_store->memory_size())) {
return false;
}
ResourceMark rm;
ArrayPointer array_pointer_use = ArrayPointer::make(_phase, use_store->in(MemNode::Address));
ArrayPointer array_pointer_def = ArrayPointer::make(_phase, def_store->in(MemNode::Address));
if (!array_pointer_def.is_adjacent_to_and_before(array_pointer_use, use_store->memory_size())) {
return false;
}
return true;
}
bool MergePrimitiveArrayStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const {
// Pattern: [n1 = ConI, n2 = ConI]
if (n1->Opcode() == Op_ConI) {
return n2->Opcode() == Op_ConI;
}
// Pattern: [n1 = base >> shift, n2 = base >> (shift + memory_size)]
Node const* base_n2;
jint shift_n2;
if (!is_con_RShift(n2, base_n2, shift_n2)) {
return false;
}
if (n1->Opcode() == Op_ConvL2I) {
// look through
n1 = n1->in(1);
}
Node const* base_n1;
jint shift_n1;
if (n1 == base_n2) {
// n1 = base = base >> 0
base_n1 = n1;
shift_n1 = 0;
} else if (!is_con_RShift(n1, base_n1, shift_n1)) {
return false;
}
int bits_per_store = memory_size * 8;
if (base_n1 != base_n2 ||
shift_n1 + bits_per_store != shift_n2 ||
shift_n1 % bits_per_store != 0) {
return false;
}
// both load from same value with correct shift
return true;
}
// Detect pattern: n = base_out >> shift_out
bool MergePrimitiveArrayStores::is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out) {
assert(n != nullptr, "precondition");
int opc = n->Opcode();
if (opc == Op_ConvL2I) {
n = n->in(1);
opc = n->Opcode();
}
if ((opc == Op_RShiftI ||
opc == Op_RShiftL ||
opc == Op_URShiftI ||
opc == Op_URShiftL) &&
n->in(2)->is_ConI()) {
base_out = n->in(1);
shift_out = n->in(2)->get_int();
assert(shift_out >= 0, "must be positive");
return true;
}
return false;
}
// Classify what lies between def_store and use_store in the control flow:
//   SuccessNoRangeCheck   - same control, and use_store is def_store's only use
//   SuccessWithRangeCheck - exactly one RangeCheck in between, whose failing
//                           path goes to a range-check uncommon trap that
//                           takes def_store's memory state through a MergeMem
//   Failure               - anything else
MergePrimitiveArrayStores::CFGStatus MergePrimitiveArrayStores::cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store) {
  assert(use_store->in(MemNode::Memory) == def_store, "use-def relationship");

  Node* const use_ctrl = use_store->in(MemNode::Control);
  Node* const def_ctrl = def_store->in(MemNode::Control);
  if (use_ctrl == nullptr || def_ctrl == nullptr) {
    return CFGStatus::Failure;
  }

  if (use_ctrl == def_ctrl) {
    // Same ctrl -> no RangeCheck in between.
    // use_store must then be the only use of def_store.
    return (def_store->outcnt() > 1) ? CFGStatus::Failure
                                     : CFGStatus::SuccessNoRangeCheck;
  }

  // Different ctrl -> could have RangeCheck in between.
  // Step 1: def_store must have exactly two uses: use_store and one other.
  if (def_store->outcnt() != 2) {
    return CFGStatus::Failure;
  }

  // Step 2: the other use must be a MergeMem with a single user.
  Node* const other_use = (def_store->raw_out(0) == use_store) ? def_store->raw_out(1)
                                                               : def_store->raw_out(0);
  Node* const merge_mem = other_use->isa_MergeMem();
  if (merge_mem == nullptr || merge_mem->outcnt() != 1) {
    return CFGStatus::Failure; // Does not have MergeMem for uncommon trap.
  }

  // Step 3: use_store's control must be a projection of a two-way RangeCheck.
  if (!use_ctrl->is_IfProj()) {
    return CFGStatus::Failure; // Not RangeCheck.
  }
  Node* const range_check = use_ctrl->in(0);
  if (!range_check->is_RangeCheck() || range_check->outcnt() != 2) {
    return CFGStatus::Failure; // Not RangeCheck.
  }

  // Step 4: the other projection must lead to the range-check uncommon trap
  // that consumes merge_mem, and the RangeCheck must hang off def_store's control.
  ProjNode* const failing_proj = use_ctrl->as_IfProj()->other_if_proj();
  Node* const trap = failing_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check);
  if (trap != merge_mem->unique_out() || range_check->in(0) != def_ctrl) {
    return CFGStatus::Failure; // Not RangeCheck with merge_mem leading to uncommon trap.
  }

  return CFGStatus::SuccessWithRangeCheck;
}
// Like find_use_store, but a store that was found is only reported if it is
// also adjacent to def_store. A search failure is passed through unchanged.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_use_store(const StoreNode* def_store) const {
  Status candidate = find_use_store(def_store);
  StoreNode* found = candidate.found_store();
  if (found == nullptr) {
    return candidate;
  }
  if (is_adjacent_pair(found, def_store)) {
    return candidate;
  }
  return Status::make_failure();
}
// Like find_def_store, but a store that was found is only reported if
// use_store is adjacent to it. A search failure is passed through unchanged.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_def_store(const StoreNode* use_store) const {
  Status candidate = find_def_store(use_store);
  StoreNode* found = candidate.found_store();
  if (found == nullptr) {
    return candidate;
  }
  if (is_adjacent_pair(use_store, found)) {
    return candidate;
  }
  return Status::make_failure();
}
// Find the compatible store that uses def_store as its memory input. In debug
// builds, additionally verify that searching back from the result leads to
// def_store again with the same RangeCheck flag.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store(const StoreNode* def_store) const {
  Status downward = find_use_store_unidirectional(def_store);

#ifdef ASSERT
  if (downward.found_store() != nullptr) {
    Status upward = find_def_store_unidirectional(downward.found_store());
    assert(upward.found_store() == def_store &&
           upward.found_range_check() == downward.found_range_check(),
           "find_use_store and find_def_store must be symmetric");
  }
#endif

  return downward;
}
// Find the compatible store that is the memory input of use_store. In debug
// builds, additionally verify that searching forward from the result leads to
// use_store again with the same RangeCheck flag.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store(const StoreNode* use_store) const {
  Status upward = find_def_store_unidirectional(use_store);

#ifdef ASSERT
  if (upward.found_store() != nullptr) {
    Status downward = find_use_store_unidirectional(upward.found_store());
    assert(downward.found_store() == use_store &&
           downward.found_range_check() == upward.found_range_check(),
           "find_use_store and find_def_store must be symmetric");
  }
#endif

  return upward;
}
// Scan the outputs of def_store and take the first one that is a compatible
// store. Its CFG relation to def_store decides success or failure; later
// outputs are not considered.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store_unidirectional(const StoreNode* def_store) const {
  assert(is_compatible_store(def_store), "precondition: must be compatible with _store");

  for (DUIterator_Fast imax, i = def_store->fast_outs(imax); i < imax; i++) {
    StoreNode* candidate = def_store->fast_out(i)->isa_Store();
    if (!is_compatible_store(candidate)) {
      continue; // not a store (nullptr) or not compatible
    }
    return Status::make(candidate, cfg_status_for_pair(candidate, def_store));
  }

  return Status::make_failure();
}
// Look at the memory input of use_store. If it is a compatible store, its CFG
// relation to use_store decides success or failure; otherwise fail.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store_unidirectional(const StoreNode* use_store) const {
  assert(is_compatible_store(use_store), "precondition: must be compatible with _store");

  StoreNode* mem_input = use_store->in(MemNode::Memory)->isa_Store();
  if (is_compatible_store(mem_input)) {
    return Status::make(mem_input, cfg_status_for_pair(use_store, mem_input));
  }
  return Status::make_failure();
}
// Fill merge_list with the stores to merge, starting with _store and walking
// up the chain of adjacent def stores. The walk stops when no adjacent def is
// found, when the merged store would exceed 8 bytes, or right after crossing
// a RangeCheck. Finally the list is cut down to a power-of-2 length by
// dropping the stores that were collected last.
void MergePrimitiveArrayStores::collect_merge_list(Node_List& merge_list) const {
  // The merged store can be at most 8 bytes.
  const uint max_stores = 8 / _store->memory_size();
  assert(max_stores >= 2 &&
         max_stores <= 8 &&
         is_power_of_2(max_stores),
         "must be 2, 4 or 8");

  // Traverse up the chain of adjacent def stores.
  StoreNode* cursor = _store;
  merge_list.push(cursor);
  while (merge_list.size() < max_stores) {
    Status step = find_adjacent_def_store(cursor);
    cursor = step.found_store();
    if (cursor == nullptr) {
      break; // chain ends here
    }
    merge_list.push(cursor);

    // We can have at most one RangeCheck.
    if (step.found_range_check()) {
      break;
    }
  }

  // Truncate the merge_list to a power of 2.
  const uint keep = round_down_power_of_2(merge_list.size());
  assert(keep >= 2, "must be merging at least 2 stores");
  while (merge_list.size() > keep) {
    merge_list.pop();
  }
}
// Merge the input values of the smaller stores to a single larger input value.
// merge_list is ordered from _store (index 0) to the first store (last index).
// Returns nullptr if the values follow the shift pattern but the first
// store's value is not the common base.
Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_list) {
  const uint store_count = merge_list.size();
  int new_memory_size = _store->memory_size() * store_count;
  Node* const last_value = _store->in(MemNode::ValueIn);
  Node* merged_input_value = nullptr;

  if (last_value->Opcode() != Op_ConI) {
    // Pattern: [base >> 24, base >> 16, base >> 8, base] -> base
    //             |                                  |
    //           _store                             first
    //
    Node* first = merge_list.at(store_count - 1);
    merged_input_value = first->in(MemNode::ValueIn);

    Node const* base_last;
    jint shift_last;
    bool is_true = is_con_RShift(last_value, base_last, shift_last);
    assert(is_true, "must detect con RShift");

    // The first store may see the base through a ConvL2I: look through it.
    if (merged_input_value != base_last && merged_input_value->Opcode() == Op_ConvL2I) {
      merged_input_value = merged_input_value->in(1);
    }
    if (merged_input_value != base_last) {
      // merged_input_value is not the base
      return nullptr;
    }
  } else {
    // Pattern: [ConI, ConI, ...] -> new constant
    // Index 0 is _store; its bits are shifted up the most, so they end up
    // highest in the combined constant.
    const jlong bits_per_store = _store->memory_size() * 8;
    const jlong mask = (((jlong)1) << bits_per_store) - 1;
    jlong con = 0;
    for (uint i = 0; i < store_count; i++) {
      const jlong con_i = merge_list.at(i)->in(MemNode::ValueIn)->get_int();
      con = (con << bits_per_store) | (mask & con_i);
    }
    merged_input_value = _phase->longcon(con);
  }

  // A long value feeding a store of at most 4 bytes must be narrowed to int.
  // Example:
  //
  //   long base = ...;
  //   a[0] = (byte)(base >> 0);
  //   a[1] = (byte)(base >> 8);
  //
  const bool value_is_long = (_phase->type(merged_input_value)->isa_long() != nullptr);
  if (value_is_long && new_memory_size <= 4) {
    merged_input_value = _phase->transform(new ConvL2INode(merged_input_value));
  }

  assert((_phase->type(merged_input_value)->isa_int() != nullptr && new_memory_size <= 4) ||
         (_phase->type(merged_input_value)->isa_long() != nullptr && new_memory_size == 8),
         "merged_input_value is either int or long, and new_memory_size is small enough");

  return merged_input_value;
}
// //
// first_ctrl first_mem first_adr first_ctrl first_mem first_adr //
// | | | | | | //
// | | | | +---------------+ | //
// | | | | | | | //
// | | +---------+ | | +---------------+ //
// | | | | | | | | //
// +--------------+ | | v1 +------------------------------+ | | v1 //
// | | | | | | | | | | | | //
// RangeCheck first_store RangeCheck | | first_store //
// | | | | | | | //
// last_ctrl | +----> unc_trap last_ctrl | | +----> unc_trap //
// | | ===> | | | //
// +--------------+ | a2 v2 | | | //
// | | | | | | | | //
// | second_store | | | //
// | | | | | [v1 v2 ... vn] //
// ... ... | | | | //
// | | | | | v //
// +--------------+ | an vn +--------------+ | | merged_input_value //
// | | | | | | | | //
// last_store (= _store) merged_store //
// //
// Build the wide store that replaces all stores in merge_list. It takes its
// control from _store (the last store), its memory state and address from the
// first store (last entry of merge_list), and stores merged_input_value.
StoreNode* MergePrimitiveArrayStores::make_merged_store(const Node_List& merge_list, Node* merged_input_value) {
  const uint store_count = merge_list.size();
  Node* const first_store = merge_list.at(store_count - 1);

  // Pick the basic type matching the combined size. Any other size leaves
  // T_ILLEGAL, exactly as before.
  const int new_memory_size = _store->memory_size() * store_count;
  BasicType bt;
  if (new_memory_size == 2) {
    bt = T_SHORT;
  } else if (new_memory_size == 4) {
    bt = T_INT;
  } else if (new_memory_size == 8) {
    bt = T_LONG;
  } else {
    bt = T_ILLEGAL;
  }

  Node* const last_ctrl = _store->in(MemNode::Control); // after (optional) RangeCheck
  Node* const first_mem = first_store->in(MemNode::Memory);
  Node* const first_adr = first_store->in(MemNode::Address);
  const TypePtr* const new_adr_type = _store->adr_type();

  StoreNode* merged_store = StoreNode::make(*_phase, last_ctrl, first_mem, first_adr,
                                            new_adr_type, merged_input_value, bt, MemNode::unordered);

  // Marking the store mismatched is sufficient to prevent reordering, since array stores
  // are all on the same slice. Hence, we need no barriers.
  merged_store->set_mismatched_access();

  // Constants above may now also be packed -> put candidate on worklist
  _phase->is_IterGVN()->_worklist.push(first_mem);

  return merged_store;
}
#ifdef ASSERT
// Debug-only: print the stores being replaced (first store first, _store
// last), followed by the merged input value and the merged store. Everything
// is collected in a string stream and written to tty in one go.
void MergePrimitiveArrayStores::trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const {
  stringStream ss;

  ss.print_cr("[TraceMergeStores]: Replace");
  // merge_list is ordered last-to-first, so walk it backwards.
  for (uint remaining = merge_list.size(); remaining > 0; remaining--) {
    merge_list.at(remaining - 1)->dump("\n", false, &ss);
  }

  ss.print_cr("[TraceMergeStores]: with");
  merged_input_value->dump("\n", false, &ss);
  merged_store->dump("\n", false, &ss);

  tty->print("%s", ss.as_string());
}
#endif
//------------------------------Ideal------------------------------------------ //------------------------------Ideal------------------------------------------
// Change back-to-back Store(, p, x) -> Store(m, p, y) to Store(m, p, x). // Change back-to-back Store(, p, x) -> Store(m, p, y) to Store(m, p, x).
// When a store immediately follows a relevant allocation/initialization, // When a store immediately follows a relevant allocation/initialization,
@ -2782,6 +3459,18 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
} }
} }
#ifdef VM_LITTLE_ENDIAN
if (MergeStores && UseUnalignedAccesses) {
if (phase->C->post_loop_opts_phase()) {
MergePrimitiveArrayStores merge(phase, this);
Node* progress = merge.run();
if (progress != nullptr) { return progress; }
} else {
phase->C->record_for_post_loop_opts_igvn(this);
}
}
#endif
return nullptr; // No further progress return nullptr; // No further progress
} }

View File

@ -2273,7 +2273,15 @@ void PhasePeephole::print_statistics() {
//------------------------------set_req_X-------------------------------------- //------------------------------set_req_X--------------------------------------
void Node::set_req_X( uint i, Node *n, PhaseIterGVN *igvn ) { void Node::set_req_X( uint i, Node *n, PhaseIterGVN *igvn ) {
assert( is_not_dead(n), "can not use dead node"); assert( is_not_dead(n), "can not use dead node");
assert( igvn->hash_find(this) != this, "Need to remove from hash before changing edges" ); #ifdef ASSERT
if (igvn->hash_find(this) == this) {
tty->print_cr("Need to remove from hash before changing edges");
this->dump(1);
tty->print_cr("Set at i = %d", i);
n->dump();
assert(false, "Need to remove from hash before changing edges");
}
#endif
Node *old = in(i); Node *old = in(i);
set_req(i, n); set_req(i, n);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,696 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import jdk.internal.misc.Unsafe;
import jdk.internal.util.ByteArrayLittleEndian;
import java.util.concurrent.TimeUnit;
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 3, time = 3)
@Measurement(iterations = 3, time = 3)
@Fork(value = 3, jvmArgsAppend = {
"--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
"--add-exports", "java.base/jdk.internal.util=ALL-UNNAMED"})
@State(Scope.Benchmark)
public class MergeStores {
// Length of every array used by the benchmarks in this class.
public static final int RANGE = 100;
// Unsafe instance for the *_unsafe benchmark variants.
static Unsafe UNSAFE = Unsafe.getUnsafe();
// Benchmark parameters filled in by JMH (@Param); all default to "1".
// NOTE(review): no visible benchmark reads vS/vI/vL; presumably used further
// down in the file.
@Param("1")
public static short vS;
@Param("1")
public static int vI;
@Param("1")
public static long vL;
// Start index used by the *_offs_* benchmarks.
public static int offset = 5;
// Shared target arrays, one per element type.
public static byte[] aB = new byte[RANGE];
public static short[] aS = new short[RANGE];
public static int[] aI = new int[RANGE];
// -------------------------------------------
// ------- Little-Endian API ----------
// -------------------------------------------
// Store a short into a byte array in little-endian order, one byte at a time:
// the low byte goes to bytes[offset], the high byte to bytes[offset + 1].
static void storeShortLE(byte[] bytes, int offset, short value) {
storeBytes(bytes, offset, (byte)(value >> 0),
(byte)(value >> 8));
}
// Store an int into a byte array in little-endian order, one byte at a time:
// the lowest byte goes to bytes[offset], the highest to bytes[offset + 3].
static void storeIntLE(byte[] bytes, int offset, int value) {
storeBytes(bytes, offset, (byte)(value >> 0 ),
(byte)(value >> 8 ),
(byte)(value >> 16),
(byte)(value >> 24));
}
// Store a long into a byte array in little-endian order, one byte at a time:
// the lowest byte goes to bytes[offset], the highest to bytes[offset + 7].
static void storeLongLE(byte[] bytes, int offset, long value) {
storeBytes(bytes, offset, (byte)(value >> 0 ),
(byte)(value >> 8 ),
(byte)(value >> 16),
(byte)(value >> 24),
(byte)(value >> 32),
(byte)(value >> 40),
(byte)(value >> 48),
(byte)(value >> 56));
}
// Store 2 bytes into an array
static void storeBytes(byte[] bytes, int offset, byte b0, byte b1) {
bytes[offset + 0] = b0;
bytes[offset + 1] = b1;
}
// Store 4 bytes into an array
static void storeBytes(byte[] bytes, int offset, byte b0, byte b1, byte b2, byte b3) {
bytes[offset + 0] = b0;
bytes[offset + 1] = b1;
bytes[offset + 2] = b2;
bytes[offset + 3] = b3;
}
// Store 8 bytes into an array
static void storeBytes(byte[] bytes, int offset, byte b0, byte b1, byte b2, byte b3,
byte b4, byte b5, byte b6, byte b7) {
bytes[offset + 0] = b0;
bytes[offset + 1] = b1;
bytes[offset + 2] = b2;
bytes[offset + 3] = b3;
bytes[offset + 4] = b4;
bytes[offset + 5] = b5;
bytes[offset + 6] = b6;
bytes[offset + 7] = b7;
}
// -------------------------------- BENCHMARKS --------------------------------
@Benchmark
public void baseline() {
}
@Benchmark
public byte[] baseline_allocate() {
byte[] aB = new byte[RANGE];
return aB;
}
@Benchmark
public byte[] store_B2_con_adr0_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[0] = (byte)0x01;
aB[1] = (byte)0x02;
return aB;
}
@Benchmark
public byte[] store_B2_con_adr1_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[1] = (byte)0x01;
aB[2] = (byte)0x02;
return aB;
}
@Benchmark
public byte[] store_B2_con_offs_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[offset + 0] = (byte)0x01;
aB[offset + 1] = (byte)0x02;
return aB;
}
@Benchmark
public byte[] store_B2_con_offs_allocate_unsafe() {
byte[] aB = new byte[RANGE];
UNSAFE.putShortUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, (short)0x0201);
return aB;
}
@Benchmark
public byte[] store_B2_con_offs_allocate_bale() {
byte[] aB = new byte[RANGE];
ByteArrayLittleEndian.setShort(aB, offset, (short)0x0201);
return aB;
}
@Benchmark
public byte[] store_B2_con_offs_allocate_leapi() {
byte[] aB = new byte[RANGE];
storeShortLE(aB, offset, (short)0x0201);
return aB;
}
@Benchmark
public byte[] store_B2_con_offs_nonalloc_direct() {
aB[offset + 0] = (byte)0x01;
aB[offset + 1] = (byte)0x02;
return aB;
}
@Benchmark
public byte[] store_B2_con_offs_nonalloc_unsafe() {
UNSAFE.putShortUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, (short)0x0201);
return aB;
}
@Benchmark
public byte[] store_B2_con_offs_nonalloc_bale() {
ByteArrayLittleEndian.setShort(aB, offset, (short)0x0201);
return aB;
}
@Benchmark
public byte[] store_B2_con_offs_nonalloc_leapi() {
storeShortLE(aB, offset, (short)0x0201);
return aB;
}
@Benchmark
public byte[] store_B2_S_offs_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[offset + 0] = (byte)(vS >> 0 );
aB[offset + 1] = (byte)(vS >> 8 );
return aB;
}
@Benchmark
public byte[] store_B2_S_offs_allocate_unsafe() {
byte[] aB = new byte[RANGE];
UNSAFE.putShortUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vS);
return aB;
}
@Benchmark
public byte[] store_B2_S_offs_allocate_bale() {
byte[] aB = new byte[RANGE];
ByteArrayLittleEndian.setShort(aB, offset, vS);
return aB;
}
@Benchmark
public byte[] store_B2_S_offs_allocate_leapi() {
byte[] aB = new byte[RANGE];
storeShortLE(aB, offset, vS);
return aB;
}
@Benchmark
public byte[] store_B2_S_offs_nonalloc_direct() {
aB[offset + 0] = (byte)(vS >> 0 );
aB[offset + 1] = (byte)(vS >> 8 );
return aB;
}
@Benchmark
public byte[] store_B2_S_offs_nonalloc_unsafe() {
UNSAFE.putShortUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vS);
return aB;
}
@Benchmark
public byte[] store_B2_S_offs_nonalloc_bale() {
ByteArrayLittleEndian.setShort(aB, offset, vS);
return aB;
}
@Benchmark
public byte[] store_B2_S_offs_nonalloc_leapi() {
storeShortLE(aB, offset, vS);
return aB;
}
@Benchmark
public byte[] store_B4_con_adr0_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[0] = (byte)0x01;
aB[1] = (byte)0x02;
aB[2] = (byte)0x03;
aB[3] = (byte)0x04;
return aB;
}
@Benchmark
public byte[] store_B4_con_adr1_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[1] = (byte)0x01;
aB[2] = (byte)0x02;
aB[3] = (byte)0x03;
aB[4] = (byte)0x04;
return aB;
}
@Benchmark
public byte[] store_B4_con_offs_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[offset + 0] = (byte)0x01;
aB[offset + 1] = (byte)0x02;
aB[offset + 2] = (byte)0x03;
aB[offset + 3] = (byte)0x04;
return aB;
}
@Benchmark
public byte[] store_B4_con_offs_allocate_unsafe() {
byte[] aB = new byte[RANGE];
UNSAFE.putIntUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, 0x04030201);
return aB;
}
@Benchmark
public byte[] store_B4_con_offs_allocate_bale() {
byte[] aB = new byte[RANGE];
ByteArrayLittleEndian.setInt(aB, offset, 0x04030201);
return aB;
}
@Benchmark
public byte[] store_B4_con_offs_allocate_leapi() {
byte[] aB = new byte[RANGE];
storeIntLE(aB, offset, 0x04030201);
return aB;
}
@Benchmark
public byte[] store_B4_con_offs_nonalloc_direct() {
aB[offset + 0] = (byte)0x01;
aB[offset + 1] = (byte)0x02;
aB[offset + 2] = (byte)0x03;
aB[offset + 3] = (byte)0x04;
return aB;
}
@Benchmark
public byte[] store_B4_con_offs_nonalloc_unsafe() {
UNSAFE.putIntUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, 0x04030201);
return aB;
}
@Benchmark
public byte[] store_B4_con_offs_nonalloc_bale() {
ByteArrayLittleEndian.setInt(aB, offset, 0x04030201);
return aB;
}
@Benchmark
public byte[] store_B4_con_offs_nonalloc_leapi() {
storeIntLE(aB, offset, 0x04030201);
return aB;
}
@Benchmark
public byte[] store_B4_I_offs_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[offset + 0] = (byte)(vI >> 0 );
aB[offset + 1] = (byte)(vI >> 8 );
aB[offset + 2] = (byte)(vI >> 16);
aB[offset + 3] = (byte)(vI >> 24);
return aB;
}
@Benchmark
public byte[] store_B4_I_offs_allocate_unsafe() {
byte[] aB = new byte[RANGE];
UNSAFE.putIntUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vI);
return aB;
}
@Benchmark
public byte[] store_B4_I_offs_allocate_bale() {
byte[] aB = new byte[RANGE];
ByteArrayLittleEndian.setInt(aB, offset, vI);
return aB;
}
@Benchmark
public byte[] store_B4_I_offs_allocate_leapi() {
byte[] aB = new byte[RANGE];
storeIntLE(aB, offset, vI);
return aB;
}
@Benchmark
public byte[] store_B4_I_offs_nonalloc_direct() {
aB[offset + 0] = (byte)(vI >> 0 );
aB[offset + 1] = (byte)(vI >> 8 );
aB[offset + 2] = (byte)(vI >> 16);
aB[offset + 3] = (byte)(vI >> 24);
return aB;
}
@Benchmark
public byte[] store_B4_I_offs_nonalloc_unsafe() {
UNSAFE.putIntUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vI);
return aB;
}
@Benchmark
public byte[] store_B4_I_offs_nonalloc_bale() {
ByteArrayLittleEndian.setInt(aB, offset, vI);
return aB;
}
@Benchmark
public byte[] store_B4_I_offs_nonalloc_leapi() {
storeIntLE(aB, offset, vI);
return aB;
}
@Benchmark
public byte[] store_B8_con_adr0_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[0] = (byte)0x01;
aB[1] = (byte)0x02;
aB[2] = (byte)0x03;
aB[3] = (byte)0x04;
aB[4] = (byte)0x05;
aB[5] = (byte)0x06;
aB[6] = (byte)0x07;
aB[7] = (byte)0x08;
return aB;
}
@Benchmark
public byte[] store_B8_con_adr1_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[1] = (byte)0x01;
aB[2] = (byte)0x02;
aB[3] = (byte)0x03;
aB[4] = (byte)0x04;
aB[5] = (byte)0x05;
aB[6] = (byte)0x06;
aB[7] = (byte)0x07;
aB[8] = (byte)0x08;
return aB;
}
@Benchmark
public byte[] store_B8_con_offs_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[offset + 0] = (byte)0x01;
aB[offset + 1] = (byte)0x02;
aB[offset + 2] = (byte)0x03;
aB[offset + 3] = (byte)0x04;
aB[offset + 4] = (byte)0x05;
aB[offset + 5] = (byte)0x06;
aB[offset + 6] = (byte)0x07;
aB[offset + 7] = (byte)0x08;
return aB;
}
@Benchmark
public byte[] store_B8_con_offs_allocate_unsafe() {
byte[] aB = new byte[RANGE];
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, 0x0807060504030201L);
return aB;
}
@Benchmark
public byte[] store_B8_con_offs_allocate_bale() {
byte[] aB = new byte[RANGE];
ByteArrayLittleEndian.setLong(aB, offset, 0x0807060504030201L);
return aB;
}
@Benchmark
public byte[] store_B8_con_offs_allocate_leapi() {
byte[] aB = new byte[RANGE];
storeLongLE(aB, offset, 0x0807060504030201L);
return aB;
}
@Benchmark
public byte[] store_B8_con_offs_nonalloc_direct() {
aB[offset + 0] = (byte)0x01;
aB[offset + 1] = (byte)0x02;
aB[offset + 2] = (byte)0x03;
aB[offset + 3] = (byte)0x04;
aB[offset + 4] = (byte)0x05;
aB[offset + 5] = (byte)0x06;
aB[offset + 6] = (byte)0x07;
aB[offset + 7] = (byte)0x08;
return aB;
}
@Benchmark
public byte[] store_B8_con_offs_nonalloc_unsafe() {
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, 0x0807060504030201L);
return aB;
}
@Benchmark
public byte[] store_B8_con_offs_nonalloc_bale() {
ByteArrayLittleEndian.setLong(aB, offset, 0x0807060504030201L);
return aB;
}
@Benchmark
public byte[] store_B8_con_offs_nonalloc_leapi() {
storeLongLE(aB, offset, 0x0807060504030201L);
return aB;
}
@Benchmark
public byte[] store_B8_L_offs_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[offset + 0] = (byte)(vL >> 0 );
aB[offset + 1] = (byte)(vL >> 8 );
aB[offset + 2] = (byte)(vL >> 16);
aB[offset + 3] = (byte)(vL >> 24);
aB[offset + 4] = (byte)(vL >> 32);
aB[offset + 5] = (byte)(vL >> 40);
aB[offset + 6] = (byte)(vL >> 48);
aB[offset + 7] = (byte)(vL >> 56);
return aB;
}
@Benchmark
public byte[] store_B8_L_offs_allocate_unsafe() {
byte[] aB = new byte[RANGE];
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vL);
return aB;
}
@Benchmark
public byte[] store_B8_L_offs_allocate_bale() {
byte[] aB = new byte[RANGE];
ByteArrayLittleEndian.setLong(aB, offset, vL);
return aB;
}
@Benchmark
public byte[] store_B8_L_offs_allocate_leapi() {
byte[] aB = new byte[RANGE];
storeLongLE(aB, offset, vL);
return aB;
}
@Benchmark
public byte[] store_B8_L_offs_nonalloc_direct() {
aB[offset + 0] = (byte)(vL >> 0 );
aB[offset + 1] = (byte)(vL >> 8 );
aB[offset + 2] = (byte)(vL >> 16);
aB[offset + 3] = (byte)(vL >> 24);
aB[offset + 4] = (byte)(vL >> 32);
aB[offset + 5] = (byte)(vL >> 40);
aB[offset + 6] = (byte)(vL >> 48);
aB[offset + 7] = (byte)(vL >> 56);
return aB;
}
@Benchmark
public byte[] store_B8_L_offs_nonalloc_unsafe() {
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vL);
return aB;
}
@Benchmark
public byte[] store_B8_L_offs_nonalloc_bale() {
ByteArrayLittleEndian.setLong(aB, offset, vL);
return aB;
}
@Benchmark
public byte[] store_B8_L_offs_nonalloc_leapi() {
storeLongLE(aB, offset, vL);
return aB;
}
@Benchmark
public byte[] store_B8_I2_offs_allocate_direct() {
byte[] aB = new byte[RANGE];
aB[offset + 0] = (byte)(vI >> 0 );
aB[offset + 1] = (byte)(vI >> 8 );
aB[offset + 2] = (byte)(vI >> 16);
aB[offset + 3] = (byte)(vI >> 24);
aB[offset + 4] = (byte)(vI >> 0 );
aB[offset + 5] = (byte)(vI >> 8 );
aB[offset + 6] = (byte)(vI >> 16);
aB[offset + 7] = (byte)(vI >> 24);
return aB;
}
@Benchmark
public byte[] store_B8_I2_offs_allocate_unsafe() {
byte[] aB = new byte[RANGE];
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, vI);
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 4, vI);
return aB;
}
@Benchmark
public byte[] store_B8_I2_offs_allocate_bale() {
byte[] aB = new byte[RANGE];
ByteArrayLittleEndian.setInt(aB, offset + 0, vI);
ByteArrayLittleEndian.setInt(aB, offset + 4, vI);
return aB;
}
@Benchmark
public byte[] store_B8_I2_offs_allocate_leapi() {
byte[] aB = new byte[RANGE];
storeIntLE(aB, offset + 0, vI);
storeIntLE(aB, offset + 4, vI);
return aB;
}
@Benchmark
public byte[] store_B8_I2_offs_nonalloc_direct() {
aB[offset + 0] = (byte)(vI >> 0 );
aB[offset + 1] = (byte)(vI >> 8 );
aB[offset + 2] = (byte)(vI >> 16);
aB[offset + 3] = (byte)(vI >> 24);
aB[offset + 4] = (byte)(vI >> 0 );
aB[offset + 5] = (byte)(vI >> 8 );
aB[offset + 6] = (byte)(vI >> 16);
aB[offset + 7] = (byte)(vI >> 24);
return aB;
}
@Benchmark
public byte[] store_B8_I2_offs_nonalloc_unsafe() {
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, vI);
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 4, vI);
return aB;
}
@Benchmark
public byte[] store_B8_I2_offs_nonalloc_bale() {
ByteArrayLittleEndian.setInt(aB, offset + 0, vI);
ByteArrayLittleEndian.setInt(aB, offset + 4, vI);
return aB;
}
@Benchmark
public byte[] store_B8_I2_offs_nonalloc_leapi() {
storeIntLE(aB, offset + 0, vI);
storeIntLE(aB, offset + 4, vI);
return aB;
}
@Benchmark
public short[] store_S2_con_offs_allocate_direct() {
short[] aS = new short[RANGE];
aS[offset + 0] = (short)0x0102;
aS[offset + 1] = (short)0x0304;
return aS;
}
@Benchmark
public short[] store_S2_con_offs_nonalloc_direct() {
aS[offset + 0] = (short)0x0102;
aS[offset + 1] = (short)0x0304;
return aS;
}
@Benchmark
public short[] store_S4_con_offs_allocate_direct() {
short[] aS = new short[RANGE];
aS[offset + 0] = (short)0x0102;
aS[offset + 1] = (short)0x0304;
aS[offset + 2] = (short)0x0506;
aS[offset + 3] = (short)0x0708;
return aS;
}
@Benchmark
public short[] store_S4_con_offs_nonalloc_direct() {
aS[offset + 0] = (short)0x0102;
aS[offset + 1] = (short)0x0304;
aS[offset + 2] = (short)0x0506;
aS[offset + 3] = (short)0x0708;
return aS;
}
@Benchmark
public int[] store_I2_con_offs_allocate_direct() {
int[] aI = new int[RANGE];
aI[offset + 0] = 0x01020304;
aI[offset + 1] = 0x05060708;
return aI;
}
@Benchmark
public int[] store_I2_con_offs_nonalloc_direct() {
aI[offset + 0] = 0x01020304;
aI[offset + 1] = 0x05060708;
return aI;
}
@Benchmark
public int[] store_I2_zero_offs_allocate_direct() {
int[] aI = new int[RANGE];
aI[offset + 0] = 0;
aI[offset + 1] = 0;
return aI;
}
@Benchmark
public int[] store_I2_zero_offs_nonalloc_direct() {
aI[offset + 0] = 0;
aI[offset + 1] = 0;
return aI;
}
}