8318446: C2: optimize stores into primitive arrays by combining values into larger store
Reviewed-by: kvn, thartmann
This commit is contained in:
parent
5c3838605d
commit
3ccb64c021
@ -103,6 +103,7 @@ $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \
|
||||
--add-exports java.base/jdk.internal.event=ALL-UNNAMED \
|
||||
--add-exports java.base/jdk.internal.foreign=ALL-UNNAMED \
|
||||
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED \
|
||||
--add-exports java.base/jdk.internal.util=ALL-UNNAMED \
|
||||
--add-exports java.base/jdk.internal.org.objectweb.asm.tree=ALL-UNNAMED \
|
||||
--add-exports java.base/jdk.internal.org.objectweb.asm=ALL-UNNAMED \
|
||||
--add-exports java.base/jdk.internal.vm=ALL-UNNAMED \
|
||||
|
@ -724,9 +724,9 @@ Node* AddPNode::Ideal_base_and_offset(Node* ptr, PhaseValues* phase,
|
||||
//------------------------------unpack_offsets----------------------------------
|
||||
// Collect the AddP offset values into the elements array, giving up
|
||||
// if there are more than length.
|
||||
int AddPNode::unpack_offsets(Node* elements[], int length) {
|
||||
int AddPNode::unpack_offsets(Node* elements[], int length) const {
|
||||
int count = 0;
|
||||
Node* addr = this;
|
||||
Node const* addr = this;
|
||||
Node* base = addr->in(AddPNode::Base);
|
||||
while (addr->is_AddP()) {
|
||||
if (addr->in(AddPNode::Base) != base) {
|
||||
|
@ -181,7 +181,7 @@ public:
|
||||
|
||||
// Collect the AddP offset values into the elements array, giving up
|
||||
// if there are more than length.
|
||||
int unpack_offsets(Node* elements[], int length);
|
||||
int unpack_offsets(Node* elements[], int length) const;
|
||||
|
||||
// Do not match base-ptr edge
|
||||
virtual uint match_edge(uint idx) const;
|
||||
|
@ -353,6 +353,12 @@
|
||||
develop(bool, TraceNewVectors, false, \
|
||||
"Trace creation of Vector nodes") \
|
||||
\
|
||||
product(bool, MergeStores, true, DIAGNOSTIC, \
|
||||
"Optimize stores by combining values into larger store") \
|
||||
\
|
||||
develop(bool, TraceMergeStores, false, \
|
||||
"Trace creation of merged stores") \
|
||||
\
|
||||
product_pd(bool, OptoBundling, \
|
||||
"Generate nops to fill i-cache lines") \
|
||||
\
|
||||
|
@ -931,6 +931,7 @@ Compile::Compile( ciEnv* ci_env,
|
||||
_directive(directive),
|
||||
_log(ci_env->log()),
|
||||
_first_failure_details(nullptr),
|
||||
_for_post_loop_igvn(comp_arena(), 8, 0, nullptr),
|
||||
_congraph(nullptr),
|
||||
NOT_PRODUCT(_igv_printer(nullptr) COMMA)
|
||||
_unique(0),
|
||||
|
@ -2697,6 +2697,683 @@ uint StoreNode::hash() const {
|
||||
return NO_HASH;
|
||||
}
|
||||
|
||||
// Class to parse array pointers, and determine if they are adjacent. We parse the form:
|
||||
//
|
||||
// pointer = base
|
||||
// + constant_offset
|
||||
// + LShiftL( ConvI2L(int_offset + int_con), int_offset_shift)
|
||||
// + sum(other_offsets)
|
||||
//
|
||||
//
|
||||
// Note: we accumulate all constant offsets into constant_offset, even the int constant behind
|
||||
// the "LShiftL(ConvI2L(...))" pattern. We convert "ConvI2L(int_offset + int_con)" to
|
||||
// "ConvI2L(int_offset) + int_con", which is only safe if we can assume that either all
|
||||
// compared addresses have an overflow for "int_offset + int_con" or none.
|
||||
// For loads and stores on arrays, we know that if one overflows and the other not, then
|
||||
// the two addresses lay almost max_int indices apart, but the maximal array size is
|
||||
// only about half of that. Therefore, the RangeCheck on at least one of them must have
|
||||
// failed.
|
||||
//
|
||||
// constant_offset += LShiftL( ConvI2L(int_con), int_offset_shift)
|
||||
//
|
||||
// pointer = base
|
||||
// + constant_offset
|
||||
// + LShiftL( ConvI2L(int_offset), int_offset_shift)
|
||||
// + sum(other_offsets)
|
||||
//
|
||||
class ArrayPointer {
|
||||
private:
|
||||
const bool _is_valid; // The parsing succeeded
|
||||
const Node* _pointer; // The final pointer to the position in the array
|
||||
const Node* _base; // Base address of the array
|
||||
const jlong _constant_offset; // Sum of collected constant offsets
|
||||
const Node* _int_offset; // (optional) Offset behind LShiftL and ConvI2L
|
||||
const jint _int_offset_shift; // (optional) Shift value for int_offset
|
||||
const GrowableArray<Node*>* _other_offsets; // List of other AddP offsets
|
||||
|
||||
ArrayPointer(const bool is_valid,
|
||||
const Node* pointer,
|
||||
const Node* base,
|
||||
const jlong constant_offset,
|
||||
const Node* int_offset,
|
||||
const jint int_offset_shift,
|
||||
const GrowableArray<Node*>* other_offsets) :
|
||||
_is_valid(is_valid),
|
||||
_pointer(pointer),
|
||||
_base(base),
|
||||
_constant_offset(constant_offset),
|
||||
_int_offset(int_offset),
|
||||
_int_offset_shift(int_offset_shift),
|
||||
_other_offsets(other_offsets)
|
||||
{
|
||||
assert(_pointer != nullptr, "must always have pointer");
|
||||
assert(is_valid == (_base != nullptr), "have base exactly if valid");
|
||||
assert(is_valid == (_other_offsets != nullptr), "have other_offsets exactly if valid");
|
||||
}
|
||||
|
||||
static ArrayPointer make_invalid(const Node* pointer) {
|
||||
return ArrayPointer(false, pointer, nullptr, 0, nullptr, 0, nullptr);
|
||||
}
|
||||
|
||||
static bool parse_int_offset(Node* offset, Node*& int_offset, jint& int_offset_shift) {
|
||||
// offset = LShiftL( ConvI2L(int_offset), int_offset_shift)
|
||||
if (offset->Opcode() == Op_LShiftL &&
|
||||
offset->in(1)->Opcode() == Op_ConvI2L &&
|
||||
offset->in(2)->Opcode() == Op_ConI) {
|
||||
int_offset = offset->in(1)->in(1); // LShiftL -> ConvI2L -> int_offset
|
||||
int_offset_shift = offset->in(2)->get_int(); // LShiftL -> int_offset_shift
|
||||
return true;
|
||||
}
|
||||
|
||||
// offset = ConvI2L(int_offset) = LShiftL( ConvI2L(int_offset), 0)
|
||||
if (offset->Opcode() == Op_ConvI2L) {
|
||||
int_offset = offset->in(1);
|
||||
int_offset_shift = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// parse failed
|
||||
return false;
|
||||
}
|
||||
|
||||
public:
|
||||
// Parse the structure above the pointer
|
||||
static ArrayPointer make(PhaseGVN* phase, const Node* pointer) {
|
||||
assert(phase->type(pointer)->isa_aryptr() != nullptr, "must be array pointer");
|
||||
if (!pointer->is_AddP()) { return ArrayPointer::make_invalid(pointer); }
|
||||
|
||||
const Node* base = pointer->in(AddPNode::Base);
|
||||
if (base == nullptr) { return ArrayPointer::make_invalid(pointer); }
|
||||
|
||||
const int search_depth = 5;
|
||||
Node* offsets[search_depth];
|
||||
int count = pointer->as_AddP()->unpack_offsets(offsets, search_depth);
|
||||
|
||||
// We expect at least a constant each
|
||||
if (count <= 0) { return ArrayPointer::make_invalid(pointer); }
|
||||
|
||||
// We extract the form:
|
||||
//
|
||||
// pointer = base
|
||||
// + constant_offset
|
||||
// + LShiftL( ConvI2L(int_offset + int_con), int_offset_shift)
|
||||
// + sum(other_offsets)
|
||||
//
|
||||
jlong constant_offset = 0;
|
||||
Node* int_offset = nullptr;
|
||||
jint int_offset_shift = 0;
|
||||
GrowableArray<Node*>* other_offsets = new GrowableArray<Node*>(count);
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
Node* offset = offsets[i];
|
||||
if (offset->Opcode() == Op_ConI) {
|
||||
// Constant int offset
|
||||
constant_offset += offset->get_int();
|
||||
} else if (offset->Opcode() == Op_ConL) {
|
||||
// Constant long offset
|
||||
constant_offset += offset->get_long();
|
||||
} else if(int_offset == nullptr && parse_int_offset(offset, int_offset, int_offset_shift)) {
|
||||
// LShiftL( ConvI2L(int_offset), int_offset_shift)
|
||||
int_offset = int_offset->uncast();
|
||||
if (int_offset->Opcode() == Op_AddI && int_offset->in(2)->Opcode() == Op_ConI) {
|
||||
// LShiftL( ConvI2L(int_offset + int_con), int_offset_shift)
|
||||
constant_offset += ((jlong)int_offset->in(2)->get_int()) << int_offset_shift;
|
||||
int_offset = int_offset->in(1);
|
||||
}
|
||||
} else {
|
||||
// All others
|
||||
other_offsets->append(offset);
|
||||
}
|
||||
}
|
||||
|
||||
return ArrayPointer(true, pointer, base, constant_offset, int_offset, int_offset_shift, other_offsets);
|
||||
}
|
||||
|
||||
bool is_adjacent_to_and_before(const ArrayPointer& other, const jlong data_size) const {
|
||||
if (!_is_valid || !other._is_valid) { return false; }
|
||||
|
||||
// Offset adjacent?
|
||||
if (this->_constant_offset + data_size != other._constant_offset) { return false; }
|
||||
|
||||
// All other components identical?
|
||||
if (this->_base != other._base ||
|
||||
this->_int_offset != other._int_offset ||
|
||||
this->_int_offset_shift != other._int_offset_shift ||
|
||||
this->_other_offsets->length() != other._other_offsets->length()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < this->_other_offsets->length(); i++) {
|
||||
Node* o1 = this->_other_offsets->at(i);
|
||||
Node* o2 = other._other_offsets->at(i);
|
||||
if (o1 != o2) { return false; }
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
void dump() {
|
||||
if (!_is_valid) {
|
||||
tty->print("ArrayPointer[%d %s, invalid]", _pointer->_idx, _pointer->Name());
|
||||
return;
|
||||
}
|
||||
tty->print("ArrayPointer[%d %s, base[%d %s] + %lld",
|
||||
_pointer->_idx, _pointer->Name(),
|
||||
_base->_idx, _base->Name(),
|
||||
(long long)_constant_offset);
|
||||
if (_int_offset != 0) {
|
||||
tty->print(" + I2L[%d %s] << %d",
|
||||
_int_offset->_idx, _int_offset->Name(), _int_offset_shift);
|
||||
}
|
||||
for (int i = 0; i < _other_offsets->length(); i++) {
|
||||
Node* n = _other_offsets->at(i);
|
||||
tty->print(" + [%d %s]", n->_idx, n->Name());
|
||||
}
|
||||
tty->print_cr("]");
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
// Link together multiple stores (B/S/C/I) into a longer one.
|
||||
//
|
||||
// Example: _store = StoreB[i+3]
|
||||
//
|
||||
// RangeCheck[i+0] RangeCheck[i+0]
|
||||
// StoreB[i+0]
|
||||
// RangeCheck[i+1] RangeCheck[i+1]
|
||||
// StoreB[i+1] --> pass: fail:
|
||||
// StoreB[i+2] StoreI[i+0] StoreB[i+0]
|
||||
// StoreB[i+3]
|
||||
//
|
||||
// The 4 StoreB are merged into a single StoreI node. We have to be careful with RangeCheck[i+1]: before
|
||||
// the optimization, if this RangeCheck[i+1] fails, then we execute only StoreB[i+0], and then trap. After
|
||||
// the optimization, the new StoreI[i+0] is on the passing path of RangeCheck[i+1], and StoreB[i+0] on the
|
||||
// failing path.
|
||||
//
|
||||
// Note: For normal array stores, every store at first has a RangeCheck. But they can be removed with:
|
||||
// - RCE (RangeCheck Elimination): the RangeChecks in the loop are hoisted out and before the loop,
|
||||
// and possibly no RangeChecks remain between the stores.
|
||||
// - RangeCheck smearing: the earlier RangeChecks are adjusted such that they cover later RangeChecks,
|
||||
// and those later RangeChecks can be removed. Example:
|
||||
//
|
||||
// RangeCheck[i+0] RangeCheck[i+0] <- before first store
|
||||
// StoreB[i+0] StoreB[i+0] <- first store
|
||||
// RangeCheck[i+1] --> smeared --> RangeCheck[i+3] <- only RC between first and last store
|
||||
// StoreB[i+1]               StoreB[i+1]     <- second store
|
||||
// RangeCheck[i+2] --> removed
|
||||
// StoreB[i+2]               StoreB[i+2]
|
||||
// RangeCheck[i+3] --> removed
|
||||
// StoreB[i+3]               StoreB[i+3]     <- last store
|
||||
//
|
||||
// Thus, it is a common pattern that between the first and last store in a chain
|
||||
// of adjacent stores there remains exactly one RangeCheck, located between the
|
||||
// first and the second store (e.g. RangeCheck[i+3]).
|
||||
//
|
||||
class MergePrimitiveArrayStores : public StackObj {
private:
  PhaseGVN* _phase;   // Current GVN phase (merging runs during post-loop-opts IGVN)
  StoreNode* _store;  // Candidate "last" store of a chain of adjacent stores

public:
  MergePrimitiveArrayStores(PhaseGVN* phase, StoreNode* store) : _phase(phase), _store(store) {}

  // Attempt the merge rooted at _store. Returns the new merged store on
  // success, or nullptr if no merge was possible.
  StoreNode* run();

private:
  // Same opcode, same array element size, same memory size as _store?
  bool is_compatible_store(const StoreNode* other_store) const;
  // Are def_store/use_store provably adjacent in memory with adjacent input values?
  bool is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const;
  // Are n1/n2 adjacent slices of a common base value (or both constants)?
  bool is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const;
  // Detect pattern: n = base_out >> shift_out, with constant shift.
  static bool is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out);
  enum CFGStatus { SuccessNoRangeCheck, SuccessWithRangeCheck, Failure };
  // Is there nothing between the two stores, except optionally a RangeCheck
  // leading to an uncommon trap? (See class comment above.)
  static CFGStatus cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store);

  // Result of searching for a mergeable neighbor store: the store found
  // (nullptr on failure), and whether a RangeCheck separates the pair.
  class Status {
  private:
    StoreNode* _found_store;
    bool _found_range_check;

    Status(StoreNode* found_store, bool found_range_check)
      : _found_store(found_store), _found_range_check(found_range_check) {}

  public:
    StoreNode* found_store() const { return _found_store; }
    bool found_range_check() const { return _found_range_check; }
    static Status make_failure() { return Status(nullptr, false); }

    // Translate a CFGStatus into a Status for the given store.
    static Status make(StoreNode* found_store, const CFGStatus cfg_status) {
      if (cfg_status == CFGStatus::Failure) {
        return Status::make_failure();
      }
      return Status(found_store, cfg_status == CFGStatus::SuccessWithRangeCheck);
    }
  };

  // "adjacent" variants additionally require address/value adjacency;
  // the "unidirectional" variants do the raw graph walk, and the
  // bidirectional wrappers assert use/def symmetry in debug builds.
  Status find_adjacent_use_store(const StoreNode* def_store) const;
  Status find_adjacent_def_store(const StoreNode* use_store) const;
  Status find_use_store(const StoreNode* def_store) const;
  Status find_def_store(const StoreNode* use_store) const;
  Status find_use_store_unidirectional(const StoreNode* def_store) const;
  Status find_def_store_unidirectional(const StoreNode* use_store) const;

  // Walk up the def chain from _store, collecting the stores to merge
  // (ordered: _store first, earliest store last).
  void collect_merge_list(Node_List& merge_list) const;
  // Combine the small input values into one larger value; nullptr on failure.
  Node* make_merged_input_value(const Node_List& merge_list);
  // Build the single larger store that replaces the whole chain.
  StoreNode* make_merged_store(const Node_List& merge_list, Node* merged_input_value);

  DEBUG_ONLY( void trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const; )
};
|
||||
|
||||
// Top-level driver: merge the chain of adjacent def stores ending at _store,
// or return nullptr if _store is not the last store of a mergeable chain.
StoreNode* MergePrimitiveArrayStores::run() {
  // Check for B/S/C/I
  int opc = _store->Opcode();
  if (opc != Op_StoreB && opc != Op_StoreC && opc != Op_StoreI) {
    return nullptr;
  }

  // Only merge stores on arrays, and the stores must have the same size as the elements.
  const TypeAryPtr* aryptr_t = _store->adr_type()->isa_aryptr();
  if (aryptr_t == nullptr ||
      type2aelembytes(aryptr_t->elem()->array_element_basic_type()) != _store->memory_size()) {
    return nullptr;
  }

  // The _store must be the "last" store in a chain. If we find a use we could merge with
  // then that use or a store further down is the "last" store.
  Status status_use = find_adjacent_use_store(_store);
  if (status_use.found_store() != nullptr) {
    return nullptr;
  }

  // Check if we can merge with at least one def, so that we have at least 2 stores to merge.
  Status status_def = find_adjacent_def_store(_store);
  if (status_def.found_store() == nullptr) {
    return nullptr;
  }

  ResourceMark rm;
  Node_List merge_list;
  collect_merge_list(merge_list);

  // Can still fail, e.g. if the non-constant inputs do not all slice one base value.
  Node* merged_input_value = make_merged_input_value(merge_list);
  if (merged_input_value == nullptr) { return nullptr; }

  StoreNode* merged_store = make_merged_store(merge_list, merged_input_value);

  DEBUG_ONLY( if(TraceMergeStores) { trace(merge_list, merged_input_value, merged_store); } )

  return merged_store;
}
|
||||
|
||||
// Check compatibility between _store and other_store.
|
||||
bool MergePrimitiveArrayStores::is_compatible_store(const StoreNode* other_store) const {
|
||||
int opc = _store->Opcode();
|
||||
assert(opc == Op_StoreB || opc == Op_StoreC || opc == Op_StoreI, "precondition");
|
||||
assert(_store->adr_type()->isa_aryptr() != nullptr, "must be array store");
|
||||
|
||||
if (other_store == nullptr ||
|
||||
_store->Opcode() != other_store->Opcode() ||
|
||||
other_store->adr_type()->isa_aryptr() == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that the size of the stores, and the array elements are all the same.
|
||||
const TypeAryPtr* aryptr_t1 = _store->adr_type()->is_aryptr();
|
||||
const TypeAryPtr* aryptr_t2 = other_store->adr_type()->is_aryptr();
|
||||
int size1 = type2aelembytes(aryptr_t1->elem()->array_element_basic_type());
|
||||
int size2 = type2aelembytes(aryptr_t2->elem()->array_element_basic_type());
|
||||
if (size1 != size2 ||
|
||||
size1 != _store->memory_size() ||
|
||||
_store->memory_size() != other_store->memory_size()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MergePrimitiveArrayStores::is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const {
|
||||
if (!is_adjacent_input_pair(def_store->in(MemNode::ValueIn),
|
||||
use_store->in(MemNode::ValueIn),
|
||||
def_store->memory_size())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ResourceMark rm;
|
||||
ArrayPointer array_pointer_use = ArrayPointer::make(_phase, use_store->in(MemNode::Address));
|
||||
ArrayPointer array_pointer_def = ArrayPointer::make(_phase, def_store->in(MemNode::Address));
|
||||
if (!array_pointer_def.is_adjacent_to_and_before(array_pointer_use, use_store->memory_size())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check whether the two stored values n1 (lower address) and n2 (next higher
// address) can be merged: either both are int constants, or both are shifted
// slices of the same base value where n2's slice covers the next higher
// memory_size*8 bits (matching little-endian memory layout).
bool MergePrimitiveArrayStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const {
  // Pattern: [n1 = ConI, n2 = ConI]
  if (n1->Opcode() == Op_ConI) {
    return n2->Opcode() == Op_ConI;
  }

  // Pattern: [n1 = base >> shift, n2 = base >> (shift + memory_size)]
  Node const* base_n2;
  jint shift_n2;
  if (!is_con_RShift(n2, base_n2, shift_n2)) {
    return false;
  }
  if (n1->Opcode() == Op_ConvL2I) {
    // look through
    n1 = n1->in(1);
  }
  Node const* base_n1;
  jint shift_n1;
  if (n1 == base_n2) {
    // n1 = base = base >> 0
    base_n1 = n1;
    shift_n1 = 0;
  } else if (!is_con_RShift(n1, base_n1, shift_n1)) {
    return false;
  }
  // n1's slice must directly precede n2's slice of the same base, and the
  // shifts must be aligned to whole store-sized slices.
  int bits_per_store = memory_size * 8;
  if (base_n1 != base_n2 ||
      shift_n1 + bits_per_store != shift_n2 ||
      shift_n1 % bits_per_store != 0) {
    return false;
  }

  // both load from same value with correct shift
  return true;
}
|
||||
|
||||
// Detect pattern: n = base_out >> shift_out
|
||||
bool MergePrimitiveArrayStores::is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out) {
|
||||
assert(n != nullptr, "precondition");
|
||||
|
||||
int opc = n->Opcode();
|
||||
if (opc == Op_ConvL2I) {
|
||||
n = n->in(1);
|
||||
opc = n->Opcode();
|
||||
}
|
||||
|
||||
if ((opc == Op_RShiftI ||
|
||||
opc == Op_RShiftL ||
|
||||
opc == Op_URShiftI ||
|
||||
opc == Op_URShiftL) &&
|
||||
n->in(2)->is_ConI()) {
|
||||
base_out = n->in(1);
|
||||
shift_out = n->in(2)->get_int();
|
||||
assert(shift_out >= 0, "must be positive");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if there is nothing between the two stores, except optionally a RangeCheck leading to an uncommon trap.
|
||||
MergePrimitiveArrayStores::CFGStatus MergePrimitiveArrayStores::cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store) {
  assert(use_store->in(MemNode::Memory) == def_store, "use-def relationship");

  Node* ctrl_use = use_store->in(MemNode::Control);
  Node* ctrl_def = def_store->in(MemNode::Control);
  if (ctrl_use == nullptr || ctrl_def == nullptr) {
    return CFGStatus::Failure;
  }

  if (ctrl_use == ctrl_def) {
    // Same ctrl -> no RangeCheck in between.
    // Check: use_store must be the only use of def_store.
    if (def_store->outcnt() > 1) {
      return CFGStatus::Failure;
    }
    return CFGStatus::SuccessNoRangeCheck;
  }

  // Different ctrl -> could have RangeCheck in between.
  // Check: 1. def_store only has these uses: use_store and MergeMem for uncommon trap, and
  //        2. ctrl separated by RangeCheck.
  if (def_store->outcnt() != 2) {
    return CFGStatus::Failure; // Cannot have exactly these uses: use_store and MergeMem for uncommon trap.
  }
  // Identify the output of def_store that is not use_store; it must be the
  // MergeMem that feeds the uncommon trap's memory state.
  int use_store_out_idx = def_store->raw_out(0) == use_store ? 0 : 1;
  Node* merge_mem = def_store->raw_out(1 - use_store_out_idx)->isa_MergeMem();
  if (merge_mem == nullptr ||
      merge_mem->outcnt() != 1) {
    return CFGStatus::Failure; // Does not have MergeMem for uncommon trap.
  }
  // use_store must sit on a projection of a RangeCheck with exactly the two
  // expected control outputs (pass path and trap path).
  if (!ctrl_use->is_IfProj() ||
      !ctrl_use->in(0)->is_RangeCheck() ||
      ctrl_use->in(0)->outcnt() != 2) {
    return CFGStatus::Failure; // Not RangeCheck.
  }
  // The other projection must lead to a range-check uncommon trap that
  // consumes merge_mem, and the RangeCheck must hang directly off ctrl_def.
  ProjNode* other_proj = ctrl_use->as_IfProj()->other_if_proj();
  Node* trap = other_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check);
  if (trap != merge_mem->unique_out() ||
      ctrl_use->in(0)->in(0) != ctrl_def) {
    return CFGStatus::Failure; // Not RangeCheck with merge_mem leading to uncommon trap.
  }

  return CFGStatus::SuccessWithRangeCheck;
}
|
||||
|
||||
// Find a compatible use store of def_store, and additionally require that the
// pair is adjacent in memory and value; otherwise report failure.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_use_store(const StoreNode* def_store) const {
  const Status status = find_use_store(def_store);
  StoreNode* const candidate = status.found_store();
  if (candidate == nullptr || is_adjacent_pair(candidate, def_store)) {
    return status;
  }
  return Status::make_failure();
}
|
||||
|
||||
// Find the compatible def store of use_store, and additionally require that
// the pair is adjacent in memory and value; otherwise report failure.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_def_store(const StoreNode* use_store) const {
  const Status status = find_def_store(use_store);
  StoreNode* const candidate = status.found_store();
  if (candidate == nullptr || is_adjacent_pair(use_store, candidate)) {
    return status;
  }
  return Status::make_failure();
}
|
||||
|
||||
// Find a compatible use store of def_store. In debug builds, verify that
// searching back down from the found use lands on def_store again with the
// same RangeCheck status.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store(const StoreNode* def_store) const {
  Status status_use = find_use_store_unidirectional(def_store);

#ifdef ASSERT
  StoreNode* use_store = status_use.found_store();
  if (use_store != nullptr) {
    Status status_def = find_def_store_unidirectional(use_store);
    assert(status_def.found_store() == def_store &&
           status_def.found_range_check() == status_use.found_range_check(),
           "find_use_store and find_def_store must be symmetric");
  }
#endif

  return status_use;
}
|
||||
|
||||
// Find the compatible def store of use_store. In debug builds, verify that
// searching back up from the found def lands on use_store again with the
// same RangeCheck status.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store(const StoreNode* use_store) const {
  Status status_def = find_def_store_unidirectional(use_store);

#ifdef ASSERT
  StoreNode* def_store = status_def.found_store();
  if (def_store != nullptr) {
    Status status_use = find_use_store_unidirectional(def_store);
    assert(status_use.found_store() == use_store &&
           status_use.found_range_check() == status_def.found_range_check(),
           "find_use_store and find_def_store must be symmetric");
  }
#endif

  return status_def;
}
|
||||
|
||||
// Scan the outputs of def_store for the first compatible store using it as
// memory input, and classify the control flow between the two.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store_unidirectional(const StoreNode* def_store) const {
  assert(is_compatible_store(def_store), "precondition: must be compatible with _store");

  for (DUIterator_Fast imax, i = def_store->fast_outs(imax); i < imax; i++) {
    StoreNode* candidate = def_store->fast_out(i)->isa_Store();
    if (is_compatible_store(candidate)) {
      return Status::make(candidate, cfg_status_for_pair(candidate, def_store));
    }
  }

  return Status::make_failure();
}
|
||||
|
||||
// The def candidate is simply the memory input of use_store; accept it if it
// is a compatible store and classify the control flow between the two.
MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store_unidirectional(const StoreNode* use_store) const {
  assert(is_compatible_store(use_store), "precondition: must be compatible with _store");

  StoreNode* candidate = use_store->in(MemNode::Memory)->isa_Store();
  if (is_compatible_store(candidate)) {
    return Status::make(candidate, cfg_status_for_pair(use_store, candidate));
  }
  return Status::make_failure();
}
|
||||
|
||||
// Collect the chain of adjacent def stores starting at _store into merge_list
// (ordered: _store first, earliest store last), capped at 8 merged bytes and
// truncated to a power-of-2 count.
void MergePrimitiveArrayStores::collect_merge_list(Node_List& merge_list) const {
  // The merged store can be at most 8 bytes.
  const uint merge_list_max_size = 8 / _store->memory_size();
  assert(merge_list_max_size >= 2 &&
         merge_list_max_size <= 8 &&
         is_power_of_2(merge_list_max_size),
         "must be 2, 4 or 8");

  // Traverse up the chain of adjacent def stores.
  StoreNode* current = _store;
  merge_list.push(current);
  while (current != nullptr && merge_list.size() < merge_list_max_size) {
    Status status = find_adjacent_def_store(current);
    current = status.found_store();
    if (current != nullptr) {
      merge_list.push(current);

      // We can have at most one RangeCheck (see class comment), so stop
      // extending the chain once we crossed one.
      if (status.found_range_check()) {
        break;
      }
    }
  }

  // Truncate the merge_list to a power of 2.
  // Note: run() only calls us after finding at least one adjacent def,
  // so the list has at least 2 elements here.
  const uint pow2size = round_down_power_of_2(merge_list.size());
  assert(pow2size >= 2, "must be merging at least 2 stores");
  while (merge_list.size() > pow2size) { merge_list.pop(); }
}
|
||||
|
||||
// Merge the input values of the smaller stores to a single larger input value.
|
||||
// Merge the input values of the smaller stores to a single larger input value.
// Returns nullptr if the values cannot be combined.
Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_list) {
  int new_memory_size = _store->memory_size() * merge_list.size();
  // merge_list is ordered _store first; its last element is the first store.
  Node* first = merge_list.at(merge_list.size()-1);
  Node* merged_input_value = nullptr;
  if (_store->in(MemNode::ValueIn)->Opcode() == Op_ConI) {
    // Pattern: [ConI, ConI, ...] -> new constant
    // Iterating from _store (highest address) down to the first store shifts
    // earlier values up, so the first store's value lands in the lowest bits,
    // matching little-endian memory layout.
    jlong con = 0;
    jlong bits_per_store = _store->memory_size() * 8;
    jlong mask = (((jlong)1) << bits_per_store) - 1;
    for (uint i = 0; i < merge_list.size(); i++) {
      jlong con_i = merge_list.at(i)->in(MemNode::ValueIn)->get_int();
      con = con << bits_per_store;
      con = con | (mask & con_i);
    }
    merged_input_value = _phase->longcon(con);
  } else {
    // Pattern: [base >> 24, base >> 16, base >> 8, base] -> base
    //          |                                  |
    //          _store                             first
    //
    merged_input_value = first->in(MemNode::ValueIn);
    // is_adjacent_pair already proved _store's value is a shifted slice of
    // some base; verify and extract that base here.
    Node const* base_last;
    jint shift_last;
    bool is_true = is_con_RShift(_store->in(MemNode::ValueIn), base_last, shift_last);
    assert(is_true, "must detect con RShift");
    if (merged_input_value != base_last && merged_input_value->Opcode() == Op_ConvL2I) {
      // look through
      merged_input_value = merged_input_value->in(1);
    }
    if (merged_input_value != base_last) {
      // merged_input_value is not the base
      return nullptr;
    }
  }

  if (_phase->type(merged_input_value)->isa_long() != nullptr && new_memory_size <= 4) {
    // The base is a long, but the merged store is only int-sized or smaller:
    // narrow the value. Example:
    //
    //   long base = ...;
    //   a[0] = (byte)(base >> 0);
    //   a[1] = (byte)(base >> 8);
    //
    merged_input_value = _phase->transform(new ConvL2INode(merged_input_value));
  }

  assert((_phase->type(merged_input_value)->isa_int() != nullptr && new_memory_size <= 4) ||
         (_phase->type(merged_input_value)->isa_long() != nullptr && new_memory_size == 8),
         "merged_input_value is either int or long, and new_memory_size is small enough");

  return merged_input_value;
}
|
||||
|
||||
// //
|
||||
// first_ctrl first_mem first_adr first_ctrl first_mem first_adr //
|
||||
// | | | | | | //
|
||||
// | | | | +---------------+ | //
|
||||
// | | | | | | | //
|
||||
// | | +---------+ | | +---------------+ //
|
||||
// | | | | | | | | //
|
||||
// +--------------+ | | v1 +------------------------------+ | | v1 //
|
||||
// | | | | | | | | | | | | //
|
||||
// RangeCheck first_store RangeCheck | | first_store //
|
||||
// | | | | | | | //
|
||||
// last_ctrl | +----> unc_trap last_ctrl | | +----> unc_trap //
|
||||
// | | ===> | | | //
|
||||
// +--------------+ | a2 v2 | | | //
|
||||
// | | | | | | | | //
|
||||
// | second_store | | | //
|
||||
// | | | | | [v1 v2 ... vn] //
|
||||
// ... ... | | | | //
|
||||
// | | | | | v //
|
||||
// +--------------+ | an vn +--------------+ | | merged_input_value //
|
||||
// | | | | | | | | //
|
||||
// last_store (= _store) merged_store //
|
||||
// //
|
||||
// Build the single larger store: it takes the first store's memory and address
// (before the optional RangeCheck) but the last store's control (after it).
StoreNode* MergePrimitiveArrayStores::make_merged_store(const Node_List& merge_list, Node* merged_input_value) {
  Node* first_store = merge_list.at(merge_list.size()-1);
  Node* last_ctrl   = _store->in(MemNode::Control); // after (optional) RangeCheck
  Node* first_mem   = first_store->in(MemNode::Memory);
  Node* first_adr   = first_store->in(MemNode::Address);

  const TypePtr* new_adr_type = _store->adr_type();

  // Pick the store kind matching the merged width (merge_list sizes are
  // powers of 2, so only these three cases can occur).
  int new_memory_size = _store->memory_size() * merge_list.size();
  BasicType bt = T_ILLEGAL;
  switch (new_memory_size) {
    case 2: bt = T_SHORT; break;
    case 4: bt = T_INT;   break;
    case 8: bt = T_LONG;  break;
  }

  StoreNode* merged_store = StoreNode::make(*_phase, last_ctrl, first_mem, first_adr,
                                            new_adr_type, merged_input_value, bt, MemNode::unordered);

  // Marking the store mismatched is sufficient to prevent reordering, since array stores
  // are all on the same slice. Hence, we need no barriers.
  merged_store->set_mismatched_access();

  // Constants above may now also be packed -> put candidate on worklist.
  // NOTE: merging only runs during (post loop opts) IGVN, so is_IterGVN()
  // is expected to be non-null here.
  _phase->is_IterGVN()->_worklist.push(first_mem);

  return merged_store;
}
|
||||
|
||||
#ifdef ASSERT
|
||||
void MergePrimitiveArrayStores::trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const {
|
||||
stringStream ss;
|
||||
ss.print_cr("[TraceMergeStores]: Replace");
|
||||
for (int i = (int)merge_list.size() - 1; i >= 0; i--) {
|
||||
merge_list.at(i)->dump("\n", false, &ss);
|
||||
}
|
||||
ss.print_cr("[TraceMergeStores]: with");
|
||||
merged_input_value->dump("\n", false, &ss);
|
||||
merged_store->dump("\n", false, &ss);
|
||||
tty->print("%s", ss.as_string());
|
||||
}
|
||||
#endif
|
||||
|
||||
//------------------------------Ideal------------------------------------------
|
||||
// Change back-to-back Store(, p, x) -> Store(m, p, y) to Store(m, p, x).
|
||||
// When a store immediately follows a relevant allocation/initialization,
|
||||
@ -2782,6 +3459,18 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef VM_LITTLE_ENDIAN
|
||||
if (MergeStores && UseUnalignedAccesses) {
|
||||
if (phase->C->post_loop_opts_phase()) {
|
||||
MergePrimitiveArrayStores merge(phase, this);
|
||||
Node* progress = merge.run();
|
||||
if (progress != nullptr) { return progress; }
|
||||
} else {
|
||||
phase->C->record_for_post_loop_opts_igvn(this);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return nullptr; // No further progress
|
||||
}
|
||||
|
||||
|
@ -2273,7 +2273,15 @@ void PhasePeephole::print_statistics() {
|
||||
//------------------------------set_req_X--------------------------------------
|
||||
void Node::set_req_X( uint i, Node *n, PhaseIterGVN *igvn ) {
|
||||
assert( is_not_dead(n), "can not use dead node");
|
||||
assert( igvn->hash_find(this) != this, "Need to remove from hash before changing edges" );
|
||||
#ifdef ASSERT
|
||||
if (igvn->hash_find(this) == this) {
|
||||
tty->print_cr("Need to remove from hash before changing edges");
|
||||
this->dump(1);
|
||||
tty->print_cr("Set at i = %d", i);
|
||||
n->dump();
|
||||
assert(false, "Need to remove from hash before changing edges");
|
||||
}
|
||||
#endif
|
||||
Node *old = in(i);
|
||||
set_req(i, n);
|
||||
|
||||
|
1247
test/hotspot/jtreg/compiler/c2/TestMergeStores.java
Normal file
1247
test/hotspot/jtreg/compiler/c2/TestMergeStores.java
Normal file
File diff suppressed because it is too large
Load Diff
696
test/micro/org/openjdk/bench/vm/compiler/MergeStores.java
Normal file
696
test/micro/org/openjdk/bench/vm/compiler/MergeStores.java
Normal file
@ -0,0 +1,696 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package org.openjdk.bench.vm.compiler;
|
||||
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
|
||||
import jdk.internal.misc.Unsafe;
|
||||
import jdk.internal.util.ByteArrayLittleEndian;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@Warmup(iterations = 3, time = 3)
|
||||
@Measurement(iterations = 3, time = 3)
|
||||
@Fork(value = 3, jvmArgsAppend = {
|
||||
"--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
|
||||
"--add-exports", "java.base/jdk.internal.util=ALL-UNNAMED"})
|
||||
@State(Scope.Benchmark)
|
||||
public class MergeStores {
|
||||
|
||||
public static final int RANGE = 100;
|
||||
|
||||
static Unsafe UNSAFE = Unsafe.getUnsafe();
|
||||
|
||||
@Param("1")
|
||||
public static short vS;
|
||||
|
||||
@Param("1")
|
||||
public static int vI;
|
||||
|
||||
@Param("1")
|
||||
public static long vL;
|
||||
|
||||
public static int offset = 5;
|
||||
public static byte[] aB = new byte[RANGE];
|
||||
public static short[] aS = new short[RANGE];
|
||||
public static int[] aI = new int[RANGE];
|
||||
|
||||
// -------------------------------------------
|
||||
// ------- Little-Endian API ----------
|
||||
// -------------------------------------------
|
||||
|
||||
// Store a short LE into an array using store bytes in an array
|
||||
static void storeShortLE(byte[] bytes, int offset, short value) {
|
||||
storeBytes(bytes, offset, (byte)(value >> 0),
|
||||
(byte)(value >> 8));
|
||||
}
|
||||
|
||||
// Store an int LE into an array using store bytes in an array
|
||||
static void storeIntLE(byte[] bytes, int offset, int value) {
|
||||
storeBytes(bytes, offset, (byte)(value >> 0 ),
|
||||
(byte)(value >> 8 ),
|
||||
(byte)(value >> 16),
|
||||
(byte)(value >> 24));
|
||||
}
|
||||
|
||||
// Store an int LE into an array using store bytes in an array
|
||||
static void storeLongLE(byte[] bytes, int offset, long value) {
|
||||
storeBytes(bytes, offset, (byte)(value >> 0 ),
|
||||
(byte)(value >> 8 ),
|
||||
(byte)(value >> 16),
|
||||
(byte)(value >> 24),
|
||||
(byte)(value >> 32),
|
||||
(byte)(value >> 40),
|
||||
(byte)(value >> 48),
|
||||
(byte)(value >> 56));
|
||||
}
|
||||
|
||||
// Store 2 bytes into an array
|
||||
static void storeBytes(byte[] bytes, int offset, byte b0, byte b1) {
|
||||
bytes[offset + 0] = b0;
|
||||
bytes[offset + 1] = b1;
|
||||
}
|
||||
|
||||
// Store 4 bytes into an array
|
||||
static void storeBytes(byte[] bytes, int offset, byte b0, byte b1, byte b2, byte b3) {
|
||||
bytes[offset + 0] = b0;
|
||||
bytes[offset + 1] = b1;
|
||||
bytes[offset + 2] = b2;
|
||||
bytes[offset + 3] = b3;
|
||||
}
|
||||
|
||||
// Store 8 bytes into an array
|
||||
static void storeBytes(byte[] bytes, int offset, byte b0, byte b1, byte b2, byte b3,
|
||||
byte b4, byte b5, byte b6, byte b7) {
|
||||
bytes[offset + 0] = b0;
|
||||
bytes[offset + 1] = b1;
|
||||
bytes[offset + 2] = b2;
|
||||
bytes[offset + 3] = b3;
|
||||
bytes[offset + 4] = b4;
|
||||
bytes[offset + 5] = b5;
|
||||
bytes[offset + 6] = b6;
|
||||
bytes[offset + 7] = b7;
|
||||
}
|
||||
|
||||
// -------------------------------- BENCHMARKS --------------------------------
|
||||
|
||||
@Benchmark
|
||||
public void baseline() {
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] baseline_allocate() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_con_adr0_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[0] = (byte)0x01;
|
||||
aB[1] = (byte)0x02;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_con_adr1_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[1] = (byte)0x01;
|
||||
aB[2] = (byte)0x02;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_con_offs_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[offset + 0] = (byte)0x01;
|
||||
aB[offset + 1] = (byte)0x02;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_con_offs_allocate_unsafe() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
UNSAFE.putShortUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, (short)0x0201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_con_offs_allocate_bale() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
ByteArrayLittleEndian.setShort(aB, offset, (short)0x0201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_con_offs_allocate_leapi() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
storeShortLE(aB, offset, (short)0x0201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_con_offs_nonalloc_direct() {
|
||||
aB[offset + 0] = (byte)0x01;
|
||||
aB[offset + 1] = (byte)0x02;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_con_offs_nonalloc_unsafe() {
|
||||
UNSAFE.putShortUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, (short)0x0201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_con_offs_nonalloc_bale() {
|
||||
ByteArrayLittleEndian.setShort(aB, offset, (short)0x0201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_con_offs_nonalloc_leapi() {
|
||||
storeShortLE(aB, offset, (short)0x0201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_S_offs_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[offset + 0] = (byte)(vS >> 0 );
|
||||
aB[offset + 1] = (byte)(vS >> 8 );
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_S_offs_allocate_unsafe() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
UNSAFE.putShortUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vS);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_S_offs_allocate_bale() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
ByteArrayLittleEndian.setShort(aB, offset, vS);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_S_offs_allocate_leapi() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
storeShortLE(aB, offset, vS);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_S_offs_nonalloc_direct() {
|
||||
aB[offset + 0] = (byte)(vS >> 0 );
|
||||
aB[offset + 1] = (byte)(vS >> 8 );
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_S_offs_nonalloc_unsafe() {
|
||||
UNSAFE.putShortUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vS);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_S_offs_nonalloc_bale() {
|
||||
ByteArrayLittleEndian.setShort(aB, offset, vS);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B2_S_offs_nonalloc_leapi() {
|
||||
storeShortLE(aB, offset, vS);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_con_adr0_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[0] = (byte)0x01;
|
||||
aB[1] = (byte)0x02;
|
||||
aB[2] = (byte)0x03;
|
||||
aB[3] = (byte)0x04;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_con_adr1_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[1] = (byte)0x01;
|
||||
aB[2] = (byte)0x02;
|
||||
aB[3] = (byte)0x03;
|
||||
aB[4] = (byte)0x04;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_con_offs_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[offset + 0] = (byte)0x01;
|
||||
aB[offset + 1] = (byte)0x02;
|
||||
aB[offset + 2] = (byte)0x03;
|
||||
aB[offset + 3] = (byte)0x04;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_con_offs_allocate_unsafe() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
UNSAFE.putIntUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, 0x04030201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_con_offs_allocate_bale() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
ByteArrayLittleEndian.setInt(aB, offset, 0x04030201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_con_offs_allocate_leapi() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
storeIntLE(aB, offset, 0x04030201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_con_offs_nonalloc_direct() {
|
||||
aB[offset + 0] = (byte)0x01;
|
||||
aB[offset + 1] = (byte)0x02;
|
||||
aB[offset + 2] = (byte)0x03;
|
||||
aB[offset + 3] = (byte)0x04;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_con_offs_nonalloc_unsafe() {
|
||||
UNSAFE.putIntUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, 0x04030201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_con_offs_nonalloc_bale() {
|
||||
ByteArrayLittleEndian.setInt(aB, offset, 0x04030201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_con_offs_nonalloc_leapi() {
|
||||
storeIntLE(aB, offset, 0x04030201);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_I_offs_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[offset + 0] = (byte)(vI >> 0 );
|
||||
aB[offset + 1] = (byte)(vI >> 8 );
|
||||
aB[offset + 2] = (byte)(vI >> 16);
|
||||
aB[offset + 3] = (byte)(vI >> 24);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_I_offs_allocate_unsafe() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
UNSAFE.putIntUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_I_offs_allocate_bale() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
ByteArrayLittleEndian.setInt(aB, offset, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_I_offs_allocate_leapi() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
storeIntLE(aB, offset, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_I_offs_nonalloc_direct() {
|
||||
aB[offset + 0] = (byte)(vI >> 0 );
|
||||
aB[offset + 1] = (byte)(vI >> 8 );
|
||||
aB[offset + 2] = (byte)(vI >> 16);
|
||||
aB[offset + 3] = (byte)(vI >> 24);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_I_offs_nonalloc_unsafe() {
|
||||
UNSAFE.putIntUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_I_offs_nonalloc_bale() {
|
||||
ByteArrayLittleEndian.setInt(aB, offset, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B4_I_offs_nonalloc_leapi() {
|
||||
storeIntLE(aB, offset, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_con_adr0_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[0] = (byte)0x01;
|
||||
aB[1] = (byte)0x02;
|
||||
aB[2] = (byte)0x03;
|
||||
aB[3] = (byte)0x04;
|
||||
aB[4] = (byte)0x05;
|
||||
aB[5] = (byte)0x06;
|
||||
aB[6] = (byte)0x07;
|
||||
aB[7] = (byte)0x08;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_con_adr1_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[1] = (byte)0x01;
|
||||
aB[2] = (byte)0x02;
|
||||
aB[3] = (byte)0x03;
|
||||
aB[4] = (byte)0x04;
|
||||
aB[5] = (byte)0x05;
|
||||
aB[6] = (byte)0x06;
|
||||
aB[7] = (byte)0x07;
|
||||
aB[8] = (byte)0x08;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_con_offs_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[offset + 0] = (byte)0x01;
|
||||
aB[offset + 1] = (byte)0x02;
|
||||
aB[offset + 2] = (byte)0x03;
|
||||
aB[offset + 3] = (byte)0x04;
|
||||
aB[offset + 4] = (byte)0x05;
|
||||
aB[offset + 5] = (byte)0x06;
|
||||
aB[offset + 6] = (byte)0x07;
|
||||
aB[offset + 7] = (byte)0x08;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_con_offs_allocate_unsafe() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, 0x0807060504030201L);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_con_offs_allocate_bale() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
ByteArrayLittleEndian.setLong(aB, offset, 0x0807060504030201L);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_con_offs_allocate_leapi() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
storeLongLE(aB, offset, 0x0807060504030201L);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_con_offs_nonalloc_direct() {
|
||||
aB[offset + 0] = (byte)0x01;
|
||||
aB[offset + 1] = (byte)0x02;
|
||||
aB[offset + 2] = (byte)0x03;
|
||||
aB[offset + 3] = (byte)0x04;
|
||||
aB[offset + 4] = (byte)0x05;
|
||||
aB[offset + 5] = (byte)0x06;
|
||||
aB[offset + 6] = (byte)0x07;
|
||||
aB[offset + 7] = (byte)0x08;
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_con_offs_nonalloc_unsafe() {
|
||||
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, 0x0807060504030201L);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_con_offs_nonalloc_bale() {
|
||||
ByteArrayLittleEndian.setLong(aB, offset, 0x0807060504030201L);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_con_offs_nonalloc_leapi() {
|
||||
storeLongLE(aB, offset, 0x0807060504030201L);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_L_offs_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[offset + 0] = (byte)(vL >> 0 );
|
||||
aB[offset + 1] = (byte)(vL >> 8 );
|
||||
aB[offset + 2] = (byte)(vL >> 16);
|
||||
aB[offset + 3] = (byte)(vL >> 24);
|
||||
aB[offset + 4] = (byte)(vL >> 32);
|
||||
aB[offset + 5] = (byte)(vL >> 40);
|
||||
aB[offset + 6] = (byte)(vL >> 48);
|
||||
aB[offset + 7] = (byte)(vL >> 56);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_L_offs_allocate_unsafe() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vL);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_L_offs_allocate_bale() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
ByteArrayLittleEndian.setLong(aB, offset, vL);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_L_offs_allocate_leapi() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
storeLongLE(aB, offset, vL);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_L_offs_nonalloc_direct() {
|
||||
aB[offset + 0] = (byte)(vL >> 0 );
|
||||
aB[offset + 1] = (byte)(vL >> 8 );
|
||||
aB[offset + 2] = (byte)(vL >> 16);
|
||||
aB[offset + 3] = (byte)(vL >> 24);
|
||||
aB[offset + 4] = (byte)(vL >> 32);
|
||||
aB[offset + 5] = (byte)(vL >> 40);
|
||||
aB[offset + 6] = (byte)(vL >> 48);
|
||||
aB[offset + 7] = (byte)(vL >> 56);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_L_offs_nonalloc_unsafe() {
|
||||
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset, vL);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_L_offs_nonalloc_bale() {
|
||||
ByteArrayLittleEndian.setLong(aB, offset, vL);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_L_offs_nonalloc_leapi() {
|
||||
storeLongLE(aB, offset, vL);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_I2_offs_allocate_direct() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
aB[offset + 0] = (byte)(vI >> 0 );
|
||||
aB[offset + 1] = (byte)(vI >> 8 );
|
||||
aB[offset + 2] = (byte)(vI >> 16);
|
||||
aB[offset + 3] = (byte)(vI >> 24);
|
||||
aB[offset + 4] = (byte)(vI >> 0 );
|
||||
aB[offset + 5] = (byte)(vI >> 8 );
|
||||
aB[offset + 6] = (byte)(vI >> 16);
|
||||
aB[offset + 7] = (byte)(vI >> 24);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_I2_offs_allocate_unsafe() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, vI);
|
||||
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 4, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_I2_offs_allocate_bale() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
ByteArrayLittleEndian.setInt(aB, offset + 0, vI);
|
||||
ByteArrayLittleEndian.setInt(aB, offset + 4, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_I2_offs_allocate_leapi() {
|
||||
byte[] aB = new byte[RANGE];
|
||||
storeIntLE(aB, offset + 0, vI);
|
||||
storeIntLE(aB, offset + 4, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_I2_offs_nonalloc_direct() {
|
||||
aB[offset + 0] = (byte)(vI >> 0 );
|
||||
aB[offset + 1] = (byte)(vI >> 8 );
|
||||
aB[offset + 2] = (byte)(vI >> 16);
|
||||
aB[offset + 3] = (byte)(vI >> 24);
|
||||
aB[offset + 4] = (byte)(vI >> 0 );
|
||||
aB[offset + 5] = (byte)(vI >> 8 );
|
||||
aB[offset + 6] = (byte)(vI >> 16);
|
||||
aB[offset + 7] = (byte)(vI >> 24);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_I2_offs_nonalloc_unsafe() {
|
||||
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, vI);
|
||||
UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 4, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_I2_offs_nonalloc_bale() {
|
||||
ByteArrayLittleEndian.setInt(aB, offset + 0, vI);
|
||||
ByteArrayLittleEndian.setInt(aB, offset + 4, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] store_B8_I2_offs_nonalloc_leapi() {
|
||||
storeIntLE(aB, offset + 0, vI);
|
||||
storeIntLE(aB, offset + 4, vI);
|
||||
return aB;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public short[] store_S2_con_offs_allocate_direct() {
|
||||
short[] aS = new short[RANGE];
|
||||
aS[offset + 0] = (short)0x0102;
|
||||
aS[offset + 1] = (short)0x0304;
|
||||
return aS;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public short[] store_S2_con_offs_nonalloc_direct() {
|
||||
aS[offset + 0] = (short)0x0102;
|
||||
aS[offset + 1] = (short)0x0304;
|
||||
return aS;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public short[] store_S4_con_offs_allocate_direct() {
|
||||
short[] aS = new short[RANGE];
|
||||
aS[offset + 0] = (short)0x0102;
|
||||
aS[offset + 1] = (short)0x0304;
|
||||
aS[offset + 2] = (short)0x0506;
|
||||
aS[offset + 3] = (short)0x0708;
|
||||
return aS;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public short[] store_S4_con_offs_nonalloc_direct() {
|
||||
aS[offset + 0] = (short)0x0102;
|
||||
aS[offset + 1] = (short)0x0304;
|
||||
aS[offset + 2] = (short)0x0506;
|
||||
aS[offset + 3] = (short)0x0708;
|
||||
return aS;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int[] store_I2_con_offs_allocate_direct() {
|
||||
int[] aI = new int[RANGE];
|
||||
aI[offset + 0] = 0x01020304;
|
||||
aI[offset + 1] = 0x05060708;
|
||||
return aI;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int[] store_I2_con_offs_nonalloc_direct() {
|
||||
aI[offset + 0] = 0x01020304;
|
||||
aI[offset + 1] = 0x05060708;
|
||||
return aI;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int[] store_I2_zero_offs_allocate_direct() {
|
||||
int[] aI = new int[RANGE];
|
||||
aI[offset + 0] = 0;
|
||||
aI[offset + 1] = 0;
|
||||
return aI;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public int[] store_I2_zero_offs_nonalloc_direct() {
|
||||
aI[offset + 0] = 0;
|
||||
aI[offset + 1] = 0;
|
||||
return aI;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user