8335392: C2 MergeStores: enhanced pointer parsing

Co-authored-by: Christian Hagedorn <chagedorn@openjdk.org>
Reviewed-by: kvn, chagedorn

parent 4fc6d4135e
commit f3671beefb
src/hotspot/share/compiler/compilerDirectives.cpp

@ -33,6 +33,7 @@
 #include "memory/resourceArea.hpp"
 #include "opto/phasetype.hpp"
 #include "opto/traceAutoVectorizationTag.hpp"
+#include "opto/traceMergeStoresTag.hpp"
 #include "runtime/globals_extension.hpp"

 CompilerDirectives::CompilerDirectives() : _next(nullptr), _match(nullptr), _ref_count(0) {
@ -302,7 +303,8 @@ DirectiveSet::DirectiveSet(CompilerDirectives* d) :
   _inlinematchers(nullptr),
   _directive(d),
   _ideal_phase_name_set(PHASE_NUM_TYPES, mtCompiler),
-  _trace_auto_vectorization_tags(TRACE_AUTO_VECTORIZATION_TAG_NUM, mtCompiler)
+  _trace_auto_vectorization_tags(TRACE_AUTO_VECTORIZATION_TAG_NUM, mtCompiler),
+  _trace_merge_stores_tags(TraceMergeStores::TAG_NUM, mtCompiler)
 {
 #define init_defaults_definition(name, type, dvalue, compiler) this->name##Option = dvalue;
   compilerdirectives_common_flags(init_defaults_definition)
@ -432,7 +434,6 @@ DirectiveSet* DirectiveSet::compilecommand_compatibility_init(const methodHandle
   compilerdirectives_c1_flags(init_default_cc)
 #undef init_default_cc

-  // Parse PrintIdealPhaseName and create a lookup set
 #ifndef PRODUCT
 #ifdef COMPILER2
   if (!_modified[TraceAutoVectorizationIndex]) {
@ -445,6 +446,17 @@ DirectiveSet* DirectiveSet::compilecommand_compatibility_init(const methodHandle
       }
     }
   }
+  if (!_modified[TraceMergeStoresIndex]) {
+    // Parse ccstr and create mask
+    ccstrlist option;
+    if (CompilerOracle::has_option_value(method, CompileCommandEnum::TraceMergeStores, option)) {
+      TraceMergeStores::TagValidator validator(option, false);
+      if (validator.is_valid()) {
+        set.cloned()->set_trace_merge_stores_tags(validator.tags());
+      }
+    }
+  }
+  // Parse PrintIdealPhaseName and create a lookup set
   if (!_modified[PrintIdealPhaseIndex]) {
     // Parse ccstr and create set
     ccstrlist option;
src/hotspot/share/compiler/compilerDirectives.hpp

@ -90,6 +90,7 @@ NOT_PRODUCT(cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLeve
   cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit)
 #define compilerdirectives_c2_string_flags(cflags) \
   NOT_PRODUCT(cflags(TraceAutoVectorization, ccstrlist, "", TraceAutoVectorization)) \
+  NOT_PRODUCT(cflags(TraceMergeStores, ccstrlist, "", TraceMergeStores)) \
   NOT_PRODUCT(cflags(PrintIdealPhase, ccstrlist, "", PrintIdealPhase))
 #else
 #define compilerdirectives_c2_other_flags(cflags)
@ -131,6 +132,7 @@ private:
   TriBoolArray<(size_t)vmIntrinsics::number_of_intrinsics(), int> _intrinsic_control_words;
   CHeapBitMap _ideal_phase_name_set;
   CHeapBitMap _trace_auto_vectorization_tags;
+  CHeapBitMap _trace_merge_stores_tags;

 public:
   DirectiveSet(CompilerDirectives* directive);
@ -211,6 +213,12 @@ void set_##name(void* value) { \
   const CHeapBitMap& trace_auto_vectorization_tags() {
     return _trace_auto_vectorization_tags;
   };
+  void set_trace_merge_stores_tags(const CHeapBitMap& tags) {
+    _trace_merge_stores_tags.set_from(tags);
+  };
+  const CHeapBitMap& trace_merge_stores_tags() {
+    return _trace_merge_stores_tags;
+  };

   void print_intx(outputStream* st, ccstr n, intx v, bool mod) { if (mod) { st->print("%s:" INTX_FORMAT " ", n, v); } }
   void print_uintx(outputStream* st, ccstr n, intx v, bool mod) { if (mod) { st->print("%s:" UINTX_FORMAT " ", n, v); } }
src/hotspot/share/compiler/compilerOracle.cpp

@ -36,6 +36,7 @@
 #include "oops/symbol.hpp"
 #include "opto/phasetype.hpp"
 #include "opto/traceAutoVectorizationTag.hpp"
+#include "opto/traceMergeStoresTag.hpp"
 #include "runtime/globals_extension.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/jniHandles.hpp"
@ -802,6 +803,12 @@ static void scan_value(enum OptionType type, char* line, int& total_bytes_read,
   else if (option == CompileCommandEnum::TraceAutoVectorization) {
     TraceAutoVectorizationTagValidator validator(value, true);

     if (!validator.is_valid()) {
       jio_snprintf(errorbuf, buf_size, "Unrecognized tag name in %s: %s", option2name(option), validator.what());
     }
+  } else if (option == CompileCommandEnum::TraceMergeStores) {
+    TraceMergeStores::TagValidator validator(value, true);
+
+    if (!validator.is_valid()) {
+      jio_snprintf(errorbuf, buf_size, "Unrecognized tag name in %s: %s", option2name(option), validator.what());
+    }

src/hotspot/share/compiler/compilerOracle.hpp

@ -86,6 +86,7 @@ NOT_PRODUCT(option(PrintIdeal, "PrintIdeal", Bool)) \
   NOT_PRODUCT(option(PrintIdealPhase, "PrintIdealPhase", Ccstrlist)) \
   NOT_PRODUCT(option(IGVPrintLevel, "IGVPrintLevel", Intx)) \
   NOT_PRODUCT(option(TraceAutoVectorization, "TraceAutoVectorization", Ccstrlist)) \
+  NOT_PRODUCT(option(TraceMergeStores, "TraceMergeStores", Ccstrlist)) \
   option(Vectorize, "Vectorize", Bool) \
   option(CloneMapDebug, "CloneMapDebug", Bool) \
   option(IncrementalInlineForceCleanup, "IncrementalInlineForceCleanup", Bool) \
src/hotspot/share/compiler/directivesParser.cpp

@ -29,6 +29,7 @@
 #include "memory/resourceArea.hpp"
 #include "opto/phasetype.hpp"
 #include "opto/traceAutoVectorizationTag.hpp"
+#include "opto/traceMergeStoresTag.hpp"
 #include "runtime/os.hpp"
 #include <string.h>

@ -347,6 +348,15 @@ bool DirectivesParser::set_option_flag(JSON_TYPE t, JSON_VAL* v, const key* opti
       } else {
         error(VALUE_ERROR, "Unrecognized tag name detected in TraceAutoVectorization: %s", validator.what());
       }
+    } else if (strncmp(option_key->name, "TraceMergeStores", 16) == 0) {
+      TraceMergeStores::TagValidator validator(s, false);
+
+      valid = validator.is_valid();
+      if (valid) {
+        set->set_trace_merge_stores_tags(validator.tags());
+      } else {
+        error(VALUE_ERROR, "Unrecognized tag name detected in TraceMergeStores: %s", validator.what());
+      }
     } else if (strncmp(option_key->name, "PrintIdealPhase", 15) == 0) {
       PhaseNameValidator validator(s);
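Taken together with the compilerOracle changes above, the new TraceMergeStores ccstrlist can also be supplied per method through a compiler directives file. A minimal sketch of such a file, assuming the usual directives JSON shape; the match pattern is made up, and the tag names BASIC and SUCCESS are the ones referenced later in this change:

```json
[
  {
    // Hypothetical match pattern; any method matcher works here.
    "match": "org.example.Foo::bar",
    "c2": {
      // ccstrlist of TraceMergeStores tags, comma separated.
      "TraceMergeStores": "BASIC,SUCCESS"
    }
  }
]
```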
src/hotspot/share/opto/c2_globals.hpp

@ -367,9 +367,6 @@
   product(bool, MergeStores, true, DIAGNOSTIC,                            \
           "Optimize stores by combining values into larger store")        \
                                                                           \
-  develop(bool, TraceMergeStores, false,                                  \
-          "Trace creation of merged stores")                              \
-                                                                          \
   product_pd(bool, OptoBundling,                                          \
              "Generate nops to fill i-cache lines")                       \
                                                                           \
src/hotspot/share/opto/memnode.cpp

@ -43,11 +43,13 @@
 #include "opto/machnode.hpp"
 #include "opto/matcher.hpp"
 #include "opto/memnode.hpp"
+#include "opto/mempointer.hpp"
 #include "opto/mulnode.hpp"
 #include "opto/narrowptrnode.hpp"
 #include "opto/phaseX.hpp"
 #include "opto/regmask.hpp"
 #include "opto/rootnode.hpp"
+#include "opto/traceMergeStoresTag.hpp"
 #include "opto/vectornode.hpp"
 #include "utilities/align.hpp"
 #include "utilities/copy.hpp"
@ -2738,184 +2740,6 @@ uint StoreNode::hash() const {
   return NO_HASH;
 }

-// Class to parse array pointers, and determine if they are adjacent. We parse the form:
-//
-//   pointer = base
-//           + constant_offset
-//           + LShiftL( ConvI2L(int_offset + int_con), int_offset_shift)
-//           + sum(other_offsets)
-//
-//
-// Note: we accumulate all constant offsets into constant_offset, even the int constant behind
-//       the "LShiftL(ConvI2L(...))" pattern. We convert "ConvI2L(int_offset + int_con)" to
-//       "ConvI2L(int_offset) + int_con", which is only safe if we can assume that either all
-//       compared addresses have an overflow for "int_offset + int_con" or none.
-//       For loads and stores on arrays, we know that if one overflows and the other not, then
-//       the two addresses lay almost max_int indices apart, but the maximal array size is
-//       only about half of that. Therefore, the RangeCheck on at least one of them must have
-//       failed.
-//
-//   constant_offset += LShiftL( ConvI2L(int_con), int_offset_shift)
-//
-//   pointer = base
-//           + constant_offset
-//           + LShiftL( ConvI2L(int_offset), int_offset_shift)
-//           + sum(other_offsets)
-//
-class ArrayPointer {
-private:
-  const Node* _pointer;                       // The final pointer to the position in the array
-  const Node* _base;                          // Base address of the array
-  const jlong _constant_offset;               // Sum of collected constant offsets
-  const Node* _int_offset;                    // (optional) Offset behind LShiftL and ConvI2L
-  const GrowableArray<Node*>* _other_offsets; // List of other AddP offsets
-  const jint  _int_offset_shift;              // (optional) Shift value for int_offset
-  const bool  _is_valid;                      // The parsing succeeded
-
-  ArrayPointer(const bool is_valid,
-               const Node* pointer,
-               const Node* base,
-               const jlong constant_offset,
-               const Node* int_offset,
-               const jint int_offset_shift,
-               const GrowableArray<Node*>* other_offsets) :
-    _pointer(pointer),
-    _base(base),
-    _constant_offset(constant_offset),
-    _int_offset(int_offset),
-    _other_offsets(other_offsets),
-    _int_offset_shift(int_offset_shift),
-    _is_valid(is_valid)
-  {
-    assert(_pointer != nullptr, "must always have pointer");
-    assert(is_valid == (_base != nullptr), "have base exactly if valid");
-    assert(is_valid == (_other_offsets != nullptr), "have other_offsets exactly if valid");
-  }
-
-  static ArrayPointer make_invalid(const Node* pointer) {
-    return ArrayPointer(false, pointer, nullptr, 0, nullptr, 0, nullptr);
-  }
-
-  static bool parse_int_offset(Node* offset, Node*& int_offset, jint& int_offset_shift) {
-    // offset = LShiftL( ConvI2L(int_offset), int_offset_shift)
-    if (offset->Opcode() == Op_LShiftL &&
-        offset->in(1)->Opcode() == Op_ConvI2L &&
-        offset->in(2)->Opcode() == Op_ConI) {
-      int_offset = offset->in(1)->in(1);           // LShiftL -> ConvI2L -> int_offset
-      int_offset_shift = offset->in(2)->get_int(); // LShiftL -> int_offset_shift
-      return true;
-    }
-
-    // offset = ConvI2L(int_offset) = LShiftL( ConvI2L(int_offset), 0)
-    if (offset->Opcode() == Op_ConvI2L) {
-      int_offset = offset->in(1);
-      int_offset_shift = 0;
-      return true;
-    }
-
-    // parse failed
-    return false;
-  }
-
-public:
-  // Parse the structure above the pointer
-  static ArrayPointer make(PhaseGVN* phase, const Node* pointer) {
-    assert(phase->type(pointer)->isa_aryptr() != nullptr, "must be array pointer");
-    if (!pointer->is_AddP()) { return ArrayPointer::make_invalid(pointer); }
-
-    const Node* base = pointer->in(AddPNode::Base);
-    if (base == nullptr) { return ArrayPointer::make_invalid(pointer); }
-
-    const int search_depth = 5;
-    Node* offsets[search_depth];
-    int count = pointer->as_AddP()->unpack_offsets(offsets, search_depth);
-
-    // We expect at least a constant each
-    if (count <= 0) { return ArrayPointer::make_invalid(pointer); }
-
-    // We extract the form:
-    //
-    //   pointer = base
-    //           + constant_offset
-    //           + LShiftL( ConvI2L(int_offset + int_con), int_offset_shift)
-    //           + sum(other_offsets)
-    //
-    jlong constant_offset = 0;
-    Node* int_offset = nullptr;
-    jint int_offset_shift = 0;
-    GrowableArray<Node*>* other_offsets = new GrowableArray<Node*>(count);
-
-    for (int i = 0; i < count; i++) {
-      Node* offset = offsets[i];
-      if (offset->Opcode() == Op_ConI) {
-        // Constant int offset
-        constant_offset += offset->get_int();
-      } else if (offset->Opcode() == Op_ConL) {
-        // Constant long offset
-        constant_offset += offset->get_long();
-      } else if(int_offset == nullptr && parse_int_offset(offset, int_offset, int_offset_shift)) {
-        // LShiftL( ConvI2L(int_offset), int_offset_shift)
-        int_offset = int_offset->uncast();
-        if (int_offset->Opcode() == Op_AddI && int_offset->in(2)->Opcode() == Op_ConI) {
-          // LShiftL( ConvI2L(int_offset + int_con), int_offset_shift)
-          constant_offset += ((jlong)int_offset->in(2)->get_int()) << int_offset_shift;
-          int_offset = int_offset->in(1);
-        }
-      } else {
-        // All others
-        other_offsets->append(offset);
-      }
-    }
-
-    return ArrayPointer(true, pointer, base, constant_offset, int_offset, int_offset_shift, other_offsets);
-  }
-
-  bool is_adjacent_to_and_before(const ArrayPointer& other, const jlong data_size) const {
-    if (!_is_valid || !other._is_valid) { return false; }
-
-    // Offset adjacent?
-    if (this->_constant_offset + data_size != other._constant_offset) { return false; }
-
-    // All other components identical?
-    if (this->_base != other._base ||
-        this->_int_offset != other._int_offset ||
-        this->_int_offset_shift != other._int_offset_shift ||
-        this->_other_offsets->length() != other._other_offsets->length()) {
-      return false;
-    }
-
-    for (int i = 0; i < this->_other_offsets->length(); i++) {
-      Node* o1 = this->_other_offsets->at(i);
-      Node* o2 = other._other_offsets->at(i);
-      if (o1 != o2) { return false; }
-    }
-
-    return true;
-  }
-
-#ifndef PRODUCT
-  void dump() {
-    if (!_is_valid) {
-      tty->print("ArrayPointer[%d %s, invalid]", _pointer->_idx, _pointer->Name());
-      return;
-    }
-    tty->print("ArrayPointer[%d %s, base[%d %s] + %lld",
-               _pointer->_idx, _pointer->Name(),
-               _base->_idx, _base->Name(),
-               (long long)_constant_offset);
-    if (_int_offset != nullptr) {
-      tty->print(" + I2L[%d %s] << %d",
-                 _int_offset->_idx, _int_offset->Name(), _int_offset_shift);
-    }
-    for (int i = 0; i < _other_offsets->length(); i++) {
-      Node* n = _other_offsets->at(i);
-      tty->print(" + [%d %s]", n->_idx, n->Name());
-    }
-    tty->print_cr("]");
-  }
-#endif
-};
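The removed parser is easier to picture on a concrete access. For `a[i + 2]` on an int array it would have recorded `base = a`, `int_offset = i`, `int_offset_shift = 2`, and folded the `+2` (as 8 bytes) together with the array header into `constant_offset`. A standalone toy model of its adjacency rule follows; all names are invented for illustration, plain integers stand in for C2 nodes, and the 16-byte header is the typical 64-bit value:

```cpp
#include <cstdint>
#include <cstdio>

// Toy model of the removed ArrayPointer:
//   base + constant_offset + (int_offset << int_offset_shift)
struct ToyArrayPointer {
  int     base_id;           // stands in for the array base node
  int64_t constant_offset;   // all folded constants, in bytes
  int     int_offset_id;     // stands in for the variable index node
  int     int_offset_shift;  // log2 of the element size

  // Adjacency rule of the removed class: everything identical
  // except the constant, which differs by exactly data_size.
  bool is_adjacent_to_and_before(const ToyArrayPointer& other,
                                 int64_t data_size) const {
    return base_id          == other.base_id &&
           int_offset_id    == other.int_offset_id &&
           int_offset_shift == other.int_offset_shift &&
           constant_offset + data_size == other.constant_offset;
  }
};

int main() {
  // a[i + 2] and a[i + 3] on an int array (element size 4, header 16).
  ToyArrayPointer p1{/*base*/1, 16 + 8,  /*i*/2, 2};
  ToyArrayPointer p2{/*base*/1, 16 + 12, /*i*/2, 2};
  printf("adjacent: %d\n", p1.is_adjacent_to_and_before(p2, 4)); // 1
}
```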

 // Link together multiple stores (B/S/C/I) into a longer one.
 //
 // Example: _store = StoreB[i+3]
@ -2951,13 +2775,18 @@ public:
 // of adjacent stores there remains exactly one RangeCheck, located between the
 // first and the second store (e.g. RangeCheck[i+3]).
 //
-class MergePrimitiveArrayStores : public StackObj {
+class MergePrimitiveStores : public StackObj {
 private:
-  PhaseGVN* _phase;
-  StoreNode* _store;
+  PhaseGVN* const _phase;
+  StoreNode* const _store;
+
+  NOT_PRODUCT( const CHeapBitMap &_trace_tags; )

 public:
-  MergePrimitiveArrayStores(PhaseGVN* phase, StoreNode* store) : _phase(phase), _store(store) {}
+  MergePrimitiveStores(PhaseGVN* phase, StoreNode* store) :
+    _phase(phase), _store(store)
+    NOT_PRODUCT( COMMA _trace_tags(Compile::current()->directive()->trace_merge_stores_tags()) )
+    {}

   StoreNode* run();

@ -2988,6 +2817,17 @@ private:
     }
     return Status(found_store, cfg_status == CFGStatus::SuccessWithRangeCheck);
   }
+
+#ifndef PRODUCT
+  void print_on(outputStream* st) const {
+    if (_found_store == nullptr) {
+      st->print_cr("None");
+    } else {
+      st->print_cr("Found[%d %s, %s]", _found_store->_idx, _found_store->Name(),
+                                       _found_range_check ? "RC" : "no-RC");
+    }
+  }
+#endif
 };

 Status find_adjacent_use_store(const StoreNode* def_store) const;
@ -3001,43 +2841,56 @@ private:
   Node* make_merged_input_value(const Node_List& merge_list);
   StoreNode* make_merged_store(const Node_List& merge_list, Node* merged_input_value);

-  DEBUG_ONLY( void trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const; )
+#ifndef PRODUCT
+  // Access to TraceMergeStores tags
+  bool is_trace(TraceMergeStores::Tag tag) const {
+    return _trace_tags.at(tag);
+  }
+
+  bool is_trace_basic() const {
+    return is_trace(TraceMergeStores::Tag::BASIC);
+  }
+
+  bool is_trace_pointer() const {
+    return is_trace(TraceMergeStores::Tag::POINTER);
+  }
+
+  bool is_trace_aliasing() const {
+    return is_trace(TraceMergeStores::Tag::ALIASING);
+  }
+
+  bool is_trace_adjacency() const {
+    return is_trace(TraceMergeStores::Tag::ADJACENCY);
+  }
+
+  bool is_trace_success() const {
+    return is_trace(TraceMergeStores::Tag::SUCCESS);
+  }
+#endif
+
+  NOT_PRODUCT( void trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const; )
 };

-StoreNode* MergePrimitiveArrayStores::run() {
+StoreNode* MergePrimitiveStores::run() {
   // Check for B/S/C/I
   int opc = _store->Opcode();
   if (opc != Op_StoreB && opc != Op_StoreC && opc != Op_StoreI) {
     return nullptr;
   }

   // Only merge stores on arrays, and the stores must have the same size as the elements.
   const TypePtr* ptr_t = _store->adr_type();
   if (ptr_t == nullptr) {
     return nullptr;
   }
   const TypeAryPtr* aryptr_t = ptr_t->isa_aryptr();
   if (aryptr_t == nullptr) {
     return nullptr;
   }
   BasicType bt = aryptr_t->elem()->array_element_basic_type();
   if (!is_java_primitive(bt) ||
       type2aelembytes(bt) != _store->memory_size()) {
     return nullptr;
   }
   if (_store->is_unsafe_access()) {
     return nullptr;
   }
+  NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] MergePrimitiveStores::run: "); _store->dump(); })

   // The _store must be the "last" store in a chain. If we find a use we could merge with
   // then that use or a store further down is the "last" store.
   Status status_use = find_adjacent_use_store(_store);
+  NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] expect no use: "); status_use.print_on(tty); })
   if (status_use.found_store() != nullptr) {
     return nullptr;
   }

   // Check if we can merge with at least one def, so that we have at least 2 stores to merge.
   Status status_def = find_adjacent_def_store(_store);
+  NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] expect def: "); status_def.print_on(tty); })
   if (status_def.found_store() == nullptr) {
     return nullptr;
   }
@ -3051,45 +2904,25 @@ StoreNode* MergePrimitiveArrayStores::run() {

   StoreNode* merged_store = make_merged_store(merge_list, merged_input_value);

-  DEBUG_ONLY( if(TraceMergeStores) { trace(merge_list, merged_input_value, merged_store); } )
+  NOT_PRODUCT( if (is_trace_success()) { trace(merge_list, merged_input_value, merged_store); } )

   return merged_store;
 }
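What run() ultimately does is replace a chain of small adjacent stores by one wider store. A standalone sketch of the equivalence it relies on, for four constant byte stores merged into one 32-bit store (little-endian, as the merged constant is assembled byte-wise; names here are illustrative only):

```cpp
#include <cstdint>
#include <cstring>
#include <cstdio>

// Before: four StoreB nodes write adjacent bytes.
// After:  one StoreI writes a single merged constant input value.
int main() {
  uint8_t a[4];
  a[0] = 0x11; a[1] = 0x22; a[2] = 0x33; a[3] = 0x44; // four byte stores

  // Merged 32-bit constant, assembled from the four bytes.
  uint32_t merged = 0x11u | (0x22u << 8) | (0x33u << 16) | (0x44u << 24);
  uint8_t b[4];
  memcpy(b, &merged, 4); // one 32-bit store

  printf("equal: %d\n", memcmp(a, b, 4) == 0); // 1 on little-endian
  return 0;
}
```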

 // Check compatibility between _store and other_store.
-bool MergePrimitiveArrayStores::is_compatible_store(const StoreNode* other_store) const {
+bool MergePrimitiveStores::is_compatible_store(const StoreNode* other_store) const {
   int opc = _store->Opcode();
   assert(opc == Op_StoreB || opc == Op_StoreC || opc == Op_StoreI, "precondition");
+  assert(_store->adr_type()->isa_aryptr() != nullptr, "must be array store");
+  assert(!_store->is_unsafe_access(), "no unsafe accesses");

   if (other_store == nullptr ||
-      _store->Opcode() != other_store->Opcode() ||
-      other_store->adr_type() == nullptr ||
-      other_store->adr_type()->isa_aryptr() == nullptr ||
-      other_store->is_unsafe_access()) {
+      _store->Opcode() != other_store->Opcode()) {
     return false;
   }

-  // Check that the size of the stores, and the array elements are all the same.
-  const TypeAryPtr* aryptr_t1 = _store->adr_type()->is_aryptr();
-  const TypeAryPtr* aryptr_t2 = other_store->adr_type()->is_aryptr();
-  BasicType aryptr_bt1 = aryptr_t1->elem()->array_element_basic_type();
-  BasicType aryptr_bt2 = aryptr_t2->elem()->array_element_basic_type();
-  if (!is_java_primitive(aryptr_bt1) || !is_java_primitive(aryptr_bt2)) {
-    return false;
-  }
-  int size1 = type2aelembytes(aryptr_bt1);
-  int size2 = type2aelembytes(aryptr_bt2);
-  if (size1 != size2 ||
-      size1 != _store->memory_size() ||
-      _store->memory_size() != other_store->memory_size()) {
-    return false;
-  }
   return true;
 }

-bool MergePrimitiveArrayStores::is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const {
+bool MergePrimitiveStores::is_adjacent_pair(const StoreNode* use_store, const StoreNode* def_store) const {
   if (!is_adjacent_input_pair(def_store->in(MemNode::ValueIn),
                               use_store->in(MemNode::ValueIn),
                               def_store->memory_size())) {
@ -3097,16 +2930,17 @@ bool MergePrimitiveArrayStores::is_adjacent_pair(const StoreNode* use_store, con
     return false;
   }

   ResourceMark rm;
-  ArrayPointer array_pointer_use = ArrayPointer::make(_phase, use_store->in(MemNode::Address));
-  ArrayPointer array_pointer_def = ArrayPointer::make(_phase, def_store->in(MemNode::Address));
-  if (!array_pointer_def.is_adjacent_to_and_before(array_pointer_use, use_store->memory_size())) {
-    return false;
-  }
-
-  return true;
+#ifndef PRODUCT
+  const TraceMemPointer trace(is_trace_pointer(),
+                              is_trace_aliasing(),
+                              is_trace_adjacency());
+#endif
+  const MemPointer pointer_use(use_store NOT_PRODUCT( COMMA trace ));
+  const MemPointer pointer_def(def_store NOT_PRODUCT( COMMA trace ));
+  return pointer_def.is_adjacent_to_and_before(pointer_use);
 }
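The new check delegates adjacency to MemPointer. Conceptually, two pointers in the decomposed form `SUM(scale_i * variable_i) + con` are adjacent exactly when all summands match and the constants differ by the store size. A toy model of that rule (invented types; the real code compares MemPointerSummands and goes through MemPointerAliasing):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Opaque ids stand in for C2 node identity.
struct Summand {
  int64_t scale;
  int     variable_id;
  bool operator==(const Summand& o) const {
    return scale == o.scale && variable_id == o.variable_id;
  }
};

struct DecomposedForm {
  std::vector<Summand> summands;
  int64_t con;
  // Adjacent iff all summands agree and constants differ by size.
  bool is_adjacent_to_and_before(const DecomposedForm& other, int64_t size) const {
    return summands == other.summands && con + size == other.con;
  }
};

int main() {
  // a[i] and a[i+1] on an int array: 1*base + 4*i + 16, and ... + 20.
  DecomposedForm def{{{1, /*base*/1}, {4, /*i*/2}}, 16};
  DecomposedForm use{{{1, /*base*/1}, {4, /*i*/2}}, 20};
  printf("adjacent: %d\n", def.is_adjacent_to_and_before(use, 4)); // 1
}
```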

-bool MergePrimitiveArrayStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const {
+bool MergePrimitiveStores::is_adjacent_input_pair(const Node* n1, const Node* n2, const int memory_size) const {
   // Pattern: [n1 = ConI, n2 = ConI]
   if (n1->Opcode() == Op_ConI) {
     return n2->Opcode() == Op_ConI;
@ -3148,7 +2982,7 @@ bool MergePrimitiveArrayStores::is_adjacent_input_pair(const Node* n1, const Nod
 }

 // Detect pattern: n = base_out >> shift_out
-bool MergePrimitiveArrayStores::is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out) {
+bool MergePrimitiveStores::is_con_RShift(const Node* n, Node const*& base_out, jint& shift_out) {
   assert(n != nullptr, "precondition");

   int opc = n->Opcode();
@ -3171,7 +3005,7 @@ bool MergePrimitiveArrayStores::is_con_RShift(const Node* n, Node const*& base_o
 }

 // Check if there is nothing between the two stores, except optionally a RangeCheck leading to an uncommon trap.
-MergePrimitiveArrayStores::CFGStatus MergePrimitiveArrayStores::cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store) {
+MergePrimitiveStores::CFGStatus MergePrimitiveStores::cfg_status_for_pair(const StoreNode* use_store, const StoreNode* def_store) {
   assert(use_store->in(MemNode::Memory) == def_store, "use-def relationship");

   Node* ctrl_use = use_store->in(MemNode::Control);
@ -3216,7 +3050,7 @@ MergePrimitiveArrayStores::CFGStatus MergePrimitiveArrayStores::cfg_status_for_p
   return CFGStatus::SuccessWithRangeCheck;
 }

-MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_use_store(const StoreNode* def_store) const {
+MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_use_store(const StoreNode* def_store) const {
   Status status_use = find_use_store(def_store);
   StoreNode* use_store = status_use.found_store();
   if (use_store != nullptr && !is_adjacent_pair(use_store, def_store)) {
@ -3225,7 +3059,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_use_s
   return status_use;
 }

-MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_def_store(const StoreNode* use_store) const {
+MergePrimitiveStores::Status MergePrimitiveStores::find_adjacent_def_store(const StoreNode* use_store) const {
   Status status_def = find_def_store(use_store);
   StoreNode* def_store = status_def.found_store();
   if (def_store != nullptr && !is_adjacent_pair(use_store, def_store)) {
@ -3234,7 +3068,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_adjacent_def_s
   return status_def;
 }

-MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store(const StoreNode* def_store) const {
+MergePrimitiveStores::Status MergePrimitiveStores::find_use_store(const StoreNode* def_store) const {
   Status status_use = find_use_store_unidirectional(def_store);

 #ifdef ASSERT
@ -3250,7 +3084,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store(cons
   return status_use;
 }

-MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store(const StoreNode* use_store) const {
+MergePrimitiveStores::Status MergePrimitiveStores::find_def_store(const StoreNode* use_store) const {
   Status status_def = find_def_store_unidirectional(use_store);

 #ifdef ASSERT
@ -3266,7 +3100,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store(cons
   return status_def;
 }

-MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store_unidirectional(const StoreNode* def_store) const {
+MergePrimitiveStores::Status MergePrimitiveStores::find_use_store_unidirectional(const StoreNode* def_store) const {
   assert(is_compatible_store(def_store), "precondition: must be compatible with _store");

   for (DUIterator_Fast imax, i = def_store->fast_outs(imax); i < imax; i++) {
@ -3279,7 +3113,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_use_store_unid
   return Status::make_failure();
 }

-MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store_unidirectional(const StoreNode* use_store) const {
+MergePrimitiveStores::Status MergePrimitiveStores::find_def_store_unidirectional(const StoreNode* use_store) const {
   assert(is_compatible_store(use_store), "precondition: must be compatible with _store");

   StoreNode* def_store = use_store->in(MemNode::Memory)->isa_Store();
@ -3290,7 +3124,7 @@ MergePrimitiveArrayStores::Status MergePrimitiveArrayStores::find_def_store_unid
   return Status::make(def_store, cfg_status_for_pair(use_store, def_store));
 }

-void MergePrimitiveArrayStores::collect_merge_list(Node_List& merge_list) const {
+void MergePrimitiveStores::collect_merge_list(Node_List& merge_list) const {
   // The merged store can be at most 8 bytes.
   const uint merge_list_max_size = 8 / _store->memory_size();
   assert(merge_list_max_size >= 2 &&
@ -3303,25 +3137,32 @@ void MergePrimitiveArrayStores::collect_merge_list(Node_List& merge_list) const
   merge_list.push(current);
   while (current != nullptr && merge_list.size() < merge_list_max_size) {
     Status status = find_adjacent_def_store(current);
+    NOT_PRODUCT( if (is_trace_basic()) { tty->print("[TraceMergeStores] find def: "); status.print_on(tty); })
+
     current = status.found_store();
     if (current != nullptr) {
       merge_list.push(current);

       // We can have at most one RangeCheck.
       if (status.found_range_check()) {
+        NOT_PRODUCT( if (is_trace_basic()) { tty->print_cr("[TraceMergeStores] found RangeCheck, stop traversal."); })
         break;
       }
     }
   }

+  NOT_PRODUCT( if (is_trace_basic()) { tty->print_cr("[TraceMergeStores] found:"); merge_list.dump(); })
+
   // Truncate the merge_list to a power of 2.
   const uint pow2size = round_down_power_of_2(merge_list.size());
   assert(pow2size >= 2, "must be merging at least 2 stores");
   while (merge_list.size() > pow2size) { merge_list.pop(); }

+  NOT_PRODUCT( if (is_trace_basic()) { tty->print_cr("[TraceMergeStores] truncated:"); merge_list.dump(); })
 }
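The truncation step keeps only a power-of-two number of stores, so the merged access is again a 2-, 4- or 8-byte primitive store. A small sketch of the rounding, with a stand-in for HotSpot's round_down_power_of_2:

```cpp
#include <cstdint>
#include <cstdio>

// Toy version of the truncation: keep only the largest power of two
// of the collected stores. Clearing the lowest set bit repeatedly
// leaves exactly the highest set bit.
static uint32_t round_down_power_of_2(uint32_t x) {
  while (x & (x - 1)) { x &= x - 1; }
  return x;
}

int main() {
  // E.g. 7 adjacent byte stores can only be merged as a group of 4.
  printf("%u -> %u\n", 7u, round_down_power_of_2(7)); // 7 -> 4
  printf("%u -> %u\n", 4u, round_down_power_of_2(4)); // 4 -> 4
}
```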

 // Merge the input values of the smaller stores to a single larger input value.
-Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_list) {
+Node* MergePrimitiveStores::make_merged_input_value(const Node_List& merge_list) {
   int new_memory_size = _store->memory_size() * merge_list.size();
   Node* first = merge_list.at(merge_list.size()-1);
   Node* merged_input_value = nullptr;
@ -3407,7 +3248,7 @@ Node* MergePrimitiveArrayStores::make_merged_input_value(const Node_List& merge_
 //  |       |       |       |        |       |       |       |        //
 //  last_store (= _store)            merged_store                     //
 //                                                                    //
-StoreNode* MergePrimitiveArrayStores::make_merged_store(const Node_List& merge_list, Node* merged_input_value) {
+StoreNode* MergePrimitiveStores::make_merged_store(const Node_List& merge_list, Node* merged_input_value) {
   Node* first_store = merge_list.at(merge_list.size()-1);
   Node* last_ctrl = _store->in(MemNode::Control); // after (optional) RangeCheck
   Node* first_mem = first_store->in(MemNode::Memory);
@ -3436,8 +3277,8 @@ StoreNode* MergePrimitiveArrayStores::make_merged_store(const Node_List& merge_l
   return merged_store;
 }

-#ifdef ASSERT
-void MergePrimitiveArrayStores::trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const {
+#ifndef PRODUCT
+void MergePrimitiveStores::trace(const Node_List& merge_list, const Node* merged_input_value, const StoreNode* merged_store) const {
   stringStream ss;
   ss.print_cr("[TraceMergeStores]: Replace");
   for (int i = (int)merge_list.size() - 1; i >= 0; i--) {
@ -3535,7 +3376,7 @@ Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {

   if (MergeStores && UseUnalignedAccesses) {
     if (phase->C->post_loop_opts_phase()) {
-      MergePrimitiveArrayStores merge(phase, this);
+      MergePrimitiveStores merge(phase, this);
       Node* progress = merge.run();
       if (progress != nullptr) { return progress; }
     } else {
src/hotspot/share/opto/mempointer.cpp (new file, 383 lines)

@ -0,0 +1,383 @@
/*
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "opto/mempointer.hpp"
#include "utilities/resourceHash.hpp"

// Recursively parse the pointer expression with a DFS all-path traversal
// (i.e. with node repetitions), starting at the pointer.
MemPointerDecomposedForm MemPointerDecomposedFormParser::parse_decomposed_form() {
  assert(_worklist.is_empty(), "no prior parsing");
  assert(_summands.is_empty(), "no prior parsing");

  Node* pointer = _mem->in(MemNode::Address);

  // Start with the trivial summand.
  _worklist.push(MemPointerSummand(pointer, NoOverflowInt(1)));

  // Decompose the summands until only terminal summands remain. This effectively
  // parses the pointer expression recursively.
  int traversal_count = 0;
  while (_worklist.is_nonempty()) {
    // Bail out if the graph is too complex.
    if (traversal_count++ > 1000) { return MemPointerDecomposedForm::make_trivial(pointer); }
    parse_sub_expression(_worklist.pop());
  }

  // Bail out if there is a constant overflow.
  if (_con.is_NaN()) { return MemPointerDecomposedForm::make_trivial(pointer); }

  // Sorting by variable idx means that all summands with the same variable are consecutive.
  // This simplifies the combining of summands with the same variable below.
  _summands.sort(MemPointerSummand::cmp_by_variable_idx);

  // Combine summands for the same variable, adding up the scales.
  int pos_put = 0;
  int pos_get = 0;
  while (pos_get < _summands.length()) {
    const MemPointerSummand& summand = _summands.at(pos_get++);
    Node* variable      = summand.variable();
    NoOverflowInt scale = summand.scale();
    // Add up scale of all summands with the same variable.
    while (pos_get < _summands.length() && _summands.at(pos_get).variable() == variable) {
      MemPointerSummand s = _summands.at(pos_get++);
      scale = scale + s.scale();
    }
    // Bail out if scale is NaN.
    if (scale.is_NaN()) {
      return MemPointerDecomposedForm::make_trivial(pointer);
    }
    // Keep summands with non-zero scale.
    if (!scale.is_zero()) {
      _summands.at_put(pos_put++, MemPointerSummand(variable, scale));
    }
  }
  _summands.trunc_to(pos_put);

  return MemPointerDecomposedForm::make(pointer, _summands, _con);
}
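The combine loop above is a classic sort-then-fold pass. A standalone model with plain longs standing in for NoOverflowInt and integer ids for node identity (all names invented for illustration):

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

struct Summand { int variable_id; long scale; };

int main() {
  // E.g. parsing "4*i + 8*j - 4*i + 4*i" could yield these raw summands.
  std::vector<Summand> summands = {{2, 4}, {3, 8}, {2, -4}, {2, 4}};

  // Sort by variable id so equal variables are consecutive.
  std::sort(summands.begin(), summands.end(),
            [](const Summand& a, const Summand& b) { return a.variable_id < b.variable_id; });

  // Fold the scales of consecutive equal variables; drop zero scales.
  std::vector<Summand> combined;
  for (size_t get = 0; get < summands.size(); ) {
    int id = summands[get].variable_id;
    long scale = 0;
    while (get < summands.size() && summands[get].variable_id == id) {
      scale += summands[get++].scale;
    }
    if (scale != 0) { combined.push_back({id, scale}); }
  }

  for (const Summand& s : combined) {
    printf("%ld * v%d\n", s.scale, s.variable_id); // 4 * v2, 8 * v3
  }
}
```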

// Parse a sub-expression of the pointer, starting at the current summand. We parse the
// current node, and see if it can be decomposed into further summands, or if the current
// summand is terminal.
void MemPointerDecomposedFormParser::parse_sub_expression(const MemPointerSummand& summand) {
  Node* n = summand.variable();
  const NoOverflowInt scale = summand.scale();
  const NoOverflowInt one(1);

  int opc = n->Opcode();
  if (is_safe_to_decompose_op(opc, scale)) {
    switch (opc) {
      case Op_ConI:
      case Op_ConL:
      {
        // Terminal: add to constant.
        NoOverflowInt con = (opc == Op_ConI) ? NoOverflowInt(n->get_int())
                                             : NoOverflowInt(n->get_long());
        _con = _con + scale * con;
        return;
      }
      case Op_AddP:
      case Op_AddL:
      case Op_AddI:
      {
        // Decompose addition.
        Node* a = n->in((opc == Op_AddP) ? 2 : 1);
        Node* b = n->in((opc == Op_AddP) ? 3 : 2);
        _worklist.push(MemPointerSummand(a, scale));
        _worklist.push(MemPointerSummand(b, scale));
        return;
      }
      case Op_SubL:
      case Op_SubI:
      {
        // Decompose subtraction.
        Node* a = n->in(1);
        Node* b = n->in(2);

        NoOverflowInt sub_scale = NoOverflowInt(-1) * scale;

        _worklist.push(MemPointerSummand(a, scale));
        _worklist.push(MemPointerSummand(b, sub_scale));
        return;
      }
      case Op_MulL:
      case Op_MulI:
      case Op_LShiftL:
      case Op_LShiftI:
      {
        // Only multiplication with constants is allowed: factor * variable
        // IGVN already folds constants to in(2). If we find a variable there
        // instead, we cannot further decompose this summand, and have to add
        // it to the terminal summands.
        Node* variable = n->in(1);
        Node* con      = n->in(2);
        if (!con->is_Con()) { break; }
        NoOverflowInt factor;
        switch (opc) {
          case Op_MulL:    // variable * con
            factor = NoOverflowInt(con->get_long());
            break;
          case Op_MulI:    // variable * con
            factor = NoOverflowInt(con->get_int());
            break;
          case Op_LShiftL: // variable << con = variable * (1 << con)
            factor = one << NoOverflowInt(con->get_int());
            break;
          case Op_LShiftI: // variable << con = variable * (1 << con)
            factor = one << NoOverflowInt(con->get_int());
            break;
        }

        // Accumulate scale.
        NoOverflowInt new_scale = scale * factor;

        _worklist.push(MemPointerSummand(variable, new_scale));
        return;
      }
      case Op_CastII:
      case Op_CastLL:
      case Op_CastX2P:
      case Op_ConvI2L:
      // On 32bit systems we can also look through ConvL2I, since the final result will always
      // be truncated back with ConvL2I. On 64bit systems we cannot decompose ConvL2I because
      // such int values will eventually be expanded to long with a ConvI2L:
      //
      //   valL = max_jint + 1
      //   ConvI2L(ConvL2I(valL)) = ConvI2L(min_jint) = min_jint != max_jint + 1 = valL
      //
      NOT_LP64( case Op_ConvL2I: )
      {
        // Decompose: look through.
        Node* a = n->in(1);
        _worklist.push(MemPointerSummand(a, scale));
        return;
      }
      default:
        // All other operations cannot be further decomposed. We just add them to the
        // terminal summands below.
        break;
    }
  }

  // Default: we could not parse the "summand" further, i.e. it is terminal.
  _summands.push(summand);
}
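To see the scale bookkeeping in action, consider decomposing `(i + 1) << 2`, as in an int-array index expression. A hand-evaluated toy trace of the rules above, with plain longs in place of NoOverflowInt:

```cpp
#include <cstdio>

// Toy trace: LShiftI(AddI(i, 1), 2) arrives with scale 1.
int main() {
  long scale = 1;
  // LShiftI by constant 2: the factor (1 << 2) multiplies the scale.
  scale *= (1L << 2);     // scale is now 4
  // AddI splits into two summands with the same scale:
  //   (i, 4) stays a variable summand; (1, 4) is a ConI terminal.
  long con = 0;
  con += scale * 1;       // the constant folds into con
  printf("result: %ld * i + %ld\n", scale, con); // 4 * i + 4
}
```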

// Check if the decomposition of operation opc is guaranteed to be safe.
// Please refer to the definition of "safe decomposition" in mempointer.hpp
bool MemPointerDecomposedFormParser::is_safe_to_decompose_op(const int opc, const NoOverflowInt& scale) const {
#ifndef _LP64
  // On 32-bit platforms, the pointer has 32bits, and thus any higher bits will always
  // be truncated. Thus, it does not matter if we have int or long overflows.
  // Simply put: all decompositions are (SAFE1).
  return true;
#else

  switch (opc) {
    // These operations are always safe to decompose, i.e. (SAFE1):
    case Op_ConI:
    case Op_ConL:
    case Op_AddP:
    case Op_AddL:
    case Op_SubL:
    case Op_MulL:
    case Op_LShiftL:
    case Op_CastII:
    case Op_CastLL:
    case Op_CastX2P:
    case Op_CastPP:
    case Op_ConvI2L:
      return true;

    // But on 64-bit platforms, these operations are not trivially safe to decompose:
    case Op_AddI:    // ConvI2L(a + b)     != ConvI2L(a) + ConvI2L(b)
    case Op_SubI:    // ConvI2L(a - b)     != ConvI2L(a) - ConvI2L(b)
    case Op_MulI:    // ConvI2L(a * conI)  != ConvI2L(a) * ConvI2L(conI)
    case Op_LShiftI: // ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI)
      break; // Analysis below.

    // All other operations are assumed not safe to decompose, or simply cannot be decomposed
    default:
      return false;
  }

  const TypeAryPtr* ary_ptr_t = _mem->adr_type()->isa_aryptr();
  if (ary_ptr_t != nullptr) {
    // Array accesses that are not Unsafe always have a RangeCheck which ensures
    // that there is no int overflow. And without overflows, all decompositions
    // are (SAFE1).
    if (!_mem->is_unsafe_access()) {
      return true;
    }

    // Intuition: In general, the decomposition of AddI, SubI, MulI or LShiftI is not safe,
    //            because of overflows. But under some conditions, we can prove that such a
    //            decomposition is (SAFE2). Intuitively, we want to prove that an overflow
    //            would mean that the pointers have such a large distance, that at least one
    //            must lie out of bounds. In the proof of the "MemPointer Lemma", we thus
    //            get a contradiction with the condition that both pointers are in bounds.
    //
    //            We prove that the decomposition of AddI, SubI, MulI (with constant) and ShiftI (with
    //            constant) is (SAFE2), under the condition:
    //
    //              abs(scale) % array_element_size_in_bytes = 0
    //
    //            First, we describe how the decomposition works:
    //
    //              mp_i = con + sum(other_summands) + summand
    //                     -------------------------   -------
    //                     rest                        scale * ConvI2L(op)
    //
    //            We decompose the summand depending on the op, where we know that there is some
    //            integer y, such that:
    //
    //              scale * ConvI2L(a + b)    = scale * ConvI2L(a) + scale * ConvI2L(b) + scale * y * 2^32
    //              scale * ConvI2L(a - b)    = scale * ConvI2L(a) - scale * ConvI2L(b) + scale * y * 2^32
    //              scale * ConvI2L(a * con)  = scale * con * ConvI2L(a)                + scale * y * 2^32
    //              scale * ConvI2L(a << con) = scale * (1 << con) * ConvI2L(a)         + scale * y * 2^32
    //              \_______________________/  \_____________________________________/   \______________/
    //              before decomposition        after decomposition ("new_summands")     overflow correction
    //
    //            Thus, for AddI and SubI, we get:
    //              summand = new_summand1 + new_summand2 + scale * y * 2^32
    //
    //              mp_{i+1} = con + sum(other_summands) + new_summand1 + new_summand2
    //                       = con + sum(other_summands) + summand - scale * y * 2^32
    //                       = mp_i                                - scale * y * 2^32
    //
    //            And for MulI and ShiftI we get:
    //              summand = new_summand + scale * y * 2^32
    //
    //              mp_{i+1} = con + sum(other_summands) + new_summand
    //                       = con + sum(other_summands) + summand - scale * y * 2^32
    //                       = mp_i                                - scale * y * 2^32
    //
    //            Further:
    //              abs(scale) % array_element_size_in_bytes = 0
    //            implies that there is some integer z, such that:
    //              z * array_element_size_in_bytes = scale
    //
    //            And hence, with "x = y * z", the decomposition is (SAFE2) under the assumed condition:
    //              mp_i = mp_{i+1} + scale * y * 2^32
    //                   = mp_{i+1} + z * array_element_size_in_bytes * y * 2^32
    //                   = mp_{i+1} + x * array_element_size_in_bytes * 2^32
    //
    BasicType array_element_bt = ary_ptr_t->elem()->array_element_basic_type();
    if (is_java_primitive(array_element_bt)) {
      NoOverflowInt array_element_size_in_bytes = NoOverflowInt(type2aelembytes(array_element_bt));
      if (scale.is_multiple_of(array_element_size_in_bytes)) {
        return true;
      }
    }
  }

  return false;
#endif
}
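The int-overflow hazard that this analysis guards against is easy to demonstrate numerically. The two sides of the AddI identity differ by exactly the `scale * y * 2^32` correction term from the (SAFE2) argument (here scale = 1):

```cpp
#include <cstdint>
#include <cstdio>

// ConvI2L(a + b) wraps in 32-bit arithmetic before widening, while
// ConvI2L(a) + ConvI2L(b) widens first and never wraps.
int main() {
  int32_t a = INT32_MAX;
  int32_t b = 1;
  int64_t widened_after_add = (int64_t)(int32_t)((uint32_t)a + (uint32_t)b); // ConvI2L(a + b)
  int64_t added_after_widen = (int64_t)a + (int64_t)b;                       // ConvI2L(a) + ConvI2L(b)
  printf("%lld vs %lld\n", (long long)widened_after_add, (long long)added_after_widen);
  // -2147483648 vs 2147483648: the results differ by exactly 2^32.
}
```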

// Compute the aliasing between two MemPointerDecomposedForm. We use the "MemPointer Lemma" to
// prove that the computed aliasing also applies for the underlying pointers. Note that the
// condition (S0) is already given, because the MemPointerDecomposedForm is always constructed
// using only safe decompositions.
//
// Pre-Condition:
//   We assume that both pointers are in-bounds of their respective memory object. If this does
//   not hold, for example, with the use of Unsafe, then we would already have undefined behavior,
//   and we are allowed to do anything.
MemPointerAliasing MemPointerDecomposedForm::get_aliasing_with(const MemPointerDecomposedForm& other
                                                               NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const {
#ifndef PRODUCT
  if (trace.is_trace_aliasing()) {
    tty->print_cr("MemPointerDecomposedForm::get_aliasing_with:");
    print_on(tty);
    other.print_on(tty);
  }
#endif

  // "MemPointer Lemma" condition (S2): check if all summands are the same:
  for (uint i = 0; i < SUMMANDS_SIZE; i++) {
    const MemPointerSummand s1 = summands_at(i);
    const MemPointerSummand s2 = other.summands_at(i);
    if (s1 != s2) {
#ifndef PRODUCT
      if (trace.is_trace_aliasing()) {
        tty->print_cr(" -> Aliasing unknown, differ on summand %d.", i);
      }
#endif
      return MemPointerAliasing::make_unknown();
    }
  }

  // "MemPointer Lemma" condition (S3): check that the constants do not differ too much:
  const NoOverflowInt distance = other.con() - con();
  // We must check that: abs(distance) < 2^32
  // However, this is only false if: distance = min_jint
  if (distance.is_NaN() || distance.value() == min_jint) {
#ifndef PRODUCT
    if (trace.is_trace_aliasing()) {
      tty->print(" -> Aliasing unknown, bad distance: ");
      distance.print_on(tty);
      tty->cr();
    }
#endif
    return MemPointerAliasing::make_unknown();
  }

  // "MemPointer Lemma" condition (S1):
  // Given that all summands are the same, we know that both pointers point into the
  // same memory object. With the Pre-Condition, we know that both pointers are in
  // bounds of that same memory object.

  // Hence, all 4 conditions of the "MemoryPointer Lemma" are established, and hence
  // we know that the distance between the underlying pointers is equal to the distance
  // we computed for the MemPointers:
  //   p_other - p_this = distance = other.con - this.con
#ifndef PRODUCT
  if (trace.is_trace_aliasing()) {
    tty->print_cr(" -> Aliasing always, distance = %d.", distance.value());
  }
#endif
  return MemPointerAliasing::make_always(distance.value());
}
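The min_jint special case exists because jint negation overflows: abs(min_jint) is not representable, so the distance check cannot treat that one value like the others. A one-line demonstration:

```cpp
#include <cstdint>
#include <cstdio>

// In 32-bit two's complement, negating min_jint wraps back to min_jint,
// so abs(distance) would be wrong for exactly this value.
int main() {
  int32_t min_jint = INT32_MIN;
  int32_t negated = (int32_t)(0u - (uint32_t)min_jint); // well-defined via unsigned math
  printf("negated == min_jint: %s\n", negated == min_jint ? "true" : "false"); // true
}
```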

bool MemPointer::is_adjacent_to_and_before(const MemPointer& other) const {
  const MemPointerDecomposedForm& s1 = decomposed_form();
  const MemPointerDecomposedForm& s2 = other.decomposed_form();
  const MemPointerAliasing aliasing = s1.get_aliasing_with(s2 NOT_PRODUCT( COMMA _trace ));
  const jint size = mem()->memory_size();
  const bool is_adjacent = aliasing.is_always_at_distance(size);

#ifndef PRODUCT
  if (_trace.is_trace_adjacency()) {
    tty->print("Adjacent: %s, because size = %d and aliasing = ",
               is_adjacent ? "true" : "false", size);
    aliasing.print_on(tty);
    tty->cr();
  }
#endif

  return is_adjacent;
}
src/hotspot/share/opto/mempointer.hpp (new file, 618 lines)

@ -0,0 +1,618 @@
/*
|
||||
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_OPTO_MEMPOINTER_HPP
|
||||
#define SHARE_OPTO_MEMPOINTER_HPP
|
||||
|
||||
#include "opto/memnode.hpp"
|
||||
#include "opto/noOverflowInt.hpp"
|
||||
|
||||
// The MemPointer is a shared facility to parse pointers and check the aliasing of pointers,
|
||||
// e.g. checking if two stores are adjacent.
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------
|
||||
//
|
||||
// Intuition and Examples:
|
||||
// We parse / decompose pointers into a linear form:
|
||||
//
|
||||
// pointer = SUM(scale_i * variable_i) + con
|
||||
//
|
||||
// where SUM() adds all "scale_i * variable_i" for each i together.
|
||||
//
|
||||
// The con and scale_i are compile-time constants (NoOverflowInt), and the variable_i are
|
||||
// compile-time variables (C2 nodes).
|
||||
//
|
||||
// For the MemPointer, we do not explicitly track the base address. For Java heap pointers, the
|
||||
// base address is just a variable in a summand with scale == 1. For native memory (C heap)
|
||||
// pointers, the base address is null, and is hence implicitly a zero constant.
|
||||
//
|
||||
//
|
||||
// Example 1: byte array access:
|
||||
//
|
||||
// array[i]
|
||||
//
|
||||
// pointer = array_base + ARRAY_BYTE_BASE_OFFSET + 1 * i
|
||||
// = 1 * array_base + ARRAY_BYTE_BASE_OFFSET + 1 * i
|
||||
// -------------------- ---------------------- --------------------
|
||||
// = scale_0 * variable_0 + con + scale_1 * variable_1
|
||||
//
|
||||
//
|
||||
// Example 2: int array access
|
||||
//
|
||||
// array[5 + i + 3 * j]
|
||||
//
|
||||
// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * 5 + 4 * i + 4 * 3 * j
|
||||
// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 20 + 4 * i + 12 * j
|
||||
// -------------------- ----------------------------- -------------------- --------------------
|
||||
// = scale_0 * variable_0 + con + scale_1 * variable_1 + scale_2 * variable_2
|
||||
//
|
||||
//
|
||||
// Example 3: Unsafe with int array
|
||||
//
|
||||
// UNSAFE.getInt(array, ARRAY_INT_BASE_OFFSET + 4 * i);
|
||||
//
|
||||
// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * i
|
||||
// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 4 * i
|
||||
// -------------------- --------------------- --------------------
|
||||
// = scale_0 * variable_0 + con + scale_1 * variable_1
|
||||
//
|
||||
//
|
||||
// Example 4: Unsafe with native memory address
|
||||
//
|
||||
// long address;
|
||||
// UNSAFE.getInt(null, address + 4 * i);
|
||||
//
|
||||
// pointer = address + 4 * i
|
||||
// = 1 * address + 0 + 4 * i
|
||||
// -------------------- --- --------------------
|
||||
// = scale_0 * variable_0 + con + scale_1 * variable_1
|
||||
//
|
||||
//
|
||||
// Example 5: MemorySegment with byte array as backing type
|
||||
//
|
||||
// byte[] array = new byte[1000];
|
||||
// MemorySegment ms = MemorySegment.ofArray(array);
|
||||
// assert ms.heapBase().get() == array: "array is base";
|
||||
// assert ms.address() == 0: "zero offset from base";
|
||||
// byte val = ms.get(ValueLayout.JAVA_BYTE, i);
|
||||
//
|
||||
// pointer = ms.heapBase() + ARRAY_BYTE_BASE_OFFSET + ms.address() + i
|
||||
// = 1 * array_base + ARRAY_BYTE_BASE_OFFSET + 0 + 1 * i
|
||||
// ----------------------- ------------------------------------- --------------------
|
||||
// = scale_0 * variable_0 + con + scale_1 * variable_1
|
||||
//
|
||||
//
|
||||
// Example 6: MemorySegment with native memory
|
||||
//
|
||||
// MemorySegment ms = Arena.ofAuto().allocate(1000, 1);
|
||||
// assert ms.heapBase().isEmpty(): "null base";
|
||||
// assert ms.address() != 0: "non-zero native memory address";
|
||||
// short val = ms.get(ValueLayout.JAVA_SHORT, 2L * i);
|
||||
//
|
||||
// pointer = ms.heapBase() + ms.address() + 2 i
|
||||
// = 0 + 1 * ms.address() + 2 * i
|
||||
// ------------ ---------------------- --------------------
|
||||
// = con scale_0 * variable_0 + scale_1 * variable_1
|
||||
//
|
||||
//
|
||||
// Example 7: Non-linear access to int array
|
||||
//
|
||||
// array[5 + i + j * k]
|
||||
//
|
||||
// pointer = array_base + ARRAY_INT_BASE_OFFSET + 4 * 5 + 4 * i + 4 * j * k
|
||||
// = 1 * array_base + ARRAY_INT_BASE_OFFSET + 20 + 4 * i + 4 * j * k
|
||||
// -------------------- ----------------------------- -------------------- --------------------
|
||||
// = scale_0 * variable_0 + con + scale_1 * variable_1 + scale_2 * variable_2
|
||||
//
|
||||
// Note: we simply stop parsing once a term is not linear. We keep "j * k" as its own variable.
|
||||
//
|
||||
//
|
||||
// Example 8: Unsafe with native memory address, non-linear access
|
||||
//
|
||||
// UNSAFE.getInt(null, i * j);
|
||||
//
|
||||
// pointer = i * j
|
||||
// = 0 + 1 * i * j
|
||||
// --- --------------------
|
||||
// = con + scale_0 * variable_0
|
||||
//
|
||||
// Note: we can always parse a pointer into its trivial linear form:
|
||||
//
|
||||
// pointer = 0 + 1 * pointer.
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------
|
||||
//
|
||||
// MemPointerDecomposedForm:
|
||||
// When the pointer is parsed, it is decomposed into a SUM of summands plus a constant:
|
||||
//
|
||||
// pointer = SUM(summands) + con
|
||||
//
|
||||
// Where each summand_i in summands has the form:
|
||||
//
|
||||
// summand_i = scale_i * variable_i
|
||||
//
|
||||
// Hence, the full decomposed form is:
|
||||
//
|
||||
// pointer = SUM(scale_i * variable_i) + con
|
||||
//
|
||||
// Note: the scale_i are compile-time constants (NoOverflowInt), and the variable_i are
|
||||
// compile-time variables (C2 nodes).
|
||||
// On 64-bit systems, this decomposed form is computed with long-add/mul, on 32-bit systems
|
||||
// it is computed with int-add/mul.
|
||||
//
|
||||
// MemPointerAliasing:
|
||||
// The decomposed form allows us to determine the aliasing between two pointers easily. For
|
||||
// example, if two pointers are identical, except for their constant:
|
||||
//
|
||||
// pointer1 = SUM(summands) + con1
|
||||
// pointer2 = SUM(summands) + con2
|
||||
//
|
||||
// then we can easily compute the distance between the pointers (distance = con2 - con1),
|
||||
// and determine if they are adjacent.
|
||||
//
|
||||
// MemPointerDecomposedFormParser:
|
||||
// Any pointer can be parsed into this (default / trivial) decomposed form:
|
||||
//
|
||||
// pointer = 1 * pointer + 0
|
||||
// scale_0 * variable_0 + con
|
||||
//
|
||||
// However, this is not particularly useful to compute aliasing. We would like to decompose
|
||||
// the pointer as far as possible, i.e. extract as many summands and add up the constants to
|
||||
// a single constant.
|
||||
//
|
||||
// Example (normal int-array access):
|
||||
// pointer1 = array[i + 0] = array_base + array_int_base_offset + 4L * ConvI2L(i + 0)
|
||||
// pointer2 = array[i + 1] = array_base + array_int_base_offset + 4L * ConvI2L(i + 1)
|
||||
//
|
||||
// At first, computing the aliasing is not immediately straight-forward in the general case because
|
||||
// the distance is hidden inside the ConvI2L. We can convert this (with array_int_base_offset = 16)
|
||||
// into these decomposed forms:
|
||||
//
|
||||
// pointer1 = 1L * array_base + 4L * i + 16L
|
||||
// pointer2 = 1L * array_base + 4L * i + 20L
|
||||
//
|
||||
// This allows us to easily see that these two pointers are adjacent (distance = 4).
|
||||
//
|
||||
// Hence, in MemPointerDecomposedFormParser::parse_decomposed_form, we start with the pointer as
|
||||
// a trivial summand. A summand can either be decomposed further or it is terminal (cannot
|
||||
// be decomposed further). We decompose the summands recursively until all remaining summands
|
||||
// are terminal, see MemPointerDecomposedFormParser::parse_sub_expression. This effectively parses
|
||||
// the pointer expression recursively.
|
||||
//
|
// -----------------------------------------------------------------------------------------
//
// We have to be careful on 64-bit systems with ConvI2L: decomposing its input is not
// correct in general, overflows may not be preserved in the decomposed form:
//
//   AddI:    ConvI2L(a +  b)    != ConvI2L(a) +  ConvI2L(b)
//   SubI:    ConvI2L(a -  b)    != ConvI2L(a) -  ConvI2L(b)
//   MulI:    ConvI2L(a *  conI) != ConvI2L(a) *  ConvI2L(conI)
//   LShiftI: ConvI2L(a << conI) != ConvI2L(a) << ConvI2L(conI)
//
// If we want to prove the correctness of MemPointerAliasing, we need some guarantees
// that the MemPointers adequately represent the underlying pointers, such that we can
// compute the aliasing based on the summands and constants.
//
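A concrete instance of the first line, as plain standalone C++ (the casts model Java's wrapping 32-bit addition followed by the sign-extension that ConvI2L performs):

  #include <cassert>
  #include <cstdint>

  int main() {
    int32_t a = INT32_MAX;  // 2^31 - 1
    int32_t b = 1;

    // ConvI2L(a + b): the int addition wraps around first, then is sign-extended.
    int64_t lhs = (int64_t)(int32_t)((uint32_t)a + (uint32_t)b);  // -2^31

    // ConvI2L(a) + ConvI2L(b): each operand is widened first, so nothing wraps.
    int64_t rhs = (int64_t)a + (int64_t)b;                        //  2^31

    assert(lhs != rhs);  // the naive decomposition would lose this overflow
    return 0;
  }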
// -----------------------------------------------------------------------------------------
//
// Below, we will formulate a "MemPointer Lemma" that helps us to prove the correctness of
// the MemPointerAliasing computations. To prove the "MemPointer Lemma", we need to define
// the idea of a "safe decomposition", and then prove that all the decompositions we apply
// are such "safe decompositions".
//
//
// Definition: Safe decomposition
//   Trivial decomposition:
//     (SAFE0) The trivial decomposition from p to mp_0 = 0 + 1 * p is always safe.
//
//   Non-trivial decomposition:
//     We decompose a summand in:
//       mp_i     = con + summand + SUM(other_summands)
//     resulting in:
//       mp_{i+1} = con + dec_con + SUM(dec_summands) + SUM(other_summands)
//                = new_con + SUM(new_summands)
//     where mp_i means that the original pointer p was decomposed i times.
//
//     We call a non-trivial decomposition safe if either:
//     (SAFE1) No matter the values of the summand variables:
//               mp_i = mp_{i+1}
//
//     (SAFE2) The pointer is on an array with a known array_element_size_in_bytes,
//             and there is an integer x, such that:
//               mp_i = mp_{i+1} + x * array_element_size_in_bytes * 2^32
//
//             Note: if "x = 0", we have "mp_i = mp_{i+1}", and if "x != 0", then mp_i and
//                   mp_{i+1} have a distance at least twice as large as the array size, and
//                   so at least one of mp_i or mp_{i+1} must be out of bounds of the array.
//
// MemPointer Lemma:
//   Given two pointers p1 and p2, and their respective MemPointers mp1 and mp2.
//   If these conditions hold:
//     (S0) mp1 and mp2 are constructed only with safe decompositions (SAFE0, SAFE1, SAFE2)
//          from p1 and p2, respectively.
//     (S1) Both p1 and p2 are within the bounds of the same memory object.
//     (S2) The constants do not differ too much: abs(mp1.con - mp2.con) < 2^31.
//     (S3) All summands of mp1 and mp2 are identical (i.e. only the constants are possibly different).
//
//   then the pointer difference between p1 and p2 is identical to the difference between
//   mp1 and mp2:
//     p1 - p2 = mp1 - mp2
//
//   Note: MemPointerDecomposedForm::get_aliasing_with relies on this MemPointer Lemma to
//         prove the correctness of its aliasing computation between two MemPointers.
//
//
//   Note: MemPointerDecomposedFormParser::is_safe_to_decompose_op checks that all
//         decompositions we apply are safe.
//
//
// Proof of the "MemPointer Lemma":
//   Assume (S0-S3) and show that
//     p1 - p2 = mp1 - mp2
//
//   We make a case distinction over the types of decompositions used in the construction of mp1 and mp2.
//
//   Trivial Case: Only trivial (SAFE0) decompositions were used:
//       mp1 = 0 + 1 * p1 = p1
//       mp2 = 0 + 1 * p2 = p2
//     =>
//       p1 - p2 = mp1 - mp2
//
//   Unsafe Case: We apply at least one unsafe decomposition:
//     This is a contradiction to (S0) and we are done.
//
//   Case 1: Only decompositions of type (SAFE0) and (SAFE1) are used:
//     We make an induction proof over the decompositions from p1 to mp1, starting with
//     the trivial decomposition (SAFE0):
//       mp1_0 = 0 + 1 * p1 = p1
//     Then for the i-th non-trivial decomposition (SAFE1) we know that
//       mp1_i = mp1_{i+1}
//     and hence, after the n-th non-trivial decomposition from p1:
//       p1 = mp1_0 = mp1_i = mp1_n = mp1
//     Analogously, we can prove:
//       p2 = mp2
//
//       p1 = mp1
//       p2 = mp2
//     =>
//       p1 - p2 = mp1 - mp2
//
//   Case 2: At least one decomposition of type (SAFE2) and no unsafe decomposition is used.
//     Given we have (SAFE2) decompositions, we know that we are operating on an array of
//     known array_element_size_in_bytes. We can weaken the guarantees from (SAFE1)
//     decompositions to the same guarantee as (SAFE2) decompositions. Hence all applied
//     non-trivial decompositions satisfy:
//       mp1_i = mp1_{i+1} + x1_i * array_element_size_in_bytes * 2^32
//     where x1_i = 0 for (SAFE1) decompositions.
//
//     We make an induction proof over the decompositions from p1 to mp1, starting with
//     the trivial decomposition (SAFE0):
//       mp1_0 = 0 + 1 * p1 = p1
//     Then for the i-th non-trivial decomposition (SAFE1) or (SAFE2), we know that
//       mp1_i = mp1_{i+1} + x1_i * array_element_size_in_bytes * 2^32
//     and hence, if mp1 was decomposed with n non-trivial decompositions (SAFE1) or (SAFE2) from p1:
//       p1 = mp1 + x1 * array_element_size_in_bytes * 2^32
//     where
//       x1 = SUM(x1_i)
//     Analogously, we can prove:
//       p2 = mp2 + x2 * array_element_size_in_bytes * 2^32
//
//     And hence, with x = x1 - x2 we have:
//       p1 - p2 = mp1 - mp2 + x * array_element_size_in_bytes * 2^32
//
//     If "x = 0", then it follows:
//       p1 - p2 = mp1 - mp2
//
//     If "x != 0", then:
//       abs(p1 - p2) =  abs(mp1 - mp2 + x * array_element_size_in_bytes * 2^32)
//                    >= abs(x * array_element_size_in_bytes * 2^32) - abs(mp1 - mp2)
//                       -- apply x != 0 --
//                    >= array_element_size_in_bytes * 2^32 - abs(mp1 - mp2)
//                       -- apply (S3) --
//                    =  array_element_size_in_bytes * 2^32 - abs(mp1.con - mp2.con)
//                       -- apply (S2) --
//                    >  array_element_size_in_bytes * 2^32 - 2^31
//                       -- apply array_element_size_in_bytes > 0 --
//                    >= array_element_size_in_bytes * 2^31
//                    >= max_possible_array_size_in_bytes
//                    >= array_size_in_bytes
//
//     This shows that p1 and p2 have a distance greater than the array size, and hence at least one of the two
//     pointers must be out of bounds. This contradicts our assumption (S1) and we are done.

#ifndef PRODUCT
class TraceMemPointer : public StackObj {
private:
  const bool _is_trace_pointer;
  const bool _is_trace_aliasing;
  const bool _is_trace_adjacency;

public:
  TraceMemPointer(const bool is_trace_pointer,
                  const bool is_trace_aliasing,
                  const bool is_trace_adjacency) :
    _is_trace_pointer(  is_trace_pointer),
    _is_trace_aliasing( is_trace_aliasing),
    _is_trace_adjacency(is_trace_adjacency)
  {}

  bool is_trace_pointer()   const { return _is_trace_pointer; }
  bool is_trace_aliasing()  const { return _is_trace_aliasing; }
  bool is_trace_adjacency() const { return _is_trace_adjacency; }
};
#endif

// Class to represent aliasing between two MemPointer.
class MemPointerAliasing {
public:
  enum Aliasing {
    Unknown, // Distance unknown.
             //   Example: two "int[]" with different variable index offsets.
             //            e.g. "array[i]  vs  array[j]".
             //            e.g. "array1[i] vs  array2[j]".
    Always}; // Constant distance = p1 - p2.
             //   Example: The same address expression, except for a constant offset
             //            e.g. "array[i]  vs  array[i+1]".
private:
  const Aliasing _aliasing;
  const jint _distance;

  MemPointerAliasing(const Aliasing aliasing, const jint distance) :
    _aliasing(aliasing),
    _distance(distance)
  {
    assert(_distance != min_jint, "given by condition (S2) of MemPointer Lemma");
  }

public:
  static MemPointerAliasing make_unknown() {
    return MemPointerAliasing(Unknown, 0);
  }

  static MemPointerAliasing make_always(const jint distance) {
    return MemPointerAliasing(Always, distance);
  }

  // Use case: exact aliasing and adjacency.
  bool is_always_at_distance(const jint distance) const {
    return _aliasing == Always && _distance == distance;
  }

#ifndef PRODUCT
  void print_on(outputStream* st) const {
    switch(_aliasing) {
      case Unknown: st->print("Unknown");               break;
      case Always:  st->print("Always(%d)", _distance); break;
      default: ShouldNotReachHere();
    }
  }
#endif
};

// Summand of a MemPointerDecomposedForm:
//
//   summand = scale * variable
//
// where variable is a C2 node.
class MemPointerSummand : public StackObj {
private:
  Node* _variable;
  NoOverflowInt _scale;

public:
  MemPointerSummand() :
    _variable(nullptr),
    _scale(NoOverflowInt::make_NaN()) {}
  MemPointerSummand(Node* variable, const NoOverflowInt& scale) :
    _variable(variable),
    _scale(scale)
  {
    assert(_variable != nullptr, "must have variable");
    assert(!_scale.is_zero(), "non-zero scale");
  }

  Node* variable() const { return _variable; }
  NoOverflowInt scale() const { return _scale; }

  static int cmp_by_variable_idx(MemPointerSummand* p1, MemPointerSummand* p2) {
    if (p1->variable() == nullptr) {
      return (p2->variable() == nullptr) ? 0 : 1;
    } else if (p2->variable() == nullptr) {
      return -1;
    }

    return p1->variable()->_idx - p2->variable()->_idx;
  }

  friend bool operator==(const MemPointerSummand a, const MemPointerSummand b) {
    // Both "null" -> equal.
    if (a.variable() == nullptr && b.variable() == nullptr) { return true; }

    // Same variable and scale?
    if (a.variable() != b.variable()) { return false; }
    return a.scale() == b.scale();
  }

  friend bool operator!=(const MemPointerSummand a, const MemPointerSummand b) {
    return !(a == b);
  }

#ifndef PRODUCT
  void print_on(outputStream* st) const {
    st->print("Summand[");
    _scale.print_on(st);
    st->print(" * [%d %s]]", _variable->_idx, _variable->Name());
  }
#endif
};

// Decomposed form of the pointer sub-expression of "pointer".
//
//   pointer = SUM(summands) + con
//
class MemPointerDecomposedForm : public StackObj {
private:
  // We limit the number of summands to 10. This is just a best guess, and not at this
  // point supported by evidence. But I think it is reasonable: usually, a pointer
  // contains a base pointer (e.g. array pointer or null for native memory) and a few
  // variables. It should be rare that we have more than 9 variables.
  static const int SUMMANDS_SIZE = 10;

  Node* _pointer; // pointer node associated with this (sub)pointer

  MemPointerSummand _summands[SUMMANDS_SIZE];
  NoOverflowInt _con;

public:
  // Empty
  MemPointerDecomposedForm() : _pointer(nullptr), _con(NoOverflowInt::make_NaN()) {}

private:
  // Default / trivial: pointer = 0 + 1 * pointer
  MemPointerDecomposedForm(Node* pointer) : _pointer(pointer), _con(NoOverflowInt(0)) {
    assert(pointer != nullptr, "pointer must be non-null");
    _summands[0] = MemPointerSummand(pointer, NoOverflowInt(1));
  }

  MemPointerDecomposedForm(Node* pointer, const GrowableArray<MemPointerSummand>& summands, const NoOverflowInt& con)
    : _pointer(pointer), _con(con) {
    assert(!_con.is_NaN(), "non-NaN constant");
    assert(summands.length() <= SUMMANDS_SIZE, "summands must fit");
    for (int i = 0; i < summands.length(); i++) {
      MemPointerSummand s = summands.at(i);
      assert(s.variable() != nullptr, "variable cannot be null");
      assert(!s.scale().is_NaN(), "non-NaN scale");
      _summands[i] = s;
    }
  }

public:
  static MemPointerDecomposedForm make_trivial(Node* pointer) {
    return MemPointerDecomposedForm(pointer);
  }

  static MemPointerDecomposedForm make(Node* pointer, const GrowableArray<MemPointerSummand>& summands, const NoOverflowInt& con) {
    if (summands.length() <= SUMMANDS_SIZE) {
      return MemPointerDecomposedForm(pointer, summands, con);
    } else {
      return MemPointerDecomposedForm::make_trivial(pointer);
    }
  }

  MemPointerAliasing get_aliasing_with(const MemPointerDecomposedForm& other
                                       NOT_PRODUCT( COMMA const TraceMemPointer& trace) ) const;

  const MemPointerSummand summands_at(const uint i) const {
    assert(i < SUMMANDS_SIZE, "in bounds");
    return _summands[i];
  }

  const NoOverflowInt con() const { return _con; }

#ifndef PRODUCT
  void print_on(outputStream* st) const {
    if (_pointer == nullptr) {
      st->print_cr("MemPointerDecomposedForm empty.");
      return;
    }
    st->print("MemPointerDecomposedForm[%d %s: con = ", _pointer->_idx, _pointer->Name());
    _con.print_on(st);
    for (int i = 0; i < SUMMANDS_SIZE; i++) {
      const MemPointerSummand& summand = _summands[i];
      if (summand.variable() != nullptr) {
        st->print(", ");
        summand.print_on(st);
      }
    }
    st->print_cr("]");
  }
#endif
};

class MemPointerDecomposedFormParser : public StackObj {
private:
  const MemNode* _mem;

  // Internal data-structures for parsing.
  NoOverflowInt _con;
  GrowableArray<MemPointerSummand> _worklist;
  GrowableArray<MemPointerSummand> _summands;

  // Resulting decomposed-form.
  MemPointerDecomposedForm _decomposed_form;

public:
  MemPointerDecomposedFormParser(const MemNode* mem) : _mem(mem), _con(NoOverflowInt(0)) {
    _decomposed_form = parse_decomposed_form();
  }

  const MemPointerDecomposedForm decomposed_form() const { return _decomposed_form; }

private:
  MemPointerDecomposedForm parse_decomposed_form();
  void parse_sub_expression(const MemPointerSummand& summand);

  bool is_safe_to_decompose_op(const int opc, const NoOverflowInt& scale) const;
};

// Facility to parse the pointer of a Load or Store, so that aliasing between two such
// memory operations can be determined (e.g. adjacency).
class MemPointer : public StackObj {
private:
  const MemNode* _mem;
  const MemPointerDecomposedForm _decomposed_form;

  NOT_PRODUCT( const TraceMemPointer& _trace; )

public:
  MemPointer(const MemNode* mem NOT_PRODUCT( COMMA const TraceMemPointer& trace)) :
    _mem(mem),
    _decomposed_form(init_decomposed_form(_mem))
    NOT_PRODUCT( COMMA _trace(trace) )
  {
#ifndef PRODUCT
    if (_trace.is_trace_pointer()) {
      tty->print_cr("MemPointer::MemPointer:");
      tty->print("mem: "); mem->dump();
      _mem->in(MemNode::Address)->dump_bfs(5, 0, "d");
      _decomposed_form.print_on(tty);
    }
#endif
  }

  const MemNode* mem() const { return _mem; }
  const MemPointerDecomposedForm decomposed_form() const { return _decomposed_form; }
  bool is_adjacent_to_and_before(const MemPointer& other) const;

private:
  static const MemPointerDecomposedForm init_decomposed_form(const MemNode* mem) {
    assert(mem->is_Store(), "only stores are supported");
    ResourceMark rm;
    MemPointerDecomposedFormParser parser(mem);
    return parser.decomposed_form();
  }
};

#endif // SHARE_OPTO_MEMPOINTER_HPP
114 src/hotspot/share/opto/noOverflowInt.hpp Normal file
@ -0,0 +1,114 @@
/*
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_OPTO_NOOVERFLOWINT_HPP
#define SHARE_OPTO_NOOVERFLOWINT_HPP

#include "utilities/ostream.hpp"

// Wrapper around jint, which detects overflow.
// If any operation overflows, then it returns a NaN.
class NoOverflowInt {
private:
  bool _is_NaN; // overflow, uninitialized, etc.
  jint _value;

public:
  // Default: NaN.
  constexpr NoOverflowInt() : _is_NaN(true), _value(0) {}

  // Create from jlong (or jint) -> NaN if overflows jint.
  constexpr explicit NoOverflowInt(jlong value) : _is_NaN(true), _value(0) {
    jint trunc = (jint)value;
    if ((jlong)trunc == value) {
      _is_NaN = false;
      _value = trunc;
    }
  }

  static constexpr NoOverflowInt make_NaN() { return NoOverflowInt(); }

  bool is_NaN() const { return _is_NaN; }
  jint value() const { assert(!is_NaN(), "NaN not allowed"); return _value; }
  bool is_zero() const { return !is_NaN() && value() == 0; }

  friend NoOverflowInt operator+(const NoOverflowInt& a, const NoOverflowInt& b) {
    if (a.is_NaN()) { return a; }
    if (b.is_NaN()) { return b; }
    return NoOverflowInt((jlong)a.value() + (jlong)b.value());
  }

  friend NoOverflowInt operator-(const NoOverflowInt& a, const NoOverflowInt& b) {
    if (a.is_NaN()) { return a; }
    if (b.is_NaN()) { return b; }
    return NoOverflowInt((jlong)a.value() - (jlong)b.value());
  }

  friend NoOverflowInt operator*(const NoOverflowInt& a, const NoOverflowInt& b) {
    if (a.is_NaN()) { return a; }
    if (b.is_NaN()) { return b; }
    return NoOverflowInt((jlong)a.value() * (jlong)b.value());
  }

  friend NoOverflowInt operator<<(const NoOverflowInt& a, const NoOverflowInt& b) {
    if (a.is_NaN()) { return a; }
    if (b.is_NaN()) { return b; }
    jint shift = b.value();
    if (shift < 0 || shift > 31) { return make_NaN(); }
    return NoOverflowInt((jlong)a.value() << shift);
  }

  friend bool operator==(const NoOverflowInt& a, const NoOverflowInt& b) {
    if (a.is_NaN()) { return false; }
    if (b.is_NaN()) { return false; }
    return a.value() == b.value();
  }

  NoOverflowInt abs() const {
    if (is_NaN()) { return *this; }
    if (value() >= 0) { return *this; }
    return NoOverflowInt(0) - *this;
  }

  bool is_multiple_of(const NoOverflowInt& other) const {
    NoOverflowInt a = this->abs();
    NoOverflowInt b = other.abs();
    if (a.is_NaN()) { return false; }
    if (b.is_NaN()) { return false; }
    if (b.is_zero()) { return false; }
    return a.value() % b.value() == 0;
  }

#ifndef PRODUCT
  void print_on(outputStream* st) const {
    if (is_NaN()) {
      st->print("NaN");
    } else {
      st->print("%d", value());
    }
  }
#endif
};

#endif // SHARE_OPTO_NOOVERFLOWINT_HPP
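Two properties of this class are easy to miss and worth a quick sketch (illustrative snippet, not part of the patch; it exercises only the operators defined above): overflow is sticky across a whole expression, and a NaN never compares equal, not even to itself.

  NoOverflowInt big(1 << 30);
  NoOverflowInt sum = big + big;            // 2^31 does not fit in a jint
  assert(sum.is_NaN(), "overflow is sticky");
  assert(!(sum == sum), "NaN compares unequal, even to itself");
  assert((big + NoOverflowInt(-1) + big).value() == max_jint, "2^31 - 1 still fits");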
138 src/hotspot/share/opto/traceMergeStoresTag.hpp Normal file
@ -0,0 +1,138 @@
/*
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_OPTO_TRACEMERGESTORESTAG_HPP
#define SHARE_OPTO_TRACEMERGESTORESTAG_HPP

#include "utilities/bitMap.inline.hpp"
#include "utilities/stringUtils.hpp"

namespace TraceMergeStores {
  #define COMPILER_TAG(flags) \
    flags(BASIC,     "Trace basic analysis steps") \
    flags(POINTER,   "Trace pointer IR") \
    flags(ALIASING,  "Trace MemPointerDecomposedForm::get_aliasing_with") \
    flags(ADJACENCY, "Trace adjacency") \
    flags(SUCCESS,   "Trace successful merges") \

  #define table_entry(name, description) name,
  enum Tag {
    COMPILER_TAG(table_entry)
    TAG_NUM,
    TAG_NONE
  };
  #undef table_entry

  static const char* tag_descriptions[] = {
  #define array_of_labels(name, description) description,
    COMPILER_TAG(array_of_labels)
  #undef array_of_labels
  };

  static const char* tag_names[] = {
  #define array_of_labels(name, description) #name,
    COMPILER_TAG(array_of_labels)
  #undef array_of_labels
  };

  static Tag find_tag(const char* str) {
    for (int i = 0; i < TAG_NUM; i++) {
      if (strcmp(tag_names[i], str) == 0) {
        return (Tag)i;
      }
    }
    return TAG_NONE;
  }

  class TagValidator {
  private:
    CHeapBitMap _tags;
    bool _valid;
    char* _bad;
    bool _is_print_usage;

  public:
    TagValidator(ccstrlist option, bool is_print_usage) :
      _tags(TAG_NUM, mtCompiler),
      _valid(true),
      _bad(nullptr),
      _is_print_usage(is_print_usage)
    {
      for (StringUtils::CommaSeparatedStringIterator iter(option); *iter != nullptr && _valid; ++iter) {
        char const* tag_name = *iter;
        if (strcmp("help", tag_name) == 0) {
          if (_is_print_usage) {
            print_help();
          }
          continue;
        }
        bool set_bit = true;
        // Check for "TAG" or "-TAG"
        if (strncmp("-", tag_name, strlen("-")) == 0) {
          tag_name++;
          set_bit = false;
        }
        Tag tag = find_tag(tag_name);
        if (TAG_NONE == tag) {
          // cap len to a value we know is enough for all tags
          const size_t len = MIN2<size_t>(strlen(*iter), 63) + 1;
          _bad = NEW_C_HEAP_ARRAY(char, len, mtCompiler);
          // strncpy always writes len characters. If the source string is
          // shorter, the function fills the remaining bytes with nulls.
          strncpy(_bad, *iter, len);
          _valid = false;
        } else {
          assert(tag < TAG_NUM, "out of bounds");
          _tags.at_put(tag, set_bit);
        }
      }
    }

    ~TagValidator() {
      if (_bad != nullptr) {
        FREE_C_HEAP_ARRAY(char, _bad);
      }
    }

    bool is_valid() const { return _valid; }
    const char* what() const { return _bad; }
    const CHeapBitMap& tags() const {
      assert(is_valid(), "only read tags when valid");
      return _tags;
    }

    static void print_help() {
      tty->cr();
      tty->print_cr("Usage for CompileCommand TraceMergeStores:");
      tty->print_cr("  -XX:CompileCommand=TraceMergeStores,<package.class::method>,<tags>");
      tty->print_cr("  %-22s %s", "tags", "descriptions");
      for (int i = 0; i < TAG_NUM; i++) {
        tty->print_cr("  %-22s %s", tag_names[i], tag_descriptions[i]);
      }
      tty->cr();
    }
  };
}

#endif // SHARE_OPTO_TRACEMERGESTORESTAG_HPP
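Based on the usage text printed by print_help() above, a concrete invocation could look like this (the class and method name are placeholders):

  -XX:CompileCommand=TraceMergeStores,MyClass::myMethod,POINTER,ADJACENCY

Passing "help" as a tag prints the table of tags, and a leading "-" (e.g. -SUCCESS) clears a tag that an earlier entry in the list switched on.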
175 test/hotspot/gtest/opto/test_no_overflow_int.cpp Normal file
@ -0,0 +1,175 @@
/*
 * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "opto/noOverflowInt.hpp"
#include "unittest.hpp"

static void check_jlong(const jlong val) {
  const NoOverflowInt x(val);

  if (val > max_jint || min_jint > val) {
    ASSERT_TRUE(x.is_NaN());
  } else {
    ASSERT_FALSE(x.is_NaN());
    ASSERT_EQ(x.value(), val);
  }
}

TEST_VM(opto, NoOverflowInt_check_jlong) {
  jlong start = (jlong)min_jint - 10000LL;
  jlong end   = (jlong)max_jint + 10000LL;
  for (jlong i = start; i < end; i += 1000LL) {
    check_jlong(i);
  }

  check_jlong((jlong)min_jint - 1LL);
  check_jlong((jlong)min_jint);
  check_jlong((jlong)min_jint + 1LL);
  check_jlong((jlong)max_jint - 1LL);
  check_jlong((jlong)max_jint);
  check_jlong((jlong)max_jint + 1LL);

  const NoOverflowInt nan;
  ASSERT_TRUE(nan.is_NaN());
}

TEST_VM(opto, NoOverflowInt_add_sub) {
  const NoOverflowInt nan;
  const NoOverflowInt zero(0);
  const NoOverflowInt one(1);
  const NoOverflowInt two(2);
  const NoOverflowInt big(1 << 30);

  ASSERT_EQ((one + two).value(), 3);
  ASSERT_EQ((one - two).value(), -1);
  ASSERT_TRUE((nan + one).is_NaN());
  ASSERT_TRUE((one + nan).is_NaN());
  ASSERT_TRUE((nan + nan).is_NaN());
  ASSERT_TRUE((nan - one).is_NaN());
  ASSERT_TRUE((one - nan).is_NaN());
  ASSERT_TRUE((nan - nan).is_NaN());

  ASSERT_EQ((big + one).value(), (1 << 30) + 1);
  ASSERT_TRUE((big + big).is_NaN());
  ASSERT_EQ((big - one).value(), (1 << 30) - 1);
  ASSERT_EQ((big - big).value(), 0);

  ASSERT_EQ((big - one + big).value(), max_jint);
  ASSERT_EQ((zero - big - big).value(), min_jint);
  ASSERT_TRUE((zero - big - big - one).is_NaN());
}

TEST_VM(opto, NoOverflowInt_mul) {
  const NoOverflowInt nan;
  const NoOverflowInt zero(0);
  const NoOverflowInt one(1);
  const NoOverflowInt two(2);
  const NoOverflowInt big(1 << 30);

  ASSERT_EQ((one * two).value(), 2);
  ASSERT_TRUE((nan * one).is_NaN());
  ASSERT_TRUE((one * nan).is_NaN());
  ASSERT_TRUE((nan * nan).is_NaN());

  ASSERT_EQ((big * one).value(), (1 << 30));
  ASSERT_EQ((one * big).value(), (1 << 30));
  ASSERT_EQ((big * zero).value(), 0);
  ASSERT_EQ((zero * big).value(), 0);
  ASSERT_TRUE((big * big).is_NaN());
  ASSERT_TRUE((big * two).is_NaN());

  ASSERT_EQ(((big - one) * two).value(), max_jint - 1);
  ASSERT_EQ(((one - big) * two).value(), min_jint + 2);
  ASSERT_EQ(((zero - big) * two).value(), min_jint);
  ASSERT_TRUE(((big + one) * two).is_NaN());
  ASSERT_TRUE(((zero - big - one) * two).is_NaN());
}

TEST_VM(opto, NoOverflowInt_lshift) {
  const NoOverflowInt nan;
  const NoOverflowInt zero(0);
  const NoOverflowInt one(1);
  const NoOverflowInt two(2);
  const NoOverflowInt big(1 << 30);

  for (int i = 0; i < 31; i++) {
    ASSERT_EQ((one << NoOverflowInt(i)).value(), 1LL << i);
  }
  for (int i = 31; i < 1000; i++) {
    ASSERT_TRUE((one << NoOverflowInt(i)).is_NaN());
  }
  for (int i = -1000; i < 0; i++) {
    ASSERT_TRUE((one << NoOverflowInt(i)).is_NaN());
  }

  ASSERT_EQ((NoOverflowInt(3) << NoOverflowInt(2)).value(), 3 * 4);
  ASSERT_EQ((NoOverflowInt(11) << NoOverflowInt(5)).value(), 11 * 32);
  ASSERT_EQ((NoOverflowInt(-13) << NoOverflowInt(4)).value(), -13 * 16);
}

TEST_VM(opto, NoOverflowInt_misc) {
  const NoOverflowInt nan;
  const NoOverflowInt zero(0);
  const NoOverflowInt one(1);
  const NoOverflowInt two(2);
  const NoOverflowInt big(1 << 30);

  // operator==
  ASSERT_FALSE(nan == nan);
  ASSERT_FALSE(nan == zero);
  ASSERT_FALSE(zero == nan);
  ASSERT_TRUE(zero == zero);
  ASSERT_TRUE(one == one);
  ASSERT_TRUE((one + two) == (two + one));
  ASSERT_TRUE((big + two) == (two + big));
  ASSERT_FALSE((big + big) == (big + big));
  ASSERT_TRUE((big - one + big) == (big - one + big));

  // abs
  for (int i = 0; i < (1 << 31); i += 1024) {
    ASSERT_EQ(NoOverflowInt(i).abs().value(), i);
    ASSERT_EQ(NoOverflowInt(-i).abs().value(), i);
  }
  ASSERT_EQ(NoOverflowInt(max_jint).abs().value(), max_jint);
  ASSERT_EQ(NoOverflowInt(min_jint + 1).abs().value(), max_jint);
  ASSERT_TRUE(NoOverflowInt(min_jint).abs().is_NaN());
  ASSERT_TRUE(NoOverflowInt(nan).abs().is_NaN());

  // is_multiple_of
  ASSERT_TRUE(one.is_multiple_of(one));
  ASSERT_FALSE(one.is_multiple_of(nan));
  ASSERT_FALSE(nan.is_multiple_of(one));
  ASSERT_FALSE(nan.is_multiple_of(nan));
  for (int i = 0; i < (1 << 31); i += 1023) {
    ASSERT_TRUE(NoOverflowInt(i).is_multiple_of(one));
    ASSERT_TRUE(NoOverflowInt(-i).is_multiple_of(one));
    ASSERT_FALSE(NoOverflowInt(i).is_multiple_of(zero));
    ASSERT_FALSE(NoOverflowInt(-i).is_multiple_of(zero));
  }
  ASSERT_TRUE(NoOverflowInt(33 * 7).is_multiple_of(NoOverflowInt(33)));
  ASSERT_TRUE(NoOverflowInt(13 * 5).is_multiple_of(NoOverflowInt(5)));
  ASSERT_FALSE(NoOverflowInt(7).is_multiple_of(NoOverflowInt(5)));
}
@ -33,7 +33,7 @@ import java.util.Random;

/*
 * @test
 * @bug 8318446 8331054 8331311
 * @bug 8318446 8331054 8331311 8335392
 * @summary Test merging of consecutive stores
 * @modules java.base/jdk.internal.misc
 * @library /test/lib /
@ -42,7 +42,7 @@ import java.util.Random;

/*
 * @test
 * @bug 8318446 8331054 8331311
 * @bug 8318446 8331054 8331311 8335392
 * @summary Test merging of consecutive stores
 * @modules java.base/jdk.internal.misc
 * @library /test/lib /
@ -75,6 +75,17 @@ public class TestMergeStores {
    long vL1;
    long vL2;

    static int zero0 = 0;
    static int zero1 = 0;
    static int zero2 = 0;
    static int zero3 = 0;
    static int zero4 = 0;
    static int zero5 = 0;
    static int zero6 = 0;
    static int zero7 = 0;
    static int zero8 = 0;
    static int zero9 = 0;

    interface TestFunction {
        Object[] run(boolean isWarmUp, int rnd);
    }
@ -154,6 +165,15 @@ public class TestMergeStores {
        testGroups.get("test7BE").put("test7RBE", (_,_) -> { return test7RBE(aB.clone(), offset1, vI1); });
        testGroups.get("test7BE").put("test7aBE", (_,_) -> { return test7aBE(aB.clone(), offset1, vI1); });

        testGroups.put("test10", new HashMap<String,TestFunction>());
        testGroups.get("test10").put("test10R", (_,_) -> { return test10R(aB.clone()); });
        testGroups.get("test10").put("test10a", (_,_) -> { return test10a(aB.clone()); });
        testGroups.get("test10").put("test10b", (_,_) -> { return test10b(aB.clone()); });
        testGroups.get("test10").put("test10c", (_,_) -> { return test10c(aB.clone()); });
        testGroups.get("test10").put("test10d", (_,_) -> { return test10d(aB.clone()); });
        testGroups.get("test10").put("test10e", (_,_) -> { return test10e(aB.clone()); });
        testGroups.get("test10").put("test10f", (_,_) -> { return test10f(aB.clone()); });

        testGroups.put("test100", new HashMap<String,TestFunction>());
        testGroups.get("test100").put("test100R", (_,_) -> { return test100R(aS.clone(), offset1); });
        testGroups.get("test100").put("test100a", (_,_) -> { return test100a(aS.clone(), offset1); });
@ -234,6 +254,10 @@ public class TestMergeStores {
        testGroups.get("test600").put("test600R", (_,i) -> { return test600R(aB.clone(), aI.clone(), i); });
        testGroups.get("test600").put("test600a", (_,i) -> { return test600a(aB.clone(), aI.clone(), i); });

        testGroups.put("test601", new HashMap<String,TestFunction>());
        testGroups.get("test601").put("test601R", (_,i) -> { return test601R(aB.clone(), aI.clone(), i, offset1); });
        testGroups.get("test601").put("test601a", (_,i) -> { return test601a(aB.clone(), aI.clone(), i, offset1); });

        testGroups.put("test700", new HashMap<String,TestFunction>());
        testGroups.get("test700").put("test700R", (_,i) -> { return test700R(aI.clone(), i); });
        testGroups.get("test700").put("test700a", (_,i) -> { return test700a(aI.clone(), i); });
@ -274,6 +298,12 @@ public class TestMergeStores {
                   "test5a",
                   "test6a",
                   "test7a",
                   "test10a",
                   "test10b",
                   "test10c",
                   "test10d",
                   "test10e",
                   "test10f",
                   "test7aBE",
                   "test100a",
                   "test101a",
@ -292,6 +322,7 @@ public class TestMergeStores {
                   "test501aBE",
                   "test502aBE",
                   "test600a",
                   "test601a",
                   "test700a",
                   "test800a",
                   "test800aBE"})
@ -611,9 +642,8 @@ public class TestMergeStores {
    }

    @Test
    // Disabled by JDK-8335390, to be enabled again by JDK-8335392.
    // @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"},
    //     applyIf = {"UseUnalignedAccesses", "true"})
    @IR(counts = {IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"},
        applyIf = {"UseUnalignedAccesses", "true"})
    static Object[] test1f(byte[] a) {
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 0, (byte)0xbe);
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 1, (byte)0xba);
@ -1124,6 +1154,145 @@ public class TestMergeStores {
        return new Object[]{ a };
    }

    @DontCompile
    static Object[] test10R(byte[] a) {
        int zero = zero0 + zero1 + zero2 + zero3 + zero4
                 + zero5 + zero6 + zero7 + zero8 + zero9;
        a[zero + 0] = 'h';
        a[zero + 1] = 'e';
        a[zero + 2] = 'l';
        a[zero + 3] = 'l';
        a[zero + 4] = 'o';
        a[zero + 5] = ' ';
        a[zero + 6] = ':';
        a[zero + 7] = ')';
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // no merge
                  IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"})
    static Object[] test10a(byte[] a) {
        // We have 11 summands: 10x zero variable + 1x array base.
        // Parsing only allows 10 summands -> does not merge the stores.
        int zero = zero0 + zero1 + zero2 + zero3 + zero4
                 + zero5 + zero6 + zero7 + zero8 + zero9;
        a[zero + 0] = 'h';
        a[zero + 1] = 'e';
        a[zero + 2] = 'l';
        a[zero + 3] = 'l';
        a[zero + 4] = 'o';
        a[zero + 5] = ' ';
        a[zero + 6] = ':';
        a[zero + 7] = ')';
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // 1 left in uncommon trap path of RangeCheck
                  IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged
        applyIf = {"UseUnalignedAccesses", "true"})
    static Object[] test10b(byte[] a) {
        int zero = zero0 + zero1 + zero2 + zero3 + zero4
                 + zero5 + zero6 + zero7 + zero8;
        // We have 10 summands: 9x zero variable + 1x array base.
        // Parsing allows 10 summands, so this should merge the stores.
        a[zero + 0] = 'h';
        a[zero + 1] = 'e';
        a[zero + 2] = 'l';
        a[zero + 3] = 'l';
        a[zero + 4] = 'o';
        a[zero + 5] = ' ';
        a[zero + 6] = ':';
        a[zero + 7] = ')';
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1", // 1 left in uncommon trap path of RangeCheck
                  IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged
        applyIf = {"UseUnalignedAccesses", "true"})
    static Object[] test10c(byte[] a) {
        int zero = 7 * zero0 + 7 * zero1 + 7 * zero2 + 7 * zero3 + 7 * zero4
                 + 7 * zero5 + 7 * zero6 + 7 * zero7 + 7 * zero8;
        // The "7 * zero" is split into "zero << 3 - zero". But the parsing combines it again, lowering the summand count.
        // We have 10 summands: 9x zero variable + 1x array base.
        // Parsing allows 10 summands, so this should merge the stores.
        a[zero + 0] = 'h';
        a[zero + 1] = 'e';
        a[zero + 2] = 'l';
        a[zero + 3] = 'l';
        a[zero + 4] = 'o';
        a[zero + 5] = ' ';
        a[zero + 6] = ':';
        a[zero + 7] = ')';
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged
        applyIf = {"UseUnalignedAccesses", "true"})
    static Object[] test10d(byte[] a) {
        // Summand is subtracted from itself -> scale = 0 -> should be removed from list.
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 0) - zero0, (byte)'h');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 1) - zero0, (byte)'e');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 2) - zero0, (byte)'l');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 3) - zero0, (byte)'l');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 4) - zero0, (byte)'o');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 5) - zero0, (byte)' ');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 6) - zero0, (byte)':');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 7) - zero0, (byte)')');
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged
        applyIf = {"UseUnalignedAccesses", "true"})
    static Object[] test10e(byte[] a) {
        // Summand is subtracted from itself -> scale = 0 -> should be removed from list.
        // Thus, the result is the same as if the summand were not present at all.
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 0) - zero0, (byte)'h');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 1) - zero0, (byte)'e');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 2) - zero0, (byte)'l');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + (long)(zero0 + 3) - zero0, (byte)'l');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 4, (byte)'o');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 5, (byte)' ');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 6, (byte)':');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + 7, (byte)')');
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // no merge
                  IRNode.STORE_C_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_I_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_L_OF_CLASS, "byte\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"})
    static Object[] test10f(byte[] a) {
        int big = 1 << 29;
        // Adding up the scales overflows -> no merge.
        long offset = zero9 * big + zero9 * big + zero9 * big + zero9 * big;
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 0, (byte)'h');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 1, (byte)'e');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 2, (byte)'l');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 3, (byte)'l');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 4, (byte)'o');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 5, (byte)' ');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 6, (byte)':');
        UNSAFE.putByte(a, UNSAFE.ARRAY_BYTE_BASE_OFFSET + offset + 7, (byte)')');
        return new Object[]{ a };
    }

    @DontCompile
    static Object[] test100R(short[] a, int offset) {
        a[offset + 0] = (short)0x0100;
@ -1560,15 +1729,12 @@ public class TestMergeStores {
    }

    @Test
    // We must be careful with mismatched accesses on arrays:
    // An int-array can have about 2x max_int size, and hence if we address bytes in it, we can have int-overflows.
    // We might consider addresses (x + 0) and (x + 1) as adjacent, even if x = max_int, and therefore the second
    // address overflows and is not adjacent at all.
    // Therefore, we should only consider stores that have the same size as the element type of the array.
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // no merging
    // All constants are known, and AddI can be converted to AddL safely, hence the stores can be merged.
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_C_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_I_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_L_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"})
                  IRNode.STORE_L_OF_CLASS, "int\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged
        applyIf = {"UseUnalignedAccesses", "true"})
    static Object[] test400a(int[] a) {
        UNSAFE.putByte(a, UNSAFE.ARRAY_INT_BASE_OFFSET + 0, (byte)0xbe);
        UNSAFE.putByte(a, UNSAFE.ARRAY_INT_BASE_OFFSET + 1, (byte)0xba);
@ -1858,7 +2024,11 @@ public class TestMergeStores {
    }

    @Test
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8"}) // note: bottom type
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged
        applyIf = {"UseUnalignedAccesses", "true"})
    static Object[] test600a(byte[] aB, int[] aI, int i) {
        Object a = null;
        long base = 0;
@ -1869,7 +2039,7 @@ public class TestMergeStores {
            a = aI;
            base = UNSAFE.ARRAY_INT_BASE_OFFSET;
        }
        // array a is an aryptr, but its element type is unknown, i.e. bottom.
        // Array type is unknown, i.e. bottom[]. But all AddI can be safely converted to AddL -> safe to merge.
        UNSAFE.putByte(a, base + 0, (byte)0xbe);
        UNSAFE.putByte(a, base + 1, (byte)0xba);
        UNSAFE.putByte(a, base + 2, (byte)0xad);
@ -1881,6 +2051,63 @@ public class TestMergeStores {
        return new Object[]{ aB, aI };
    }

    @DontCompile
    static Object[] test601R(byte[] aB, int[] aI, int i, int offset1) {
        Object a = null;
        long base = 0;
        if (i % 2 == 0) {
            a = aB;
            base = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
        } else {
            a = aI;
            base = UNSAFE.ARRAY_INT_BASE_OFFSET;
        }
        UNSAFE.putByte(a, base + (offset1 + 0), (byte)0xbe);
        UNSAFE.putByte(a, base + (offset1 + 1), (byte)0xba);
        UNSAFE.putByte(a, base + (offset1 + 2), (byte)0xad);
        UNSAFE.putByte(a, base + (offset1 + 3), (byte)0xba);
        UNSAFE.putByte(a, base + (offset1 + 4), (byte)0xef);
        UNSAFE.putByte(a, base + (offset1 + 5), (byte)0xbe);
        UNSAFE.putByte(a, base + (offset1 + 6), (byte)0xad);
        UNSAFE.putByte(a, base + (offset1 + 7), (byte)0xde);
        return new Object[]{ aB, aI };
    }

    @Test
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "8", // nothing merged
                  IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0"},
        applyIfPlatform = {"64-bit", "true"})
    @IR(counts = {IRNode.STORE_B_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_C_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_I_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "0",
                  IRNode.STORE_L_OF_CLASS, "bottom\\\\[int:>=0] \\\\(java/lang/Cloneable,java/io/Serializable\\\\)", "1"}, // all merged
        applyIf = {"UseUnalignedAccesses", "true"},
        applyIfPlatform = {"32-bit", "true"})
    static Object[] test601a(byte[] aB, int[] aI, int i, int offset1) {
        Object a = null;
        long base = 0;
        if (i % 2 == 0) {
            a = aB;
            base = UNSAFE.ARRAY_BYTE_BASE_OFFSET;
        } else {
            a = aI;
            base = UNSAFE.ARRAY_INT_BASE_OFFSET;
        }
        // Array type is unknown, i.e. bottom[]. Hence we do not know the element size of the array.
        // Thus, on 64-bit systems merging is not safe, there could be overflows.
        UNSAFE.putByte(a, base + (offset1 + 0), (byte)0xbe);
        UNSAFE.putByte(a, base + (offset1 + 1), (byte)0xba);
        UNSAFE.putByte(a, base + (offset1 + 2), (byte)0xad);
        UNSAFE.putByte(a, base + (offset1 + 3), (byte)0xba);
        UNSAFE.putByte(a, base + (offset1 + 4), (byte)0xef);
        UNSAFE.putByte(a, base + (offset1 + 5), (byte)0xbe);
        UNSAFE.putByte(a, base + (offset1 + 6), (byte)0xad);
        UNSAFE.putByte(a, base + (offset1 + 7), (byte)0xde);
        return new Object[]{ aB, aI };
    }

    @DontCompile
    static Object[] test700R(int[] a, long v1) {
        a[0] = (int)(v1 >> -1);
426 test/hotspot/jtreg/compiler/c2/TestMergeStoresMemorySegment.java Normal file
@ -0,0 +1,426 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package compiler.c2;
|
||||
|
||||
import compiler.lib.ir_framework.*;
|
||||
import jdk.test.lib.Utils;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.Random;
|
||||
import java.lang.foreign.*;
|
||||
|
||||
/*
|
||||
* @test id=byte-array
|
||||
* @bug 8335392
|
||||
* @summary Test MergeStores optimization for MemorySegment
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.TestMergeStoresMemorySegment ByteArray
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=char-array
|
||||
* @bug 8335392
|
||||
* @summary Test MergeStores optimization for MemorySegment
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.TestMergeStoresMemorySegment CharArray
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=short-array
|
||||
* @bug 8335392
|
||||
* @summary Test MergeStores optimization for MemorySegment
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.TestMergeStoresMemorySegment ShortArray
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=int-array
|
||||
* @bug 8335392
|
||||
* @summary Test MergeStores optimization for MemorySegment
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.TestMergeStoresMemorySegment IntArray
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=long-array
|
||||
* @bug 8335392
|
||||
* @summary Test MergeStores optimization for MemorySegment
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.TestMergeStoresMemorySegment LongArray
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=float-array
|
||||
* @bug 8335392
|
||||
* @summary Test MergeStores optimization for MemorySegment
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.TestMergeStoresMemorySegment FloatArray
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=double-array
|
||||
* @bug 8335392
|
||||
* @summary Test MergeStores optimization for MemorySegment
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.TestMergeStoresMemorySegment DoubleArray
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=byte-buffer
|
||||
* @bug 8335392
|
||||
* @summary Test MergeStores optimization for MemorySegment
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.TestMergeStoresMemorySegment ByteBuffer
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=byte-buffer-direct
|
||||
* @bug 8335392
|
||||
* @summary Test MergeStores optimization for MemorySegment
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.TestMergeStoresMemorySegment ByteBufferDirect
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test id=native
|
||||
* @bug 8335392
|
||||
* @summary Test MergeStores optimization for MemorySegment
|
||||
* @library /test/lib /
|
||||
* @run driver compiler.c2.TestMergeStoresMemorySegment Native
|
||||
*/
|
||||
|
||||
// FAILS: mixed providers currently do not merge stores. Maybe there is some inlining issue.
|
||||
// /*
|
||||
// * @test id=mixed-array
|
||||
// * @bug 8335392
|
||||
// * @summary Test MergeStores optimization for MemorySegment
|
||||
// * @library /test/lib /
|
||||
// * @run driver compiler.c2.TestMergeStoresMemorySegment MixedArray
|
||||
// */
|
||||
//
|
||||
// /*
|
||||
// * @test id=MixedBuffer
|
||||
// * @bug 8335392
|
||||
// * @summary Test MergeStores optimization for MemorySegment
|
||||
// * @library /test/lib /
|
||||
// * @run driver compiler.c2.TestMergeStoresMemorySegment MixedBuffer
|
||||
// */
|
||||
//
|
||||
// /*
|
||||
// * @test id=mixed
|
||||
// * @bug 8335392
|
||||
// * @summary Test MergeStores optimization for MemorySegment
|
||||
// * @library /test/lib /
|
||||
// * @run driver compiler.c2.TestMergeStoresMemorySegment Mixed
|
||||
// */
|
||||
|
||||
public class TestMergeStoresMemorySegment {
|
||||
public static void main(String[] args) {
        for (String unaligned : new String[]{"-XX:-UseUnalignedAccesses", "-XX:+UseUnalignedAccesses"}) {
            TestFramework framework = new TestFramework(TestMergeStoresMemorySegmentImpl.class);
            framework.addFlags("-DmemorySegmentProviderNameForTestVM=" + args[0], unaligned);
            framework.start();
        }
    }
}

class TestMergeStoresMemorySegmentImpl {
    static final int BACKING_SIZE = 1024 * 8;
    static final Random RANDOM = Utils.getRandomInstance();

    private static final String START = "(\\d+(\\s){2}(";
    private static final String MID = ".*)+(\\s){2}===.*";
    private static final String END = ")";

    // Custom regexes: allow us to only match Stores that come from MemorySegment internals.
    private static final String REGEX_STORE_B_TO_MS_FROM_B = START + "StoreB" + MID + END + "ScopedMemoryAccess::putByteInternal";
    private static final String REGEX_STORE_C_TO_MS_FROM_B = START + "StoreC" + MID + END + "ScopedMemoryAccess::putByteInternal";
    private static final String REGEX_STORE_I_TO_MS_FROM_B = START + "StoreI" + MID + END + "ScopedMemoryAccess::putByteInternal";
    private static final String REGEX_STORE_L_TO_MS_FROM_B = START + "StoreL" + MID + END + "ScopedMemoryAccess::putByteInternal";
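
    // For orientation (the exact dump format here is an assumption, not verified output):
    // with phase = CompilePhase.PRINT_IDEAL, a line matched by REGEX_STORE_C_TO_MS_FROM_B
    // would look roughly like
    //   "1234  StoreC  === 5 6 7 ... ScopedMemoryAccess::putByteInternal ..."
    // i.e. node id, two spaces, node name, "===", the node inputs, and the inlined
    // method name in the node's trailing debug info.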

    interface TestFunction {
        Object[] run();
    }

    interface MemorySegmentProvider {
        MemorySegment newMemorySegment();
    }

    static MemorySegmentProvider provider;

    static {
        String providerName = System.getProperty("memorySegmentProviderNameForTestVM");
        provider = switch (providerName) {
            case "ByteArray"        -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfByteArray;
            case "CharArray"        -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfCharArray;
            case "ShortArray"       -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfShortArray;
            case "IntArray"         -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfIntArray;
            case "LongArray"        -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfLongArray;
            case "FloatArray"       -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfFloatArray;
            case "DoubleArray"      -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfDoubleArray;
            case "ByteBuffer"       -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfByteBuffer;
            case "ByteBufferDirect" -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfByteBufferDirect;
            case "Native"           -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfNative;
            case "MixedArray"       -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfMixedArray;
            case "MixedBuffer"      -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfMixedBuffer;
            case "Mixed"            -> TestMergeStoresMemorySegmentImpl::newMemorySegmentOfMixed;
            default -> throw new RuntimeException("Test argument not recognized: " + providerName);
        };
    }

    // List of tests
    Map<String, TestFunction> tests = new HashMap<>();

    // List of golden values, i.e. the results from the first run before compilation
    Map<String, Object[]> golds = new HashMap<>();

    public TestMergeStoresMemorySegmentImpl() {
        // Generate two MemorySegments as inputs
        MemorySegment a = newMemorySegment();
        MemorySegment b = newMemorySegment();
        fillRandom(a);
        fillRandom(b);

        // Future work: add more test cases. For now, the issue seems to be that
        // RangeCheck smearing does not remove the RangeChecks, and thus we can
        // only ever merge two stores.
        //
        // Ideas for more test cases, once they are better optimized:
        //
        // Have about 3 variables, each either int or long. Add them all up in
        // int or long. Give them different scales. Compute the address in the
        // same expression or separately. Use different element store sizes
        // (B, C, I, L).
        //
        tests.put("test_xxx", () -> test_xxx(copy(a), 5, 11, 31));
        tests.put("test_yyy", () -> test_yyy(copy(a), 5, 11, 31));
        tests.put("test_zzz", () -> test_zzz(copy(a), 5, 11, 31));

        // Compute gold values for all test methods before compilation
        for (Map.Entry<String, TestFunction> entry : tests.entrySet()) {
            String name = entry.getKey();
            TestFunction test = entry.getValue();
            Object[] gold = test.run();
            golds.put(name, gold);
        }
    }

    MemorySegment newMemorySegment() {
        return provider.newMemorySegment();
    }

    MemorySegment copy(MemorySegment src) {
        MemorySegment dst = newMemorySegment();
        MemorySegment.copy(src, 0, dst, 0, src.byteSize());
        return dst;
    }

    static MemorySegment newMemorySegmentOfByteArray() {
        return MemorySegment.ofArray(new byte[BACKING_SIZE]);
    }

    static MemorySegment newMemorySegmentOfCharArray() {
        return MemorySegment.ofArray(new char[BACKING_SIZE / 2]);
    }

    static MemorySegment newMemorySegmentOfShortArray() {
        return MemorySegment.ofArray(new short[BACKING_SIZE / 2]);
    }

    static MemorySegment newMemorySegmentOfIntArray() {
        return MemorySegment.ofArray(new int[BACKING_SIZE / 4]);
    }

    static MemorySegment newMemorySegmentOfLongArray() {
        return MemorySegment.ofArray(new long[BACKING_SIZE / 8]);
    }

    static MemorySegment newMemorySegmentOfFloatArray() {
        return MemorySegment.ofArray(new float[BACKING_SIZE / 4]);
    }

    static MemorySegment newMemorySegmentOfDoubleArray() {
        return MemorySegment.ofArray(new double[BACKING_SIZE / 8]);
    }

    static MemorySegment newMemorySegmentOfByteBuffer() {
        return MemorySegment.ofBuffer(ByteBuffer.allocate(BACKING_SIZE));
    }

    static MemorySegment newMemorySegmentOfByteBufferDirect() {
        return MemorySegment.ofBuffer(ByteBuffer.allocateDirect(BACKING_SIZE));
    }

    static MemorySegment newMemorySegmentOfNative() {
        // Auto arena: the GC decides when there is no reference to the MemorySegment
        // left, and then deallocates the backing memory.
        return Arena.ofAuto().allocate(BACKING_SIZE, 1);
    }

    static MemorySegment newMemorySegmentOfMixedArray() {
        switch (RANDOM.nextInt(7)) {
            case 0  -> { return newMemorySegmentOfByteArray(); }
            case 1  -> { return newMemorySegmentOfCharArray(); }
            case 2  -> { return newMemorySegmentOfShortArray(); }
            case 3  -> { return newMemorySegmentOfIntArray(); }
            case 4  -> { return newMemorySegmentOfLongArray(); }
            case 5  -> { return newMemorySegmentOfFloatArray(); }
            default -> { return newMemorySegmentOfDoubleArray(); }
        }
    }

    static MemorySegment newMemorySegmentOfMixedBuffer() {
        switch (RANDOM.nextInt(2)) {
            case 0  -> { return newMemorySegmentOfByteBuffer(); }
            default -> { return newMemorySegmentOfByteBufferDirect(); }
        }
    }

    static MemorySegment newMemorySegmentOfMixed() {
        switch (RANDOM.nextInt(3)) {
            case 0  -> { return newMemorySegmentOfMixedArray(); }
            case 1  -> { return newMemorySegmentOfMixedBuffer(); }
            default -> { return newMemorySegmentOfNative(); }
        }
    }

    static void fillRandom(MemorySegment data) {
        for (int i = 0; i < (int)data.byteSize(); i += 8) {
            data.set(ValueLayout.JAVA_LONG_UNALIGNED, i, RANDOM.nextLong());
        }
    }

    static void verify(String name, Object[] gold, Object[] result) {
        if (gold.length != result.length) {
            throw new RuntimeException("verify " + name + ": not the same number of outputs: gold.length = " +
                                       gold.length + ", result.length = " + result.length);
        }
        for (int i = 0; i < gold.length; i++) {
            Object g = gold[i];
            Object r = result[i];
            if (g == r) {
                throw new RuntimeException("verify " + name + ": should be two separate objects (with identical content):" +
                                           " gold[" + i + "] == result[" + i + "]");
            }

            if (!(g instanceof MemorySegment && r instanceof MemorySegment)) {
                throw new RuntimeException("verify " + name + ": only MemorySegment supported, i=" + i);
            }

            MemorySegment mg = (MemorySegment)g;
            MemorySegment mr = (MemorySegment)r;

            if (mg.byteSize() != mr.byteSize()) {
                throw new RuntimeException("verify " + name + ": MemorySegments must have the same byteSize:" +
                                           " gold[" + i + "].byteSize = " + mg.byteSize() +
                                           " result[" + i + "].byteSize = " + mr.byteSize());
            }

            for (int j = 0; j < (int)mg.byteSize(); j++) {
                byte vg = mg.get(ValueLayout.JAVA_BYTE, j);
                byte vr = mr.get(ValueLayout.JAVA_BYTE, j);
                if (vg != vr) {
                    throw new RuntimeException("verify " + name + ": MemorySegments must have the same content:" +
                                               " gold[" + i + "][" + j + "] = " + vg +
                                               " result[" + i + "][" + j + "] = " + vr);
                }
            }
        }
    }

    @Run(test = { "test_xxx", "test_yyy", "test_zzz" })
    void runTests() {
        for (Map.Entry<String, TestFunction> entry : tests.entrySet()) {
            String name = entry.getKey();
            TestFunction test = entry.getValue();
            // Recall gold value from before compilation
            Object[] gold = golds.get(name);
            // Compute new result
            Object[] result = test.run();
            // Compare gold and new result
            verify(name, gold, result);
        }
    }

    @Test
    @IR(counts = {REGEX_STORE_B_TO_MS_FROM_B, "<=5", // 4x RC
                  REGEX_STORE_C_TO_MS_FROM_B, ">=3", // 4x merged
                  REGEX_STORE_I_TO_MS_FROM_B, "0",
                  REGEX_STORE_L_TO_MS_FROM_B, "0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIf = {"UseUnalignedAccesses", "true"})
    static Object[] test_xxx(MemorySegment a, int xI, int yI, int zI) {
        // All RangeChecks remain -> RC smearing not good enough?
        a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 0), (byte)'h');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 1), (byte)'e');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 2), (byte)'l');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 3), (byte)'l');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 4), (byte)'o');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 5), (byte)' ');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 6), (byte)':');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI + yI + zI + 7), (byte)')');
        return new Object[]{ a };
    }
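
    // The counts above encode the current limitation noted in the constructor: the
    // remaining RangeChecks split the eight byte stores into (at most) pairs, so we
    // expect up to 4 merged StoreC and no StoreI/StoreL. The "<=5" and ">=3" bounds
    // leave some slack rather than pinning exact counts.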

    @Test
    @IR(counts = {REGEX_STORE_B_TO_MS_FROM_B, "<=5", // 4x RC
                  REGEX_STORE_C_TO_MS_FROM_B, ">=3", // 4x merged
                  REGEX_STORE_I_TO_MS_FROM_B, "0",
                  REGEX_STORE_L_TO_MS_FROM_B, "0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIf = {"UseUnalignedAccesses", "true"})
    static Object[] test_yyy(MemorySegment a, int xI, int yI, int zI) {
        // All RangeChecks remain -> RC smearing not good enough?
        a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 0L, (byte)'h');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 1L, (byte)'e');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 2L, (byte)'l');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 3L, (byte)'l');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 4L, (byte)'o');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 5L, (byte)' ');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 6L, (byte)':');
        a.set(ValueLayout.JAVA_BYTE, (long)(xI) + (long)(yI) + (long)(zI) + 7L, (byte)')');
        return new Object[]{ a };
    }

    @Test
    @IR(counts = {REGEX_STORE_B_TO_MS_FROM_B, "<=5", // 4x RC
                  REGEX_STORE_C_TO_MS_FROM_B, ">=3", // 4x merged
                  REGEX_STORE_I_TO_MS_FROM_B, "0",
                  REGEX_STORE_L_TO_MS_FROM_B, "0"},
        phase = CompilePhase.PRINT_IDEAL,
        applyIf = {"UseUnalignedAccesses", "true"})
    static Object[] test_zzz(MemorySegment a, long xL, long yL, long zL) {
        // All RangeChecks remain -> RC smearing not good enough?
        a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 0L, (byte)'h');
        a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 1L, (byte)'e');
        a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 2L, (byte)'l');
        a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 3L, (byte)'l');
        a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 4L, (byte)'o');
        a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 5L, (byte)' ');
        a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 6L, (byte)':');
        a.set(ValueLayout.JAVA_BYTE, xL + yL + zL + 7L, (byte)')');
        return new Object[]{ a };
    }
}
@ -52,6 +52,10 @@ public class TestMergeStoresUnsafeArrayPointer {
    static final long ANCHOR = BYTE_SIZE / 2;

    static int four = 4;
    static int max_int = Integer.MAX_VALUE;
    static int min_int = Integer.MIN_VALUE;
    static int val_2_to_30 = (1 << 30);
    static int large_by_53 = (int)((1L << 31) / 53L + 1L);
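    // These are non-final static fields, presumably so that C2 cannot constant-fold
    // them (an inference from how the tests below use them): the address arithmetic
    // then keeps its AddI/SubI/MulI/LShiftI nodes, and the overflow cases below stay
    // reachable at compile time.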

    public static void main(String[] args) {
        System.out.println("Allocate big array of SIZE = " + SIZE);
@ -95,6 +99,103 @@ public class TestMergeStoresUnsafeArrayPointer {
            }
        }

        val = 0;
        System.out.println("test3");
        for (int i = 0; i < 100_000; i++) {
            testClear(big);
            test3(big, ANCHOR);
            long sum = testSum(big);
            if (i == 0) {
                val = sum;
            } else {
                if (sum != val) {
                    System.out.println("ERROR: test3 had wrong value: " + val + " != " + sum);
                    errors++;
                    break;
                }
            }
        }

        val = 0;
        System.out.println("test4");
        for (int i = 0; i < 100_000; i++) {
            testClear(big);
            test4(big, ANCHOR);
            long sum = testSum(big);
            if (i == 0) {
                val = sum;
            } else {
                if (sum != val) {
                    System.out.println("ERROR: test4 had wrong value: " + val + " != " + sum);
                    errors++;
                    break;
                }
            }
        }

        val = 0;
        System.out.println("test5");
        for (int i = 0; i < 100_000; i++) {
            testClear(big);
            test5(big, ANCHOR);
            long sum = testSum(big);
            if (i == 0) {
                val = sum;
            } else {
                if (sum != val) {
                    System.out.println("ERROR: test5 had wrong value: " + val + " != " + sum);
                    errors++;
                    break;
                }
            }
        }

        val = 0;
        System.out.println("test6");
        for (int i = 0; i < 100_000; i++) {
            testClear(big);
            test6(big, ANCHOR);
            long sum = testSum(big);
            if (i == 0) {
                val = sum;
            } else {
                if (sum != val) {
                    System.out.println("ERROR: test6 had wrong value: " + val + " != " + sum);
                    errors++;
                    break;
                }
            }
        }

        val = 0;
        System.out.println("test7");
        for (int i = 0; i < 100_000; i++) {
            testClear(big);
            test7(big, ANCHOR);
            long sum = testSum(big);
            if (i == 0) {
                val = sum;
            } else {
                if (sum != val) {
                    System.out.println("ERROR: test7 had wrong value: " + val + " != " + sum);
                    errors++;
                    break;
                }
            }
        }

        // No result verification here. We only want to make sure we do not hit asserts.
        System.out.println("test8 and test9");
        for (int i = 0; i < 100_000; i++) {
            test8a(big, ANCHOR);
            test8b(big, ANCHOR);
            test8c(big, ANCHOR);
            test8d(big, ANCHOR);
            test9a(big, ANCHOR);
            test9b(big, ANCHOR);
            test9c(big, ANCHOR);
        }

        if (errors > 0) {
            throw new RuntimeException("ERRORS: " + errors);
        }
@ -129,4 +230,95 @@ public class TestMergeStoresUnsafeArrayPointer {
        UNSAFE.putInt(a, base + 0 + (long)(four + Integer.MAX_VALUE), 0x42424242);
        UNSAFE.putInt(a, base + Integer.MAX_VALUE + (long)(four + 4 ), 0x66666666);
    }

    // Test: if MergeStores is applied this can lead to wrong results
    //       -> AddI needs overflow check.
    static void test3(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putInt(a, base + (long)(max_int + 0), 0x42424242);
        UNSAFE.putInt(a, base + (long)(max_int + 4), 0x66666666);
    }
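    // Worked example of the hazard: in int arithmetic, max_int + 4 wraps to
    // Integer.MIN_VALUE + 3, so the second store lands roughly 2^32 bytes below the
    // first instead of 4 bytes above it. Merging the two putInt calls into a single
    // 8-byte store would therefore write to the wrong address.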

    // Test: "min_int - four" cannot be parsed further, but that makes no difference
    //       here: both stores share the same unparseable subexpression, so their
    //       4-byte distance is still known.
    static void test4(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putInt(a, base + (long)(min_int - four) + 0, 0x42424242);
        UNSAFE.putInt(a, base + (long)(min_int - four) + 4, 0x66666666);
    }

    // Test: if MergeStores is applied this can lead to wrong results
    //       -> SubI needs overflow check.
    static void test5(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putInt(a, base + (long)(min_int) - (long)(four) + 0, 0x42424242); // no overflow
        UNSAFE.putInt(a, base + (long)(min_int - four) + 4, 0x66666666); // overflow
    }
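    // Worked example: min_int - four == -2^31 - 4 wraps in int arithmetic to
    // Integer.MAX_VALUE - 3. The first store is at base - 2^31 - 4, the second at
    // base + 2^31, about 2^32 bytes apart, even though the expressions look adjacent.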

    // Test: if MergeStores is applied this can lead to wrong results
    //       -> LShiftI needs overflow check.
    static void test6(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putInt(a, base + (long)(2 * val_2_to_30) + 0, 0x42424242); // overflow
        UNSAFE.putInt(a, base + 2L * (long)(val_2_to_30) + 4, 0x66666666); // no overflow
    }
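    // Worked example: 2 * val_2_to_30 == 2^31 overflows int to Integer.MIN_VALUE, so
    // the first store lands at base - 2^31, while the long computation
    // 2L * val_2_to_30 places the second at base + 2^31 + 4.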

    // Test: if MergeStores is applied this can lead to wrong results
    //       -> MulI needs overflow check.
    static void test7(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putInt(a, base + (long)(53 * large_by_53) + 0, 0x42424242); // overflow
        UNSAFE.putInt(a, base + 53L * (long)(large_by_53) + 4, 0x66666666); // no overflow
    }
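    // Worked example: large_by_53 == 40518560, and 53 * large_by_53 == 2^31 + 32,
    // which wraps in int arithmetic to Integer.MIN_VALUE + 32. The long product
    // 53L * large_by_53 does not wrap, so the two stores end up about 2^32 bytes apart.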

    // Test: check if large distance leads to assert
    static void test8a(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putByte(a, base + (1L << 11) + 0, (byte)42);
        UNSAFE.putByte(a, base + (1L << 11) + (1L << 30), (byte)11);
    }

    // Test: check if large distance leads to assert
    static void test8b(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putByte(a, base + (1L << 11) + (1L << 30), (byte)11);
        UNSAFE.putByte(a, base + (1L << 11) + 0, (byte)42);
    }

    // Test: check if large distance leads to assert
    static void test8c(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putByte(a, base - (1L << 11) - 0, (byte)42);
        UNSAFE.putByte(a, base - (1L << 11) - (1L << 30), (byte)11);
    }

    // Test: check if large distance leads to assert
    static void test8d(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putByte(a, base - (1L << 11) - (1L << 30), (byte)11);
        UNSAFE.putByte(a, base - (1L << 11) - 0, (byte)42);
    }

    // Test: check if large distance leads to assert
    //       case: bad distance: NaN
    static void test9a(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putByte(a, base - 100, (byte)42);
        UNSAFE.putByte(a, base - 100 + (1L << 31), (byte)11);
    }
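    // Here "NaN" presumably refers to the pointer-distance arithmetic in MergeStores:
    // a distance of 1L << 31 no longer fits into a 32-bit int, so it is flagged as
    // not-a-number and the two stores must not be treated as provably adjacent.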

    // Test: check if large distance leads to assert
    //       case: just before NaN, it is still a valid distance for MemPointer aliasing.
    static void test9b(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putByte(a, base - 100, (byte)42);
        UNSAFE.putByte(a, base - 100 + (1L << 31) - 1, (byte)11);
    }

    // Test: check if large distance leads to assert
    //       case: constant too large
    static void test9c(int[] a, long anchor) {
        long base = UNSAFE.ARRAY_INT_BASE_OFFSET + anchor;
        UNSAFE.putByte(a, base, (byte)42);
        UNSAFE.putByte(a, base + (1L << 31), (byte)11);
    }
}
@ -41,12 +41,12 @@ import java.util.concurrent.TimeUnit;

@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 3, time = 3)
@Measurement(iterations = 3, time = 3)
@Fork(value = 3, jvmArgs = {
@Warmup(iterations = 2, time = 1)
@Measurement(iterations = 3, time = 1)
@Fork(value = 1, jvmArgs = {
    "--add-exports", "java.base/jdk.internal.misc=ALL-UNNAMED",
    "--add-exports", "java.base/jdk.internal.util=ALL-UNNAMED"})
@State(Scope.Benchmark)
@State(Scope.Thread)
public class MergeStores {

    public static final int RANGE = 100;
@ -66,6 +66,7 @@ public class MergeStores {
    public static byte[] aB = new byte[RANGE];
    public static short[] aS = new short[RANGE];
    public static int[] aI = new int[RANGE];
    public static long native_adr = UNSAFE.allocateMemory(RANGE * 8);

    // -------------------------------------------
    // ------- Little-Endian API ----------
@ -691,4 +692,59 @@ public class MergeStores {
        aI[offset + 1] = 0;
        return aI;
    }

    @Benchmark
    public void store_unsafe_B8_L_offs_noalloc_direct() {
        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, (byte)(vL >> 0 ));
        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 1, (byte)(vL >> 8 ));
        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 2, (byte)(vL >> 16));
        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 3, (byte)(vL >> 24));
        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 4, (byte)(vL >> 32));
        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 5, (byte)(vL >> 40));
        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 6, (byte)(vL >> 48));
        UNSAFE.putByte(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 7, (byte)(vL >> 56));
    }
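    // With MergeStores enabled, C2 should be able to collapse these eight
    // little-endian byte stores into a single 8-byte store, so this variant is
    // expected to approach the putLongUnaligned baseline below.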

    @Benchmark
    public void store_unsafe_B8_L_offs_noalloc_unsafe() {
        UNSAFE.putLongUnaligned(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, vL);
    }

    @Benchmark
    public void store_unsafe_C4_L_offs_noalloc_direct() {
        UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 0, (char)(vL >> 0 ));
        UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 2, (char)(vL >> 16));
        UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 4, (char)(vL >> 32));
        UNSAFE.putChar(aB, Unsafe.ARRAY_BYTE_BASE_OFFSET + offset + 6, (char)(vL >> 48));
    }

    @Benchmark
    public void store_unsafe_native_B8_L_offs_noalloc_direct() {
        UNSAFE.putByte(null, native_adr + offset + 0, (byte)(vL >> 0 ));
        UNSAFE.putByte(null, native_adr + offset + 1, (byte)(vL >> 8 ));
        UNSAFE.putByte(null, native_adr + offset + 2, (byte)(vL >> 16));
        UNSAFE.putByte(null, native_adr + offset + 3, (byte)(vL >> 24));
        UNSAFE.putByte(null, native_adr + offset + 4, (byte)(vL >> 32));
        UNSAFE.putByte(null, native_adr + offset + 5, (byte)(vL >> 40));
        UNSAFE.putByte(null, native_adr + offset + 6, (byte)(vL >> 48));
        UNSAFE.putByte(null, native_adr + offset + 7, (byte)(vL >> 56));
    }

    @Benchmark
    public void store_unsafe_native_C4_L_offs_noalloc_direct() {
        UNSAFE.putChar(null, native_adr + offset + 0, (char)(vL >> 0 ));
        UNSAFE.putChar(null, native_adr + offset + 2, (char)(vL >> 16));
        UNSAFE.putChar(null, native_adr + offset + 4, (char)(vL >> 32));
        UNSAFE.putChar(null, native_adr + offset + 6, (char)(vL >> 48));
    }

    @Benchmark
    public void store_unsafe_native_B8_L_offs_noalloc_unsafe() {
        UNSAFE.putLongUnaligned(null, native_adr + offset + 0, vL);
    }

    @Fork(value = 1, jvmArgsPrepend = {
        "-XX:+UnlockDiagnosticVMOptions", "-XX:-MergeStores"
    })
    public static class MergeStoresDisabled extends MergeStores {}
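    // This nested class re-runs every inherited @Benchmark in a fork with MergeStores
    // switched off, giving a direct enabled-vs-disabled comparison from a single run.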
}