4809552: Optimize Arrays.fill(...)
Reviewed-by: kvn
Commit: 08d9e03b81
Parent: 519c627fe5
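This change adds an OptimizeFill pass to C2 that recognizes simple array-fill loops and replaces the whole loop body with a call to a per-type fill stub (the new generate_fill stubs on SPARC and x86 below). Roughly, the loops it targets have the following shape; this is an illustrative sketch, not code from the changeset:

```cpp
// Illustrative only: the loop shape match_fill_loop() (below) accepts --
// a counted inner loop whose single store writes a loop-invariant value
// through a unit-stride, induction-variable-indexed address.
void fill_pattern(int* a, int init, int limit, int v) {
  for (int i = init; i < limit; i++) {
    a[i] = v;  // becomes one call: jint_fill(a + init, v, limit - init)
  }
}
```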
@@ -1587,6 +1587,185 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  //
  //  Generate stub for disjoint short fill.  If "aligned" is true, the
  //  "to" address is assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //      to:    O0
  //      value: O1
  //      count: O2 treated as signed
  //
  address generate_fill(BasicType t, bool aligned, const char* name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    const Register to    = O0;   // source array address
    const Register value = O1;   // fill value
    const Register count = O2;   // elements count
    // O3 is used as a temp register

    assert_clean_int(count, O3);     // Make sure 'count' is clean int.

    Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
    Label L_fill_2_bytes, L_fill_4_bytes, L_fill_32_bytes;

    int shift = -1;
    switch (t) {
      case T_BYTE:
        shift = 2;
        break;
      case T_SHORT:
        shift = 1;
        break;
      case T_INT:
        shift = 0;
        break;
      default: ShouldNotReachHere();
    }

    BLOCK_COMMENT("Entry:");

    if (t == T_BYTE) {
      // Zero extend value
      __ and3(value, 0xff, value);
      __ sllx(value, 8, O3);
      __ or3(value, O3, value);
    }
    if (t == T_SHORT) {
      // Zero extend value
      __ sethi(0xffff0000, O3);
      __ andn(value, O3, value);
    }
    if (t == T_BYTE || t == T_SHORT) {
      __ sllx(value, 16, O3);
      __ or3(value, O3, value);
    }

    __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
    __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_4_bytes); // use unsigned cmp
    __ delayed()->andcc(count, 1<<shift, G0);

    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
      // align source address at 4 bytes address boundary
      if (t == T_BYTE) {
        // One byte misalignment happens only for byte arrays
        __ andcc(to, 1, G0);
        __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
        __ delayed()->nop();
        __ stb(value, to, 0);
        __ inc(to, 1);
        __ dec(count, 1);
        __ BIND(L_skip_align1);
      }
      // Two bytes misalignment happens only for byte and short (char) arrays
      __ andcc(to, 2, G0);
      __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
      __ delayed()->nop();
      __ sth(value, to, 0);
      __ inc(to, 2);
      __ dec(count, 1 << (shift - 1));
      __ BIND(L_skip_align2);
    }
#ifdef _LP64
    if (!aligned) {
#endif
    // align to 8 bytes, we know we are 4 byte aligned to start
    __ andcc(to, 7, G0);
    __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
    __ delayed()->nop();
    __ stw(value, to, 0);
    __ inc(to, 4);
    __ dec(count, 1 << shift);
    __ BIND(L_fill_32_bytes);
#ifdef _LP64
    }
#endif

    Label L_check_fill_8_bytes;
    // Fill 32-byte chunks
    __ subcc(count, 8 << shift, count);
    __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
    __ delayed()->nop();

    if (t == T_INT) {
      // Zero extend value
      __ srl(value, 0, value);
    }
    if (t == T_BYTE || t == T_SHORT || t == T_INT) {
      __ sllx(value, 32, O3);
      __ or3(value, O3, value);
    }

    Label L_fill_32_bytes_loop;
    __ align(16);
    __ BIND(L_fill_32_bytes_loop);

    __ stx(value, to, 0);
    __ stx(value, to, 8);
    __ stx(value, to, 16);
    __ stx(value, to, 24);

    __ subcc(count, 8 << shift, count);
    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
    __ delayed()->add(to, 32, to);

    __ BIND(L_check_fill_8_bytes);
    __ addcc(count, 8 << shift, count);
    __ brx(Assembler::zero, false, Assembler::pn, L_exit);
    __ delayed()->subcc(count, 1 << (shift + 1), count);
    __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
    __ delayed()->andcc(count, 1<<shift, G0);

    //
    // length is too short, just fill 8 bytes at a time
    //
    Label L_fill_8_bytes_loop;
    __ BIND(L_fill_8_bytes_loop);
    __ stx(value, to, 0);
    __ subcc(count, 1 << (shift + 1), count);
    __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
    __ delayed()->add(to, 8, to);

    // fill trailing 4 bytes
    __ andcc(count, 1<<shift, G0);  // in delay slot of branches
    __ BIND(L_fill_4_bytes);
    __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
    if (t == T_BYTE || t == T_SHORT) {
      __ delayed()->andcc(count, 1<<(shift-1), G0);
    } else {
      __ delayed()->nop();
    }
    __ stw(value, to, 0);
    if (t == T_BYTE || t == T_SHORT) {
      __ inc(to, 4);
      // fill trailing 2 bytes
      __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
      __ BIND(L_fill_2_bytes);
      __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
      __ delayed()->andcc(count, 1, count);
      __ sth(value, to, 0);
      if (t == T_BYTE) {
        __ inc(to, 2);
        // fill trailing byte
        __ andcc(count, 1, count); // in delay slot of branches
        __ BIND(L_fill_byte);
        __ brx(Assembler::zero, false, Assembler::pt, L_exit);
        __ delayed()->nop();
        __ stb(value, to, 0);
      } else {
        __ BIND(L_fill_byte);
      }
    } else {
      __ BIND(L_fill_2_bytes);
    }
    __ BIND(L_exit);
    __ retl();
    __ delayed()->mov(G0, O0); // return 0
    return start;
  }
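The three "Zero extend value" blocks above implement a standard value-replication trick: the element value is smeared across a full 64-bit word so the main loop can issue 8-byte stx stores regardless of element size. A C++ sketch of what they compute (illustrative only; assumes the value arrives zero-extended to its element width):

```cpp
#include <cstdint>

// Replicate an element of element_bits width across a 64-bit word,
// doubling the replicated width on each step (8 -> 16 -> 32 -> 64).
uint64_t replicate(uint64_t value, int element_bits) {
  for (int width = element_bits; width < 64; width *= 2) {
    value |= value << width;
  }
  return value;
}
// replicate(0x2a, 8)        == 0x2a2a2a2a2a2a2a2a  (T_BYTE)
// replicate(0x1234, 16)     == 0x1234123412341234  (T_SHORT)
// replicate(0x89abcdef, 32) == 0x89abcdef89abcdef  (T_INT)
```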

  //
  //  Generate stub for conjoint short copy.  If "aligned" is true, the
  //  "from" and "to" addresses are assumed to be heapword aligned.

@@ -2855,6 +3034,13 @@ class StubGenerator: public StubCodeGenerator {
    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy");
    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy");

    StubRoutines::_jbyte_fill  = generate_fill(T_BYTE,  false, "jbyte_fill");
    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
    StubRoutines::_jint_fill   = generate_fill(T_INT,   false, "jint_fill");
    StubRoutines::_arrayof_jbyte_fill  = generate_fill(T_BYTE,  true, "arrayof_jbyte_fill");
    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
    StubRoutines::_arrayof_jint_fill   = generate_fill(T_INT,   true, "arrayof_jint_fill");
  }

  void generate_initial() {

@@ -8767,6 +8767,186 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist
  bind(DONE);
}

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                   Register to, Register value, Register count,
                                   Register rtmp, XMMRegister xtmp) {
  assert_different_registers(to, value, count, rtmp);
  Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
  Label L_fill_2_bytes, L_fill_4_bytes;

  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 2;
      break;
    case T_SHORT:
      shift = 1;
      break;
    case T_INT:
      shift = 0;
      break;
    default: ShouldNotReachHere();
  }

  if (t == T_BYTE) {
    andl(value, 0xff);
    movl(rtmp, value);
    shll(rtmp, 8);
    orl(value, rtmp);
  }
  if (t == T_SHORT) {
    andl(value, 0xffff);
  }
  if (t == T_BYTE || t == T_SHORT) {
    movl(rtmp, value);
    shll(rtmp, 16);
    orl(value, rtmp);
  }

  cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
  jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
  if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
    // align source address at 4 bytes address boundary
    if (t == T_BYTE) {
      // One byte misalignment happens only for byte arrays
      testptr(to, 1);
      jccb(Assembler::zero, L_skip_align1);
      movb(Address(to, 0), value);
      increment(to);
      decrement(count);
      BIND(L_skip_align1);
    }
    // Two bytes misalignment happens only for byte and short (char) arrays
    testptr(to, 2);
    jccb(Assembler::zero, L_skip_align2);
    movw(Address(to, 0), value);
    addptr(to, 2);
    subl(count, 1<<(shift-1));
    BIND(L_skip_align2);
  }
  if (UseSSE < 2) {
    Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
    // Fill 32-byte chunks
    subl(count, 8 << shift);
    jcc(Assembler::less, L_check_fill_8_bytes);
    align(16);

    BIND(L_fill_32_bytes_loop);

    for (int i = 0; i < 32; i += 4) {
      movl(Address(to, i), value);
    }

    addptr(to, 32);
    subl(count, 8 << shift);
    jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
    BIND(L_check_fill_8_bytes);
    addl(count, 8 << shift);
    jccb(Assembler::zero, L_exit);
    jmpb(L_fill_8_bytes);

    //
    // length is too short, just fill qwords
    //
    BIND(L_fill_8_bytes_loop);
    movl(Address(to, 0), value);
    movl(Address(to, 4), value);
    addptr(to, 8);
    BIND(L_fill_8_bytes);
    subl(count, 1 << (shift + 1));
    jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    // fall through to fill 4 bytes
  } else {
    Label L_fill_32_bytes;
    if (!UseUnalignedLoadStores) {
      // align to 8 bytes, we know we are 4 byte aligned to start
      testptr(to, 4);
      jccb(Assembler::zero, L_fill_32_bytes);
      movl(Address(to, 0), value);
      addptr(to, 4);
      subl(count, 1<<shift);
    }
    BIND(L_fill_32_bytes);
    {
      assert( UseSSE >= 2, "supported cpu only" );
      Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
      // Fill 32-byte chunks
      movdl(xtmp, value);
      pshufd(xtmp, xtmp, 0);

      subl(count, 8 << shift);
      jcc(Assembler::less, L_check_fill_8_bytes);
      align(16);

      BIND(L_fill_32_bytes_loop);

      if (UseUnalignedLoadStores) {
        movdqu(Address(to, 0), xtmp);
        movdqu(Address(to, 16), xtmp);
      } else {
        movq(Address(to, 0), xtmp);
        movq(Address(to, 8), xtmp);
        movq(Address(to, 16), xtmp);
        movq(Address(to, 24), xtmp);
      }

      addptr(to, 32);
      subl(count, 8 << shift);
      jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
      BIND(L_check_fill_8_bytes);
      addl(count, 8 << shift);
      jccb(Assembler::zero, L_exit);
      jmpb(L_fill_8_bytes);

      //
      // length is too short, just fill qwords
      //
      BIND(L_fill_8_bytes_loop);
      movq(Address(to, 0), xtmp);
      addptr(to, 8);
      BIND(L_fill_8_bytes);
      subl(count, 1 << (shift + 1));
      jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
    }
  }
  // fill trailing 4 bytes
  BIND(L_fill_4_bytes);
  testl(count, 1<<shift);
  jccb(Assembler::zero, L_fill_2_bytes);
  movl(Address(to, 0), value);
  if (t == T_BYTE || t == T_SHORT) {
    addptr(to, 4);
    BIND(L_fill_2_bytes);
    // fill trailing 2 bytes
    testl(count, 1<<(shift-1));
    jccb(Assembler::zero, L_fill_byte);
    movw(Address(to, 0), value);
    if (t == T_BYTE) {
      addptr(to, 2);
      BIND(L_fill_byte);
      // fill trailing byte
      testl(count, 1);
      jccb(Assembler::zero, L_exit);
      movb(Address(to, 0), value);
    } else {
      BIND(L_fill_byte);
    }
  } else {
    BIND(L_fill_2_bytes);
  }
  BIND(L_exit);
}
#undef BIND
#undef BLOCK_COMMENT
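On the SSE2 path the 32-bit pattern is broadcast into an XMM register with movdl followed by pshufd(xtmp, xtmp, 0), so each 16-byte store writes four copies of it. The intrinsic equivalent, as a sketch (requires SSE2):

```cpp
#include <emmintrin.h>

// Broadcast a 32-bit pattern into all four lanes of an XMM register;
// this is what movdl + pshufd(..., 0) compute above.
__m128i broadcast32(int value) {
  return _mm_set1_epi32(value);
}
```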


Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
  switch (cond) {
    // Note some conditions are synonyms for others

@@ -2242,6 +2242,11 @@ public:
                          Register limit, Register result, Register chr,
                          XMMRegister vec1, XMMRegister vec2);

  // Fill primitive arrays
  void generate_fill(BasicType t, bool aligned,
                     Register to, Register value, Register count,
                     Register rtmp, XMMRegister xtmp);

#undef VIRTUAL

};

@@ -1039,6 +1039,33 @@ class StubGenerator: public StubCodeGenerator {
  }


  address generate_fill(BasicType t, bool aligned, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    BLOCK_COMMENT("Entry:");

    const Register to    = rdi;  // source array address
    const Register value = rdx;  // value
    const Register count = rsi;  // elements count

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);
    __ push(rdi);
    __ movptr(to   , Address(rsp, 12+ 4));
    __ movl(value, Address(rsp, 12+ 8));
    __ movl(count, Address(rsp, 12+ 12));

    __ generate_fill(t, aligned, to, value, count, rax, xmm0);

    __ pop(rdi);
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }
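The 12 in the Address(rsp, 12+ ...) offsets accounts for the three words pushed after the call: the frame pointer saved by enter() plus the pushed rsi and rdi. The remaining 4, 8, and 12 then skip the return address to reach the three cdecl stack arguments (to, value, count).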

  address generate_conjoint_copy(BasicType t, bool aligned,
                                 Address::ScaleFactor sf,
                                 address nooverlap_target,
@@ -2001,6 +2028,13 @@ class StubGenerator: public StubCodeGenerator {
    generate_conjoint_long_copy(entry, &entry_jlong_arraycopy,
                                "jlong_arraycopy");

    StubRoutines::_jbyte_fill  = generate_fill(T_BYTE,  false, "jbyte_fill");
    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
    StubRoutines::_jint_fill   = generate_fill(T_INT,   false, "jint_fill");
    StubRoutines::_arrayof_jbyte_fill  = generate_fill(T_BYTE,  true, "arrayof_jbyte_fill");
    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
    StubRoutines::_arrayof_jint_fill   = generate_fill(T_INT,   true, "arrayof_jint_fill");

    StubRoutines::_arrayof_jint_disjoint_arraycopy =
        StubRoutines::_jint_disjoint_arraycopy;
    StubRoutines::_arrayof_oop_disjoint_arraycopy =

@@ -1625,6 +1625,26 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  address generate_fill(BasicType t, bool aligned, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    BLOCK_COMMENT("Entry:");

    const Register to    = c_rarg0;  // source array address
    const Register value = c_rarg1;  // value
    const Register count = c_rarg2;  // elements count

    __ enter(); // required for proper stackwalking of RuntimeStub frame

    __ generate_fill(t, aligned, to, value, count, rax, xmm0);

    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);
    return start;
  }

  // Arguments:
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
  //             ignored
@@ -2712,6 +2732,13 @@ class StubGenerator: public StubCodeGenerator {
    StubRoutines::_unsafe_arraycopy  = generate_unsafe_copy("unsafe_arraycopy");
    StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");

    StubRoutines::_jbyte_fill  = generate_fill(T_BYTE,  false, "jbyte_fill");
    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
    StubRoutines::_jint_fill   = generate_fill(T_INT,   false, "jint_fill");
    StubRoutines::_arrayof_jbyte_fill  = generate_fill(T_BYTE,  true, "arrayof_jbyte_fill");
    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
    StubRoutines::_arrayof_jint_fill   = generate_fill(T_INT,   true, "arrayof_jint_fill");

    // We don't generate specialized code for HeapWord-aligned source
    // arrays, so just use the code we've already generated
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy;

@@ -624,6 +624,7 @@ loopTransform.cpp                       divnode.hpp
loopTransform.cpp                       loopnode.hpp
loopTransform.cpp                       mulnode.hpp
loopTransform.cpp                       rootnode.hpp
loopTransform.cpp                       runtime.hpp
loopTransform.cpp                       subnode.hpp

loopUnswitch.cpp                        allocation.inline.hpp
@@ -705,6 +705,9 @@ int AddPNode::unpack_offsets(Node* elements[], int length) {
    }
    addr = addr->in(AddPNode::Address);
  }
  if (addr != base) {
    return -1;
  }
  return count;
}

@@ -157,6 +157,12 @@
  develop(bool, TraceLoopPredicate, false,                                  \
          "Trace generation of loop predicates")                            \
                                                                            \
  product(bool, OptimizeFill, false,                                        \
          "convert fill/copy loops into intrinsic")                         \
                                                                            \
  develop(bool, TraceOptimizeFill, false,                                   \
          "print detailed information about fill conversion")               \
                                                                            \
  develop(bool, OptoCoalesce, true,                                         \
          "Use Conservative Copy Coalescing in the Register Allocator")     \
                                                                            \
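Note that OptimizeFill is introduced as a product flag defaulting to false; it can be enabled explicitly with -XX:+OptimizeFill, and the arguments.cpp hunk further down turns it on by default under AggressiveOpts.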
@@ -2049,11 +2049,18 @@ bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invari
   if (cmp->Opcode() != Op_CmpU ) {
     return false;
   }
-  if (cmp->in(2)->Opcode() != Op_LoadRange) {
-    return false;
+  Node* range = cmp->in(2);
+  if (range->Opcode() != Op_LoadRange) {
+    const TypeInt* tint = phase->_igvn.type(range)->isa_int();
+    if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) {
+      // Allow predication on positive values that aren't LoadRanges.
+      // This allows optimization of loops where the length of the
+      // array is a known value and doesn't need to be loaded back
+      // from the array.
+      return false;
+    }
   }
-  LoadRangeNode* lr = (LoadRangeNode*)cmp->in(2);
-  if (!invar.is_invariant(lr)) { // loadRange must be invariant
+  if (!invar.is_invariant(range)) {
     return false;
   }
   Node *iv = _head->as_CountedLoop()->phi();
@@ -2248,9 +2255,9 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
       const Node* cmp = bol->in(1)->as_Cmp();
       Node* idx = cmp->in(1);
       assert(!invar.is_invariant(idx), "index is variant");
-      assert(cmp->in(2)->Opcode() == Op_LoadRange, "must be");
-      Node* ld_rng = cmp->in(2); // LoadRangeNode
-      assert(invar.is_invariant(ld_rng), "load range must be invariant");
+      assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be");
+      Node* rng = cmp->in(2);
+      assert(invar.is_invariant(rng), "range must be invariant");
       int scale = 1;
       Node* offset = zero;
       bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
@@ -2271,21 +2278,21 @@

       // Perform cloning to keep Invariance state correct since the
       // late schedule will place invariant things in the loop.
-      ld_rng = invar.clone(ld_rng, ctrl);
+      rng = invar.clone(rng, ctrl);
       if (offset && offset != zero) {
         assert(invar.is_invariant(offset), "offset must be loop invariant");
         offset = invar.clone(offset, ctrl);
       }

       // Test the lower bound
-      Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng, false);
+      Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false);
       IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
       _igvn.hash_delete(lower_bound_iff);
       lower_bound_iff->set_req(1, lower_bound_bol);
       if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx);

       // Test the upper bound
-      Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng, true);
+      Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true);
       IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
       _igvn.hash_delete(upper_bound_iff);
       upper_bound_iff->set_req(1, upper_bound_bol);
@@ -2366,3 +2373,348 @@ bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) {

  return hoisted;
}


// Process all the loops in the loop tree and replace any fill
// patterns with an intrinsic version.
bool PhaseIdealLoop::do_intrinsify_fill() {
  bool changed = false;
  for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
    IdealLoopTree* lpt = iter.current();
    changed |= intrinsify_fill(lpt);
  }
  return changed;
}


// Examine an inner loop looking for a single store of an invariant
// value in a unit stride loop.
bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
                                     Node*& shift, Node*& con) {
  const char* msg = NULL;
  Node* msg_node = NULL;

  store_value = NULL;
  con = NULL;
  shift = NULL;

  // Process the loop looking for stores.  If there are multiple
  // stores or extra control flow give up at this point.
  CountedLoopNode* head = lpt->_head->as_CountedLoop();
  for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    if (n->outcnt() == 0) continue; // Ignore dead
    if (n->is_Store()) {
      if (store != NULL) {
        msg = "multiple stores";
        break;
      }
      int opc = n->Opcode();
      if (opc == Op_StoreP || opc == Op_StoreN || opc == Op_StoreCM) {
        msg = "oop fills not handled";
        break;
      }
      Node* value = n->in(MemNode::ValueIn);
      if (!lpt->is_invariant(value)) {
        msg = "variant store value";
      }
      store = n;
      store_value = value;
    } else if (n->is_If() && n != head->loopexit()) {
      msg = "extra control flow";
      msg_node = n;
    }
  }

  if (store == NULL) {
    // No store in loop
    return false;
  }

  if (msg == NULL && head->stride_con() != 1) {
    // could handle negative strides too
    if (head->stride_con() < 0) {
      msg = "negative stride";
    } else {
      msg = "non-unit stride";
    }
  }

  if (msg == NULL && !store->in(MemNode::Address)->is_AddP()) {
    msg = "can't handle store address";
    msg_node = store->in(MemNode::Address);
  }

  // Make sure there is an appropriate fill routine
  BasicType t = store->as_Mem()->memory_type();
  const char* fill_name;
  if (msg == NULL &&
      StubRoutines::select_fill_function(t, false, fill_name) == NULL) {
    msg = "unsupported store";
    msg_node = store;
  }

  if (msg != NULL) {
#ifndef PRODUCT
    if (TraceOptimizeFill) {
      tty->print_cr("not fill intrinsic candidate: %s", msg);
      if (msg_node != NULL) msg_node->dump();
    }
#endif
    return false;
  }

  // Make sure the address expression can be handled.  It should be
  // head->phi * elsize + con.  head->phi might have a ConvI2L.
  Node* elements[4];
  Node* conv = NULL;
  int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
  for (int e = 0; e < count; e++) {
    Node* n = elements[e];
    if (n->is_Con() && con == NULL) {
      con = n;
    } else if (n->Opcode() == Op_LShiftX && shift == NULL) {
      Node* value = n->in(1);
#ifdef _LP64
      if (value->Opcode() == Op_ConvI2L) {
        conv = value;
        value = value->in(1);
      }
#endif
      if (value != head->phi()) {
        msg = "unhandled shift in address";
      } else {
        shift = n;
        assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int(), "scale should match");
      }
    } else if (n->Opcode() == Op_ConvI2L && conv == NULL) {
      if (n->in(1) == head->phi()) {
        conv = n;
      } else {
        msg = "unhandled input to ConvI2L";
      }
    } else if (n == head->phi()) {
      // no shift, check below for allowed cases
    } else {
      msg = "unhandled node in address";
      msg_node = n;
    }
  }

  if (count == -1) {
    msg = "malformed address expression";
    msg_node = store;
  }

  // byte sized items won't have a shift
  if (msg == NULL && shift == NULL && t != T_BYTE && t != T_BOOLEAN) {
    msg = "can't find shift";
    msg_node = store;
  }

  if (msg != NULL) {
#ifndef PRODUCT
    if (TraceOptimizeFill) {
      tty->print_cr("not fill intrinsic: %s", msg);
      if (msg_node != NULL) msg_node->dump();
    }
#endif
    return false;
  }

  // Now make sure all the other nodes in the loop can be handled
  VectorSet ok(Thread::current()->resource_area());

  // store related values are ok
  ok.set(store->_idx);
  ok.set(store->in(MemNode::Memory)->_idx);

  // Loop structure is ok
  ok.set(head->_idx);
  ok.set(head->loopexit()->_idx);
  ok.set(head->phi()->_idx);
  ok.set(head->incr()->_idx);
  ok.set(head->loopexit()->cmp_node()->_idx);
  ok.set(head->loopexit()->in(1)->_idx);

  // Address elements are ok
  if (con)   ok.set(con->_idx);
  if (shift) ok.set(shift->_idx);
  if (conv)  ok.set(conv->_idx);

  for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    if (n->outcnt() == 0) continue; // Ignore dead
    if (ok.test(n->_idx)) continue;
    // Backedge projection is ok
    if (n->is_IfTrue() && n->in(0) == head->loopexit()) continue;
    if (!n->is_AddP()) {
      msg = "unhandled node";
      msg_node = n;
      break;
    }
  }

  // Make sure no unexpected values are used outside the loop
  for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    // These values can be replaced with other nodes if they are used
    // outside the loop.
    if (n == store || n == head->loopexit() || n == head->incr()) continue;
    for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) {
      Node* use = iter.get();
      if (!lpt->_body.contains(use)) {
        msg = "node is used outside loop";
        // lpt->_body.dump();
        msg_node = n;
        break;
      }
    }
  }

#ifdef ASSERT
  if (TraceOptimizeFill) {
    if (msg != NULL) {
      tty->print_cr("no fill intrinsic: %s", msg);
      if (msg_node != NULL) msg_node->dump();
    } else {
      tty->print_cr("fill intrinsic for:");
    }
    store->dump();
    if (Verbose) {
      lpt->_body.dump();
    }
  }
#endif

  return msg == NULL;
}
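In short, the only address expressions match_fill_loop() accepts are base + (phi << log2(element_size)) + constant, where phi may additionally be wrapped in a ConvI2L on 64-bit platforms and byte-sized elements carry no shift at all; any other component in the unpacked address sets msg and rejects the loop.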
|


bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) {
  // Only for counted inner loops
  if (!lpt->is_counted() || !lpt->is_inner()) {
    return false;
  }

  // Must have constant stride
  CountedLoopNode* head = lpt->_head->as_CountedLoop();
  if (!head->stride_is_con() || !head->is_normal_loop()) {
    return false;
  }

  // Check that the body only contains a store of a loop invariant
  // value that is indexed by the loop phi.
  Node* store = NULL;
  Node* store_value = NULL;
  Node* shift = NULL;
  Node* offset = NULL;
  if (!match_fill_loop(lpt, store, store_value, shift, offset)) {
    return false;
  }

  // Now replace the whole loop body by a call to a fill routine that
  // covers the same region as the loop.
  Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base);

  // Build an expression for the beginning of the copy region
  Node* index = head->init_trip();
#ifdef _LP64
  index = new (C, 2) ConvI2LNode(index);
  _igvn.register_new_node_with_optimizer(index);
#endif
  if (shift != NULL) {
    // byte arrays don't require a shift but others do.
    index = new (C, 3) LShiftXNode(index, shift->in(2));
    _igvn.register_new_node_with_optimizer(index);
  }
  index = new (C, 4) AddPNode(base, base, index);
  _igvn.register_new_node_with_optimizer(index);
  Node* from = new (C, 4) AddPNode(base, index, offset);
  _igvn.register_new_node_with_optimizer(from);
  // Compute the number of elements to copy
  Node* len = new (C, 3) SubINode(head->limit(), head->init_trip());
  _igvn.register_new_node_with_optimizer(len);

  BasicType t = store->as_Mem()->memory_type();
  bool aligned = false;
  if (offset != NULL && head->init_trip()->is_Con()) {
    int element_size = type2aelembytes(t);
    aligned = (offset->find_intptr_t_type()->get_con() + head->init_trip()->get_int() * element_size) % HeapWordSize == 0;
  }

  // Build a call to the fill routine
  const char* fill_name;
  address fill = StubRoutines::select_fill_function(t, aligned, fill_name);
  assert(fill != NULL, "what?");

  // Convert float/double to int/long for fill routines
  if (t == T_FLOAT) {
    store_value = new (C, 2) MoveF2INode(store_value);
    _igvn.register_new_node_with_optimizer(store_value);
  } else if (t == T_DOUBLE) {
    store_value = new (C, 2) MoveD2LNode(store_value);
    _igvn.register_new_node_with_optimizer(store_value);
  }

  Node* mem_phi = store->in(MemNode::Memory);
  Node* result_ctrl;
  Node* result_mem;
  const TypeFunc* call_type = OptoRuntime::array_fill_Type();
  int size = call_type->domain()->cnt();
  CallLeafNode *call = new (C, size) CallLeafNoFPNode(call_type, fill,
                                                      fill_name, TypeAryPtr::get_array_body_type(t));
  call->init_req(TypeFunc::Parms+0, from);
  call->init_req(TypeFunc::Parms+1, store_value);
  call->init_req(TypeFunc::Parms+2, len);
  call->init_req(TypeFunc::Control,   head->init_control());
  call->init_req(TypeFunc::I_O,       C->top());  // does no i/o
  call->init_req(TypeFunc::Memory,    mem_phi->in(LoopNode::EntryControl));
  call->init_req(TypeFunc::ReturnAdr, C->start()->proj_out(TypeFunc::ReturnAdr));
  call->init_req(TypeFunc::FramePtr,  C->start()->proj_out(TypeFunc::FramePtr));
  _igvn.register_new_node_with_optimizer(call);
  result_ctrl = new (C, 1) ProjNode(call, TypeFunc::Control);
  _igvn.register_new_node_with_optimizer(result_ctrl);
  result_mem = new (C, 1) ProjNode(call, TypeFunc::Memory);
  _igvn.register_new_node_with_optimizer(result_mem);

  // If this fill is tightly coupled to an allocation and overwrites
  // the whole body, allow it to take over the zeroing.
  AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this);
  if (alloc != NULL && alloc->is_AllocateArray()) {
    Node* length = alloc->as_AllocateArray()->Ideal_length();
    if (head->limit() == length &&
        head->init_trip() == _igvn.intcon(0)) {
      if (TraceOptimizeFill) {
        tty->print_cr("Eliminated zeroing in allocation");
      }
      alloc->maybe_set_complete(&_igvn);
    } else {
#ifdef ASSERT
      if (TraceOptimizeFill) {
        tty->print_cr("filling array but bounds don't match");
        alloc->dump();
        head->init_trip()->dump();
        head->limit()->dump();
        length->dump();
      }
#endif
    }
  }

  // Redirect the old control and memory edges that are outside the loop.
  Node* exit = head->loopexit()->proj_out(0);
  _igvn.replace_node(exit, result_ctrl);
  _igvn.replace_node(store, result_mem);
  // Any uses of the increment outside of the loop become the loop limit.
  _igvn.replace_node(head->incr(), head->limit());

  // Disconnect the head from the loop.
  for (uint i = 0; i < lpt->_body.size(); i++) {
    Node* n = lpt->_body.at(i);
    _igvn.replace_node(n, C->top());
  }

  return true;
}
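The node construction above (ConvI2L, LShiftX, two AddPs, SubI) just materializes the start address and element count for the stub call. As plain arithmetic, a sketch (hypothetical helper, not in the changeset):

```cpp
#include <cstdint>

// 'from' argument handed to the fill stub: the address of element init,
// where offset is the constant (array header) found in the address.
char* fill_start(char* base, int init, int shift, intptr_t offset) {
  return base + ((intptr_t)init << shift) + offset;
}
// 'len' argument: head->limit() - head->init_trip(), the element count.
```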
|
@@ -1673,6 +1673,12 @@ void PhaseIdealLoop::build_and_optimize(bool do_split_ifs, bool do_loop_pred) {
    _ltree_root->_child->loop_predication(this);
  }

  if (OptimizeFill && UseLoopPredicate && C->has_loops() && !C->major_progress()) {
    if (do_intrinsify_fill()) {
      C->set_major_progress();
    }
  }

  // Perform iteration-splitting on inner loops.  Split iterations to avoid
  // range checks or one-shot null checks.

@@ -937,6 +937,12 @@ public:
  // same block. Split thru the Region.
  void do_split_if( Node *iff );

  // Conversion of fill/copy patterns into intrinsic versions
  bool do_intrinsify_fill();
  bool intrinsify_fill(IdealLoopTree* lpt);
  bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
                       Node*& shift, Node*& offset);

private:
  // Return a type based on condition control flow
  const TypeInt* filtered_type( Node *n, Node* n_ctrl);

@@ -645,6 +645,22 @@ const TypeFunc* OptoRuntime::generic_arraycopy_Type() {
}


const TypeFunc* OptoRuntime::array_fill_Type() {
  // create input type (domain)
  const Type** fields = TypeTuple::fields(3);
  fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
  fields[TypeFunc::Parms+1] = TypeInt::INT;
  fields[TypeFunc::Parms+2] = TypeInt::INT;
  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms + 3, fields);

  // create result type
  fields = TypeTuple::fields(1);
  fields[TypeFunc::Parms+0] = NULL; // void
  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);

  return TypeFunc::make(domain, range);
}
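The TypeFunc built above describes a void leaf call taking (pointer, int, int). In C terms each stub behaves like the following sketch (illustrative; the real entry points are the generated assembly stubs):

```cpp
#include <cstdint>

// C-level behavior of a jint fill stub: fill(to, value, count).
void jint_fill_equivalent(int32_t* to, int32_t value, int32_t count) {
  for (int32_t i = 0; i < count; i++) {
    to[i] = value;
  }
}
```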
|
//------------- Interpreter state access for on stack replacement
const TypeFunc* OptoRuntime::osr_end_Type() {
  // create input type (domain)

@@ -260,6 +260,8 @@ private:
  static const TypeFunc* generic_arraycopy_Type();
  static const TypeFunc* slow_arraycopy_Type();   // the full routine

  static const TypeFunc* array_fill_Type();

  // leaf on stack replacement interpreter accessor types
  static const TypeFunc* osr_end_Type();

@@ -1513,6 +1513,9 @@ void Arguments::set_aggressive_opts_flags() {
  if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) {
    FLAG_SET_DEFAULT(OptimizeStringConcat, true);
  }
  if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeFill)) {
    FLAG_SET_DEFAULT(OptimizeFill, true);
  }
#endif

  if (AggressiveOpts) {

@@ -97,6 +97,15 @@ address StubRoutines::_checkcast_arraycopy = NULL;
address StubRoutines::_unsafe_arraycopy  = NULL;
address StubRoutines::_generic_arraycopy = NULL;


address StubRoutines::_jbyte_fill;
address StubRoutines::_jshort_fill;
address StubRoutines::_jint_fill;
address StubRoutines::_arrayof_jbyte_fill;
address StubRoutines::_arrayof_jshort_fill;
address StubRoutines::_arrayof_jint_fill;


double (* StubRoutines::_intrinsic_log   )(double) = NULL;
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
double (* StubRoutines::_intrinsic_exp   )(double) = NULL;
@@ -195,6 +204,46 @@ void StubRoutines::initialize2() {

#undef TEST_ARRAYCOPY

#define TEST_FILL(type)                                                                      \
  if (_##type##_fill != NULL) {                                                              \
    union {                                                                                  \
      double d;                                                                              \
      type body[96];                                                                         \
    } s;                                                                                     \
                                                                                             \
    int v = 32;                                                                              \
    for (int offset = -2; offset <= 2; offset++) {                                           \
      for (int i = 0; i < 96; i++) {                                                         \
        s.body[i] = 1;                                                                       \
      }                                                                                      \
      type* start = s.body + 8 + offset;                                                     \
      for (int aligned = 0; aligned < 2; aligned++) {                                        \
        if (aligned) {                                                                       \
          if (((intptr_t)start) % HeapWordSize == 0) {                                       \
            ((void (*)(type*, int, int))StubRoutines::_arrayof_##type##_fill)(start, v, 80); \
          } else {                                                                           \
            continue;                                                                        \
          }                                                                                  \
        } else {                                                                             \
          ((void (*)(type*, int, int))StubRoutines::_##type##_fill)(start, v, 80);           \
        }                                                                                    \
        for (int i = 0; i < 96; i++) {                                                       \
          if (i < (8 + offset) || i >= (88 + offset)) {                                      \
            assert(s.body[i] == 1, "what?");                                                 \
          } else {                                                                           \
            assert(s.body[i] == 32, "what?");                                                \
          }                                                                                  \
        }                                                                                    \
      }                                                                                      \
    }                                                                                        \
  }                                                                                          \

TEST_FILL(jbyte);
TEST_FILL(jshort);
TEST_FILL(jint);

#undef TEST_FILL
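TEST_FILL stress-tests each generated stub at five starting alignments: it fills 80 elements beginning at body + 8 + offset for each offset in [-2, 2] (taking the arrayof_ variant only when the start happens to be HeapWord aligned) and then asserts that the guard elements on both sides of the filled region still hold their sentinel value of 1.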

#define TEST_COPYRTN(type) \
  test_arraycopy_func(CAST_FROM_FN_PTR(address, Copy::conjoint_##type##s_atomic),  sizeof(type)); \
  test_arraycopy_func(CAST_FROM_FN_PTR(address, Copy::arrayof_conjoint_##type##s), (int)MAX2(sizeof(HeapWord), sizeof(type)))
@@ -315,3 +364,39 @@ JRT_LEAF(void, StubRoutines::arrayof_oop_copy(HeapWord* src, HeapWord* dest, siz
  Copy::arrayof_conjoint_oops(src, dest, count);
  gen_arraycopy_barrier((oop *) dest, count);
JRT_END


address StubRoutines::select_fill_function(BasicType t, bool aligned, const char* &name) {
#define RETURN_STUB(xxx_fill) { \
  name = #xxx_fill; \
  return StubRoutines::xxx_fill(); }

  switch (t) {
  case T_BYTE:
  case T_BOOLEAN:
    if (!aligned) RETURN_STUB(jbyte_fill);
    RETURN_STUB(arrayof_jbyte_fill);
  case T_CHAR:
  case T_SHORT:
    if (!aligned) RETURN_STUB(jshort_fill);
    RETURN_STUB(arrayof_jshort_fill);
  case T_INT:
  case T_FLOAT:
    if (!aligned) RETURN_STUB(jint_fill);
    RETURN_STUB(arrayof_jint_fill);
  case T_DOUBLE:
  case T_LONG:
  case T_ARRAY:
  case T_OBJECT:
  case T_NARROWOOP:
  case T_ADDRESS:
    // Currently unsupported
    return NULL;

  default:
    ShouldNotReachHere();
    return NULL;
  }

#undef RETURN_STUB
}

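This selector is how the compiler probes for a stub; a minimal usage sketch (the helper is hypothetical, mirroring the call in match_fill_loop()):

```cpp
// Sketch: returns true when a fill stub exists for the element type;
// select_fill_function() returns NULL for e.g. T_LONG or T_OBJECT, in
// which case the loop is left untouched.
static bool has_fill_stub(BasicType t, bool aligned) {
  const char* fill_name;
  return StubRoutines::select_fill_function(t, aligned, fill_name) != NULL;
}
```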
@@ -148,6 +148,13 @@ class StubRoutines: AllStatic {
  static address _unsafe_arraycopy;
  static address _generic_arraycopy;

  static address _jbyte_fill;
  static address _jshort_fill;
  static address _jint_fill;
  static address _arrayof_jbyte_fill;
  static address _arrayof_jshort_fill;
  static address _arrayof_jint_fill;

  // These are versions of the java.lang.Math methods which perform
  // the same operations as the intrinsic version.  They are used for
  // constant folding in the compiler to ensure equivalence.  If the
@@ -259,6 +266,16 @@ class StubRoutines: AllStatic {
  static address unsafe_arraycopy()    { return _unsafe_arraycopy; }
  static address generic_arraycopy()   { return _generic_arraycopy; }

  static address jbyte_fill()          { return _jbyte_fill; }
  static address jshort_fill()         { return _jshort_fill; }
  static address jint_fill()           { return _jint_fill; }
  static address arrayof_jbyte_fill()  { return _arrayof_jbyte_fill; }
  static address arrayof_jshort_fill() { return _arrayof_jshort_fill; }
  static address arrayof_jint_fill()   { return _arrayof_jint_fill; }

  static address select_fill_function(BasicType t, bool aligned, const char* &name);


  static double intrinsic_log(double d) {
    assert(_intrinsic_log != NULL, "must be defined");
    return _intrinsic_log(d);

@@ -529,7 +529,7 @@ extern int _type2aelembytes[T_CONFLICT+1]; // maps a BasicType to nof bytes used
 #ifdef ASSERT
 extern int type2aelembytes(BasicType t, bool allow_address = false); // asserts
 #else
-inline int type2aelembytes(BasicType t)                             { return _type2aelembytes[t]; }
+inline int type2aelembytes(BasicType t, bool allow_address = false) { return _type2aelembytes[t]; }
 #endif
