8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64

Reviewed-by: kvn
This commit is contained in:
Andrew Haley 2015-12-14 15:53:48 +00:00
parent 1a4c3a752d
commit c2221a88e8

View File

@ -958,8 +958,8 @@ class StubGenerator: public StubCodeGenerator {
const Register t0 = r3, t1 = r4; const Register t0 = r3, t1 = r4;
if (is_backwards) { if (is_backwards) {
__ lea(s, Address(s, count, Address::uxtw(exact_log2(-step)))); __ lea(s, Address(s, count, Address::lsl(exact_log2(-step))));
__ lea(d, Address(d, count, Address::uxtw(exact_log2(-step)))); __ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
} }
Label done, tail; Label done, tail;
@ -1051,10 +1051,10 @@ class StubGenerator: public StubCodeGenerator {
__ cmp(rscratch2, count); __ cmp(rscratch2, count);
__ br(Assembler::HS, end); __ br(Assembler::HS, end);
if (size == (size_t)wordSize) { if (size == (size_t)wordSize) {
__ ldr(temp, Address(a, rscratch2, Address::uxtw(exact_log2(size)))); __ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size))));
__ verify_oop(temp); __ verify_oop(temp);
} else { } else {
__ ldrw(r16, Address(a, rscratch2, Address::uxtw(exact_log2(size)))); __ ldrw(r16, Address(a, rscratch2, Address::lsl(exact_log2(size))));
__ decode_heap_oop(temp); // calls verify_oop __ decode_heap_oop(temp); // calls verify_oop
} }
__ add(rscratch2, rscratch2, size); __ add(rscratch2, rscratch2, size);
@ -1087,12 +1087,14 @@ class StubGenerator: public StubCodeGenerator {
__ align(CodeEntryAlignment); __ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name); StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc(); address start = __ pc();
__ enter();
if (entry != NULL) { if (entry != NULL) {
*entry = __ pc(); *entry = __ pc();
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory) // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:"); BLOCK_COMMENT("Entry:");
} }
__ enter();
if (is_oop) { if (is_oop) {
__ push(RegSet::of(d, count), sp); __ push(RegSet::of(d, count), sp);
// no registers are destroyed by this call // no registers are destroyed by this call
@ -1104,10 +1106,11 @@ class StubGenerator: public StubCodeGenerator {
if (VerifyOops) if (VerifyOops)
verify_oop_array(size, d, count, r16); verify_oop_array(size, d, count, r16);
__ sub(count, count, 1); // make an inclusive end pointer __ sub(count, count, 1); // make an inclusive end pointer
__ lea(count, Address(d, count, Address::uxtw(exact_log2(size)))); __ lea(count, Address(d, count, Address::lsl(exact_log2(size))));
gen_write_ref_array_post_barrier(d, count, rscratch1); gen_write_ref_array_post_barrier(d, count, rscratch1);
} }
__ leave(); __ leave();
__ mov(r0, zr); // return 0
__ ret(lr); __ ret(lr);
#ifdef BUILTIN_SIM #ifdef BUILTIN_SIM
{ {
@ -1140,11 +1143,16 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", name); StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc(); address start = __ pc();
__ enter();
if (entry != NULL) {
*entry = __ pc();
// caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
BLOCK_COMMENT("Entry:");
}
__ cmp(d, s); __ cmp(d, s);
__ br(Assembler::LS, nooverlap_target); __ br(Assembler::LS, nooverlap_target);
__ enter();
if (is_oop) { if (is_oop) {
__ push(RegSet::of(d, count), sp); __ push(RegSet::of(d, count), sp);
// no registers are destroyed by this call // no registers are destroyed by this call
@ -1160,6 +1168,7 @@ class StubGenerator: public StubCodeGenerator {
gen_write_ref_array_post_barrier(d, count, rscratch1); gen_write_ref_array_post_barrier(d, count, rscratch1);
} }
__ leave(); __ leave();
__ mov(r0, zr); // return 0
__ ret(lr); __ ret(lr);
#ifdef BUILTIN_SIM #ifdef BUILTIN_SIM
{ {
@ -1559,7 +1568,29 @@ class StubGenerator: public StubCodeGenerator {
Register dst_pos, // destination position (c_rarg3) Register dst_pos, // destination position (c_rarg3)
Register length, Register length,
Register temp, Register temp,
Label& L_failed) { Unimplemented(); } Label& L_failed) {
BLOCK_COMMENT("arraycopy_range_checks:");
assert_different_registers(rscratch1, temp);
// if (src_pos + length > arrayOop(src)->length()) FAIL;
__ ldrw(rscratch1, Address(src, arrayOopDesc::length_offset_in_bytes()));
__ addw(temp, length, src_pos);
__ cmpw(temp, rscratch1);
__ br(Assembler::HI, L_failed);
// if (dst_pos + length > arrayOop(dst)->length()) FAIL;
__ ldrw(rscratch1, Address(dst, arrayOopDesc::length_offset_in_bytes()));
__ addw(temp, length, dst_pos);
__ cmpw(temp, rscratch1);
__ br(Assembler::HI, L_failed);
// Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
__ movw(src_pos, src_pos);
__ movw(dst_pos, dst_pos);
BLOCK_COMMENT("arraycopy_range_checks done");
}
// These stubs get called from some dumb test routine. // These stubs get called from some dumb test routine.
// I'll write them properly when they're called from // I'll write them properly when they're called from
@ -1569,6 +1600,309 @@ class StubGenerator: public StubCodeGenerator {
} }
//
// Generate 'unsafe' array copy stub
// Though just as safe as the other stubs, it takes an unscaled
// size_t argument instead of an element count.
//
// Input:
// c_rarg0 - source array address
// c_rarg1 - destination array address
// c_rarg2 - byte count, treated as ssize_t, can be zero
//
// Examines the alignment of the operands and dispatches
// to a long, int, short, or byte copy loop.
//
address generate_unsafe_copy(const char *name,
address byte_copy_entry) {
#ifdef PRODUCT
return StubRoutines::_jbyte_arraycopy;
#else
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
// bump this on entry, not on exit:
__ lea(rscratch2, ExternalAddress((address)&SharedRuntime::_unsafe_array_copy_ctr));
__ incrementw(Address(rscratch2));
__ b(RuntimeAddress(byte_copy_entry));
return start;
#endif
}
//
// Generate generic array copy stubs
//
// Input:
// c_rarg0 - src oop
// c_rarg1 - src_pos (32-bits)
// c_rarg2 - dst oop
// c_rarg3 - dst_pos (32-bits)
// c_rarg4 - element count (32-bits)
//
// Output:
// r0 == 0 - success
// r0 == -1^K - failure, where K is partial transfer count
//
address generate_generic_copy(const char *name,
address byte_copy_entry, address short_copy_entry,
address int_copy_entry, address oop_copy_entry,
address long_copy_entry, address checkcast_copy_entry) {
Label L_failed, L_failed_0, L_objArray;
Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
// Input registers
const Register src = c_rarg0; // source array oop
const Register src_pos = c_rarg1; // source position
const Register dst = c_rarg2; // destination array oop
const Register dst_pos = c_rarg3; // destination position
const Register length = c_rarg4;
StubCodeMark mark(this, "StubRoutines", name);
__ align(CodeEntryAlignment);
address start = __ pc();
__ enter(); // required for proper stackwalking of RuntimeStub frame
// bump this on entry, not on exit:
inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
//-----------------------------------------------------------------------
// Assembler stub will be used for this call to arraycopy
// if the following conditions are met:
//
// (1) src and dst must not be null.
// (2) src_pos must not be negative.
// (3) dst_pos must not be negative.
// (4) length must not be negative.
// (5) src klass and dst klass should be the same and not NULL.
// (6) src and dst should be arrays.
// (7) src_pos + length must not exceed length of src.
// (8) dst_pos + length must not exceed length of dst.
//
// if (src == NULL) return -1;
__ cbz(src, L_failed);
// if (src_pos < 0) return -1;
__ tbnz(src_pos, 31, L_failed); // i.e. sign bit set
// if (dst == NULL) return -1;
__ cbz(dst, L_failed);
// if (dst_pos < 0) return -1;
__ tbnz(dst_pos, 31, L_failed); // i.e. sign bit set
// registers used as temp
const Register scratch_length = r16; // elements count to copy
const Register scratch_src_klass = r17; // array klass
const Register lh = r18; // layout helper
// if (length < 0) return -1;
__ movw(scratch_length, length); // length (elements count, 32-bits value)
__ tbnz(scratch_length, 31, L_failed); // i.e. sign bit set
__ load_klass(scratch_src_klass, src);
#ifdef ASSERT
// assert(src->klass() != NULL);
{
BLOCK_COMMENT("assert klasses not null {");
Label L1, L2;
__ cbnz(scratch_src_klass, L2); // it is broken if klass is NULL
__ bind(L1);
__ stop("broken null klass");
__ bind(L2);
__ load_klass(rscratch1, dst);
__ cbz(rscratch1, L1); // this would be broken also
BLOCK_COMMENT("} assert klasses not null done");
}
#endif
// Load layout helper (32-bits)
//
// |array_tag| | header_size | element_type | |log2_element_size|
// 32 30 24 16 8 2 0
//
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
//
const int lh_offset = in_bytes(Klass::layout_helper_offset());
// Handle objArrays completely differently...
const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
__ ldrw(lh, Address(scratch_src_klass, lh_offset));
__ movw(rscratch1, objArray_lh);
__ eorw(rscratch2, lh, rscratch1);
__ cbzw(rscratch2, L_objArray);
// if (src->klass() != dst->klass()) return -1;
__ load_klass(rscratch2, dst);
__ eor(rscratch2, rscratch2, scratch_src_klass);
__ cbnz(rscratch2, L_failed);
// if (!src->is_Array()) return -1;
__ tbz(lh, 31, L_failed); // i.e. (lh >= 0)
// At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
{
BLOCK_COMMENT("assert primitive array {");
Label L;
__ movw(rscratch2, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
__ cmpw(lh, rscratch2);
__ br(Assembler::GE, L);
__ stop("must be a primitive array");
__ bind(L);
BLOCK_COMMENT("} assert primitive array done");
}
#endif
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
rscratch2, L_failed);
// TypeArrayKlass
//
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
//
const Register rscratch1_offset = rscratch1; // array offset
const Register r18_elsize = lh; // element size
__ ubfx(rscratch1_offset, lh, Klass::_lh_header_size_shift,
exact_log2(Klass::_lh_header_size_mask+1)); // array_offset
__ add(src, src, rscratch1_offset); // src array offset
__ add(dst, dst, rscratch1_offset); // dst array offset
BLOCK_COMMENT("choose copy loop based on element size");
// next registers should be set before the jump to corresponding stub
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
const Register count = c_rarg2; // elements count
// 'from', 'to', 'count' registers should be set in such order
// since they are the same as 'src', 'src_pos', 'dst'.
assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
// The possible values of elsize are 0-3, i.e. exact_log2(element
// size in bytes). We do a simple bitwise binary search.
__ BIND(L_copy_bytes);
__ tbnz(r18_elsize, 1, L_copy_ints);
__ tbnz(r18_elsize, 0, L_copy_shorts);
__ lea(from, Address(src, src_pos));// src_addr
__ lea(to, Address(dst, dst_pos));// dst_addr
__ movw(count, scratch_length); // length
__ b(RuntimeAddress(byte_copy_entry));
__ BIND(L_copy_shorts);
__ lea(from, Address(src, src_pos, Address::lsl(1)));// src_addr
__ lea(to, Address(dst, dst_pos, Address::lsl(1)));// dst_addr
__ movw(count, scratch_length); // length
__ b(RuntimeAddress(short_copy_entry));
__ BIND(L_copy_ints);
__ tbnz(r18_elsize, 0, L_copy_longs);
__ lea(from, Address(src, src_pos, Address::lsl(2)));// src_addr
__ lea(to, Address(dst, dst_pos, Address::lsl(2)));// dst_addr
__ movw(count, scratch_length); // length
__ b(RuntimeAddress(int_copy_entry));
__ BIND(L_copy_longs);
#ifdef ASSERT
{
BLOCK_COMMENT("assert long copy {");
Label L;
__ andw(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> r18_elsize
__ cmpw(r18_elsize, LogBytesPerLong);
__ br(Assembler::EQ, L);
__ stop("must be long copy, but elsize is wrong");
__ bind(L);
BLOCK_COMMENT("} assert long copy done");
}
#endif
__ lea(from, Address(src, src_pos, Address::lsl(3)));// src_addr
__ lea(to, Address(dst, dst_pos, Address::lsl(3)));// dst_addr
__ movw(count, scratch_length); // length
__ b(RuntimeAddress(long_copy_entry));
// ObjArrayKlass
__ BIND(L_objArray);
// live at this point: scratch_src_klass, scratch_length, src[_pos], dst[_pos]
Label L_plain_copy, L_checkcast_copy;
// test array classes for subtyping
__ load_klass(r18, dst);
__ cmp(scratch_src_klass, r18); // usual case is exact equality
__ br(Assembler::NE, L_checkcast_copy);
// Identically typed arrays can be copied without element-wise checks.
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
rscratch2, L_failed);
__ lea(from, Address(src, src_pos, Address::lsl(3)));
__ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ lea(to, Address(dst, dst_pos, Address::lsl(3)));
__ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ movw(count, scratch_length); // length
__ BIND(L_plain_copy);
__ b(RuntimeAddress(oop_copy_entry));
__ BIND(L_checkcast_copy);
// live at this point: scratch_src_klass, scratch_length, r18 (dst_klass)
{
// Before looking at dst.length, make sure dst is also an objArray.
__ ldrw(rscratch1, Address(r18, lh_offset));
__ movw(rscratch2, objArray_lh);
__ eorw(rscratch1, rscratch1, rscratch2);
__ cbnzw(rscratch1, L_failed);
// It is safe to examine both src.length and dst.length.
arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
r18, L_failed);
const Register rscratch2_dst_klass = rscratch2;
__ load_klass(rscratch2_dst_klass, dst); // reload
// Marshal the base address arguments now, freeing registers.
__ lea(from, Address(src, src_pos, Address::lsl(3)));
__ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ lea(to, Address(dst, dst_pos, Address::lsl(3)));
__ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ movw(count, length); // length (reloaded)
Register sco_temp = c_rarg3; // this register is free now
assert_different_registers(from, to, count, sco_temp,
rscratch2_dst_klass, scratch_src_klass);
// assert_clean_int(count, sco_temp);
// Generate the type check.
const int sco_offset = in_bytes(Klass::super_check_offset_offset());
__ ldrw(sco_temp, Address(rscratch2_dst_klass, sco_offset));
// assert_clean_int(sco_temp, r18);
generate_type_check(scratch_src_klass, sco_temp, rscratch2_dst_klass, L_plain_copy);
// Fetch destination element klass from the ObjArrayKlass header.
int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
__ ldr(rscratch2_dst_klass, Address(rscratch2_dst_klass, ek_offset));
__ ldrw(sco_temp, Address(rscratch2_dst_klass, sco_offset));
// the checkcast_copy loop needs two extra arguments:
assert(c_rarg3 == sco_temp, "#3 already in place");
// Set up arguments for checkcast_copy_entry.
__ mov(c_rarg4, rscratch2_dst_klass); // dst.klass.element_klass
__ b(RuntimeAddress(checkcast_copy_entry));
}
__ BIND(L_failed);
__ mov(r0, -1);
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(lr);
return start;
}
void generate_arraycopy_stubs() { void generate_arraycopy_stubs() {
address entry; address entry;
address entry_jbyte_arraycopy; address entry_jbyte_arraycopy;
@ -1655,6 +1989,18 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
/*dest_uninitialized*/true); /*dest_uninitialized*/true);
StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
entry_jbyte_arraycopy);
StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
entry_jbyte_arraycopy,
entry_jshort_arraycopy,
entry_jint_arraycopy,
entry_oop_arraycopy,
entry_jlong_arraycopy,
entry_checkcast_arraycopy);
} }
void generate_math_stubs() { Unimplemented(); } void generate_math_stubs() { Unimplemented(); }
@ -1973,7 +2319,7 @@ class StubGenerator: public StubCodeGenerator {
// c_rarg4 - input length // c_rarg4 - input length
// //
// Output: // Output:
// rax - input length // r0 - input length
// //
address generate_cipherBlockChaining_decryptAESCrypt() { address generate_cipherBlockChaining_decryptAESCrypt() {
assert(UseAES, "need AES instructions and misaligned SSE support"); assert(UseAES, "need AES instructions and misaligned SSE support");