8153797: aarch64: Add Arrays.fill stub code

Reviewed-by: aph
This commit is contained in:
Long Chen 2016-04-07 22:36:16 +00:00 committed by Ed Nevill
parent 1131e05b66
commit 19d90c789d
4 changed files with 197 additions and 50 deletions

View File

@ -4190,55 +4190,6 @@ encode %{
}
%}
// Encoding used for ClearArray: emits an 8-way unrolled loop of str(zr)
// stores that zeroes cnt words starting at base. The emitted code modifies
// both cnt_reg and base_reg and uses rscratch1/rscratch2 as temporaries.
enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
MacroAssembler _masm(&cbuf);
Register cnt_reg = as_Register($cnt$$reg);
Register base_reg = as_Register($base$$reg);
// base is word aligned
// cnt is count of words
Label loop;
Label entry;
// Algorithm (a Duff's-device style loop; the first pass enters the
// unrolled body part-way through to handle cnt % 8 words):
//
// scratch1 = cnt & 7;
// cnt -= scratch1;
// p += scratch1;
// switch (scratch1) {
// do {
// cnt -= 8;
// p[-8] = 0;
// case 7:
// p[-7] = 0;
// case 6:
// p[-6] = 0;
// // ...
// case 1:
// p[-1] = 0;
// case 0:
// p += 8;
// } while (cnt);
// }
const int unroll = 8; // Number of str(zr) instructions we'll unroll
__ andr(rscratch1, cnt_reg, unroll - 1); // tmp1 = cnt % unroll
__ sub(cnt_reg, cnt_reg, rscratch1); // cnt -= tmp1 (cnt is now a multiple of unroll)
// base_reg always points to the end of the region we're about to zero
__ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
// Computed branch into the unrolled stores: step back tmp1 * 4 bytes from
// "entry" so that exactly tmp1 stores execute on the first pass. This relies
// on each str below being emitted as a single 4-byte instruction.
__ adr(rscratch2, entry);
__ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
__ br(rscratch2);
__ bind(loop);
__ sub(cnt_reg, cnt_reg, unroll);
// Stores to base_reg[-unroll] .. base_reg[-1], in ascending address order.
for (int i = -unroll; i < 0; i++)
__ str(zr, Address(base_reg, i * wordSize));
__ bind(entry);
// base_reg is advanced before the loop test, so on exit it lies
// unroll * wordSize bytes past the end of the zeroed region.
__ add(base_reg, base_reg, unroll * wordSize);
__ cbnz(cnt_reg, loop);
%}
/// mov encodings
enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
@ -13363,7 +13314,9 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
ins_cost(4 * INSN_COST);
format %{ "ClearArray $cnt, $base" %}
ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
ins_encode %{
__ zero_words($base$$Register, $cnt$$Register);
%}
ins_pipe(pipe_class_memory);
%}

View File

@ -4670,6 +4670,61 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
}
// Zero a buffer.
//   base: start address of the buffer; must be 8-byte aligned.
//   cnt:  length of the buffer, counted in 8-byte units.
// Implemented as a word fill that uses the zero register as the fill value.
void MacroAssembler::zero_words(Register base, Register cnt) {
  this->fill_words(base, cnt, zr);
}
// Emit code that stores `value` into `cnt` consecutive 8-byte words
// starting at `base`, using an 8-way unrolled store loop.
//
// base: Address of a buffer to be filled, 8 bytes aligned.
// cnt: Count in 8-byte unit.
// value: Value to be filled with.
// base will point to the end of the buffer after filling.
// cnt is consumed (the emitted loop exits once it reaches zero), and
// rscratch1/rscratch2 are used as temporaries.
void MacroAssembler::fill_words(Register base, Register cnt, Register value)
{
// Algorithm (a Duff's-device style loop; the first pass enters the
// unrolled body part-way through to handle cnt % 8 words):
//
// scratch1 = cnt & 7;
// cnt -= scratch1;
// p += scratch1;
// switch (scratch1) {
// do {
// p += 8;
// cnt -= 8;
// p[-8] = v;
// case 7:
// p[-7] = v;
// case 6:
// p[-6] = v;
// // ...
// case 1:
// p[-1] = v;
// case 0:
// ;
// } while (cnt);
// }
// The scratch registers are written below, so none of the operands may alias them.
assert_different_registers(base, cnt, value, rscratch1, rscratch2);
Label entry, loop;
const int unroll = 8; // Number of str instructions we'll unroll
andr(rscratch1, cnt, unroll - 1); // tmp1 = cnt % unroll
// No remainder: skip the computed branch and go straight to the loop test.
cbz(rscratch1, entry);
sub(cnt, cnt, rscratch1); // cnt -= tmp1
// base always points to the end of the region we're about to fill
add(base, base, rscratch1, Assembler::LSL, 3);
// Computed branch into the unrolled stores: step back tmp1 * 4 bytes from
// "entry" so that exactly tmp1 stores execute on the first pass. This relies
// on each str below being emitted as a single 4-byte instruction.
adr(rscratch2, entry);
sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
br(rscratch2);
bind(loop);
// Advance base past the next full block first, then fill it via negative offsets.
add(base, base, unroll * 8);
sub(cnt, cnt, unroll);
for (int i = -unroll; i < 0; i++)
str(value, Address(base, i * 8));
bind(entry);
cbnz(cnt, loop);
}
// encode char[] to byte[] in ISO_8859_1
void MacroAssembler::encode_iso_array(Register src, Register dst,

View File

@ -1184,6 +1184,9 @@ public:
Register result, Register cnt1,
int elem_size, bool is_string);
void fill_words(Register base, Register cnt, Register value);
void zero_words(Register base, Register cnt);
void encode_iso_array(Register src, Register dst,
Register len, Register result,
FloatRegister Vtmp1, FloatRegister Vtmp2,

View File

@ -2022,6 +2022,136 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
//
// Generate stub for array fill. If "aligned" is true, the
// "to" address is assumed to be heapword aligned.
//
// Arguments for generated stub:
//   to:    c_rarg0
//   value: c_rarg1
//   count: c_rarg2 treated as signed
//          NOTE(review): the short-array check below branches on LO (an
//          unsigned condition), so count is presumably never negative here
//          -- confirm against the callers.
//
// Generator parameters:
//   t:       element type; only T_BYTE, T_SHORT and T_INT are handled
//   aligned: when true, the alignment prologue is not emitted
//   name:    stub name handed to StubCodeMark
//
// Returns the entry address of the generated stub.
//
address generate_fill(BasicType t, bool aligned, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  BLOCK_COMMENT("Entry:");

  const Register to        = c_rarg0;  // destination array address
  const Register value     = c_rarg1;  // value
  const Register count     = c_rarg2;  // elements count
  const Register cnt_words = c_rarg3;  // temp register

  __ enter();

  Label L_fill_elements;

  // Replicate the element across the low 32 bits of "value" and send arrays
  // shorter than 8 bytes to the element-wise tail. The bfi instructions sit
  // between the compare and the branch so that the short path also sees the
  // replicated value.
  int shift = -1;  // log2 of the element size in bytes
  switch (t) {
    case T_BYTE:
      shift = 0;
      __ cmpw(count, 8 >> shift);   // Short arrays (< 8 bytes) fill by element
      __ bfi(value, value, 8, 8);   // 8 bit -> 16 bit
      __ bfi(value, value, 16, 16); // 16 bit -> 32 bit
      __ br(Assembler::LO, L_fill_elements);
      break;
    case T_SHORT:
      shift = 1;
      __ cmpw(count, 8 >> shift);   // Short arrays (< 8 bytes) fill by element
      __ bfi(value, value, 16, 16); // 16 bit -> 32 bit
      __ br(Assembler::LO, L_fill_elements);
      break;
    case T_INT:
      shift = 2;
      __ cmpw(count, 8 >> shift);   // Short arrays (< 8 bytes) fill by element
      __ br(Assembler::LO, L_fill_elements);
      break;
    default: ShouldNotReachHere();
  }

  // Align destination address at 8 bytes address boundary.
  Label L_skip_align1, L_skip_align2, L_skip_align4;
  if (!aligned) {
    // The cases deliberately fall through: a byte array may need the 1-, 2-
    // and 4-byte fix-ups, a short array the 2- and 4-byte ones, an int array
    // only the 4-byte one.
    switch (t) {
      case T_BYTE:
        // One byte misalignment happens only for byte arrays.
        __ tbz(to, 0, L_skip_align1);
        __ strb(value, Address(__ post(to, 1)));
        __ subw(count, count, 1);
        __ bind(L_skip_align1);
        // Fallthrough
      case T_SHORT:
        // Two bytes misalignment happens only for byte and short (char) arrays.
        __ tbz(to, 1, L_skip_align2);
        __ strh(value, Address(__ post(to, 2)));
        __ subw(count, count, 2 >> shift);
        __ bind(L_skip_align2);
        // Fallthrough
      case T_INT:
        // Align to 8 bytes, we know we are 4 byte aligned to start.
        __ tbz(to, 2, L_skip_align4);
        __ strw(value, Address(__ post(to, 4)));
        __ subw(count, count, 4 >> shift);
        __ bind(L_skip_align4);
        break;
      default: ShouldNotReachHere();
    }
  }

  //
  //  Fill large chunks
  //
  __ lsrw(cnt_words, count, 3 - shift); // number of words
  __ bfi(value, value, 32, 32);         // 32 bit -> 64 bit
  // count := number of elements left over after the whole words
  __ subw(count, count, cnt_words, Assembler::LSL, 3 - shift);
  __ fill_words(to, cnt_words, value);

  // Remaining count is less than 8 bytes. Fill it by a single store.
  // Note that the total length is no less than 8 bytes.
  if (t == T_BYTE || t == T_SHORT) {
    Label L_exit1;
    __ cbzw(count, L_exit1);
    __ add(to, to, count, Assembler::LSL, shift); // points to the end
    // One 8-byte store ending exactly at the end of the array; it rewrites
    // some already-filled elements with the same value.
    __ str(value, Address(to, -8));    // overwrite some elements
    __ bind(L_exit1);
    __ leave();
    __ ret(lr);
  }
  // For T_INT there is no early return: the at most one leftover element is
  // stored by the T_INT case of the tail code below.

  // Handle fills of less than 8 bytes.
  Label L_fill_2, L_fill_4, L_exit2;
  __ bind(L_fill_elements);
  // Each set bit of count selects one store of the matching width.
  switch (t) {
    case T_BYTE:
      __ tbz(count, 0, L_fill_2);
      __ strb(value, Address(__ post(to, 1)));
      __ bind(L_fill_2);
      __ tbz(count, 1, L_fill_4);
      __ strh(value, Address(__ post(to, 2)));
      __ bind(L_fill_4);
      __ tbz(count, 2, L_exit2);
      __ strw(value, Address(to));
      break;
    case T_SHORT:
      __ tbz(count, 0, L_fill_4);
      __ strh(value, Address(__ post(to, 2)));
      __ bind(L_fill_4);
      __ tbz(count, 1, L_exit2);
      __ strw(value, Address(to));
      break;
    case T_INT:
      __ cbzw(count, L_exit2);
      __ strw(value, Address(to));
      break;
    default: ShouldNotReachHere();
  }
  __ bind(L_exit2);
  __ leave();
  __ ret(lr);
  return start;
}
void generate_arraycopy_stubs() {
address entry;
address entry_jbyte_arraycopy;
@ -2125,6 +2255,12 @@ class StubGenerator: public StubCodeGenerator {
entry_jlong_arraycopy,
entry_checkcast_arraycopy);
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
}
void generate_math_stubs() { Unimplemented(); }