8153797: aarch64: Add Arrays.fill stub code
Reviewed-by: aph
This commit is contained in:
parent
1131e05b66
commit
19d90c789d
@ -4190,55 +4190,6 @@ encode %{
|
||||
}
|
||||
%}
|
||||
|
||||
// Encoding that emits an unrolled loop storing zr to $cnt words starting at $base.
// The emitted code modifies both operand registers (cnt_reg reaches zero,
// base_reg is advanced) and uses rscratch1 and rscratch2 as temporaries.
enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
Register cnt_reg = as_Register($cnt$$reg);
|
||||
Register base_reg = as_Register($base$$reg);
|
||||
// base is word aligned
|
||||
// cnt is count of words
|
||||
|
||||
Label loop;
|
||||
Label entry;
|
||||
|
||||
// Algorithm (a Duff's-device style entry into the unrolled loop):
|
||||
//
|
||||
// scratch1 = cnt & 7;
|
||||
// cnt -= scratch1;
|
||||
// p += scratch1;
|
||||
// switch (scratch1) {
|
||||
// do {
|
||||
// cnt -= 8;
|
||||
// p[-8] = 0;
|
||||
// case 7:
|
||||
// p[-7] = 0;
|
||||
// case 6:
|
||||
// p[-6] = 0;
|
||||
// // ...
|
||||
// case 1:
|
||||
// p[-1] = 0;
|
||||
// case 0:
|
||||
// p += 8;
|
||||
// } while (cnt);
|
||||
// }
|
||||
|
||||
const int unroll = 8; // Number of str(zr) instructions we'll unroll
|
||||
|
||||
__ andr(rscratch1, cnt_reg, unroll - 1); // tmp1 = cnt % unroll
|
||||
__ sub(cnt_reg, cnt_reg, rscratch1); // cnt -= tmp1
|
||||
// base_reg always points to the end of the region we're about to zero
|
||||
__ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
|
||||
// Branch to (entry - tmp1 * 4) so that only the last tmp1 stores run on the
// first pass. The LSL 2 scaling assumes each str below is one 4-byte instruction.
__ adr(rscratch2, entry);
|
||||
__ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
|
||||
__ br(rscratch2);
|
||||
__ bind(loop);
|
||||
__ sub(cnt_reg, cnt_reg, unroll);
|
||||
// Nothing else may be emitted between these stores and 'entry'; the computed
// branch above depends on their exact layout.
for (int i = -unroll; i < 0; i++)
|
||||
__ str(zr, Address(base_reg, i * wordSize));
|
||||
__ bind(entry);
|
||||
__ add(base_reg, base_reg, unroll * wordSize);
|
||||
__ cbnz(cnt_reg, loop);
|
||||
%}
|
||||
|
||||
/// mov encodings
|
||||
|
||||
enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
|
||||
@ -13363,7 +13314,9 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
|
||||
ins_cost(4 * INSN_COST);
|
||||
format %{ "ClearArray $cnt, $base" %}
|
||||
|
||||
ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
|
||||
ins_encode %{
|
||||
__ zero_words($base$$Register, $cnt$$Register);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
@ -4670,6 +4670,61 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
|
||||
BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
|
||||
}
|
||||
|
||||
// Emit code that zeroes a buffer by delegating to fill_words with zr as the value.
//
// base: Address of a buffer to be zeroed, 8 bytes aligned.
|
||||
// cnt: Count in 8-byte unit.
//
// Both registers are passed straight through to fill_words, so whatever that
// routine does to base, cnt and its scratch registers applies here as well.
|
||||
void MacroAssembler::zero_words(Register base, Register cnt)
|
||||
{
|
||||
fill_words(base, cnt, zr);
|
||||
}
|
||||
|
||||
// Emit an unrolled loop that stores 'value' into 'cnt' consecutive 8-byte words.
//
// base: Address of a buffer to be filled, 8 bytes aligned.
|
||||
// cnt: Count in 8-byte unit.
|
||||
// value: Value to be filled with.
|
||||
// base will point to the end of the buffer after filling.
// cnt is zero when the emitted code falls through; rscratch1 and rscratch2
// are used as temporaries and must not alias any of the arguments.
|
||||
void MacroAssembler::fill_words(Register base, Register cnt, Register value)
|
||||
{
|
||||
// Algorithm (a Duff's-device style entry into the unrolled loop):
|
||||
//
|
||||
// scratch1 = cnt & 7;
|
||||
// cnt -= scratch1;
|
||||
// p += scratch1;
|
||||
// switch (scratch1) {
|
||||
// do {
|
||||
// p += 8; cnt -= 8;
|
||||
// p[-8] = v;
|
||||
// case 7:
|
||||
// p[-7] = v;
|
||||
// case 6:
|
||||
// p[-6] = v;
|
||||
// // ...
|
||||
// case 1:
|
||||
// p[-1] = v;
|
||||
// case 0:
|
||||
// ; // no store, just test cnt
|
||||
// } while (cnt);
|
||||
// }
|
||||
|
||||
assert_different_registers(base, cnt, value, rscratch1, rscratch2);
|
||||
|
||||
Label entry, loop;
|
||||
const int unroll = 8; // Number of str instructions we'll unroll
|
||||
|
||||
andr(rscratch1, cnt, unroll - 1); // tmp1 = cnt % unroll
|
||||
// No partial group: skip the computed branch and go straight to the loop test.
cbz(rscratch1, entry);
|
||||
sub(cnt, cnt, rscratch1); // cnt -= tmp1
|
||||
// base always points to the end of the region we're about to fill
|
||||
add(base, base, rscratch1, Assembler::LSL, 3);
|
||||
// Branch to (entry - tmp1 * 4) so that only the last tmp1 stores run on the
// first pass. The LSL 2 scaling assumes each str below is one 4-byte instruction.
adr(rscratch2, entry);
|
||||
sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
|
||||
br(rscratch2);
|
||||
bind(loop);
|
||||
add(base, base, unroll * 8);
|
||||
sub(cnt, cnt, unroll);
|
||||
// Nothing else may be emitted between these stores and 'entry'; the computed
// branch above depends on their exact layout.
for (int i = -unroll; i < 0; i++)
|
||||
str(value, Address(base, i * 8));
|
||||
bind(entry);
|
||||
cbnz(cnt, loop);
|
||||
}
|
||||
|
||||
// encode char[] to byte[] in ISO_8859_1
|
||||
void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
|
@ -1184,6 +1184,9 @@ public:
|
||||
Register result, Register cnt1,
|
||||
int elem_size, bool is_string);
|
||||
|
||||
// Emit code storing 'value' into 'cnt' consecutive 8-byte words starting at 'base'.
void fill_words(Register base, Register cnt, Register value);
|
||||
// Emit code zeroing 'cnt' consecutive 8-byte words starting at 'base'.
void zero_words(Register base, Register cnt);
|
||||
|
||||
void encode_iso_array(Register src, Register dst,
|
||||
Register len, Register result,
|
||||
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
||||
|
@ -2022,6 +2022,136 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
//
|
||||
// Generate stub for array fill. If "aligned" is true, the
|
||||
// "to" address is assumed to be heapword aligned.
|
||||
//
|
||||
// Arguments for generated stub:
|
||||
// to: c_rarg0
|
||||
// value: c_rarg1
|
||||
// count: c_rarg2 treated as signed
|
||||
//
|
||||
address generate_fill(BasicType t, bool aligned, const char *name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
|
||||
const Register to = c_rarg0; // source array address
|
||||
const Register value = c_rarg1; // value
|
||||
const Register count = c_rarg2; // elements count
|
||||
const Register cnt_words = c_rarg3; // temp register
|
||||
|
||||
__ enter();
|
||||
|
||||
Label L_fill_elements, L_exit1;
|
||||
|
||||
int shift = -1;
|
||||
switch (t) {
|
||||
case T_BYTE:
|
||||
shift = 0;
|
||||
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ bfi(value, value, 8, 8); // 8 bit -> 16 bit
|
||||
__ bfi(value, value, 16, 16); // 16 bit -> 32 bit
|
||||
__ br(Assembler::LO, L_fill_elements);
|
||||
break;
|
||||
case T_SHORT:
|
||||
shift = 1;
|
||||
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ bfi(value, value, 16, 16); // 16 bit -> 32 bit
|
||||
__ br(Assembler::LO, L_fill_elements);
|
||||
break;
|
||||
case T_INT:
|
||||
shift = 2;
|
||||
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ br(Assembler::LO, L_fill_elements);
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// Align source address at 8 bytes address boundary.
|
||||
Label L_skip_align1, L_skip_align2, L_skip_align4;
|
||||
if (!aligned) {
|
||||
switch (t) {
|
||||
case T_BYTE:
|
||||
// One byte misalignment happens only for byte arrays.
|
||||
__ tbz(to, 0, L_skip_align1);
|
||||
__ strb(value, Address(__ post(to, 1)));
|
||||
__ subw(count, count, 1);
|
||||
__ bind(L_skip_align1);
|
||||
// Fallthrough
|
||||
case T_SHORT:
|
||||
// Two bytes misalignment happens only for byte and short (char) arrays.
|
||||
__ tbz(to, 1, L_skip_align2);
|
||||
__ strh(value, Address(__ post(to, 2)));
|
||||
__ subw(count, count, 2 >> shift);
|
||||
__ bind(L_skip_align2);
|
||||
// Fallthrough
|
||||
case T_INT:
|
||||
// Align to 8 bytes, we know we are 4 byte aligned to start.
|
||||
__ tbz(to, 2, L_skip_align4);
|
||||
__ strw(value, Address(__ post(to, 4)));
|
||||
__ subw(count, count, 4 >> shift);
|
||||
__ bind(L_skip_align4);
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Fill large chunks
|
||||
//
|
||||
__ lsrw(cnt_words, count, 3 - shift); // number of words
|
||||
__ bfi(value, value, 32, 32); // 32 bit -> 64 bit
|
||||
__ subw(count, count, cnt_words, Assembler::LSL, 3 - shift);
|
||||
__ fill_words(to, cnt_words, value);
|
||||
|
||||
// Remaining count is less than 8 bytes. Fill it by a single store.
|
||||
// Note that the total length is no less than 8 bytes.
|
||||
if (t == T_BYTE || t == T_SHORT) {
|
||||
Label L_exit1;
|
||||
__ cbzw(count, L_exit1);
|
||||
__ add(to, to, count, Assembler::LSL, shift); // points to the end
|
||||
__ str(value, Address(to, -8)); // overwrite some elements
|
||||
__ bind(L_exit1);
|
||||
__ leave();
|
||||
__ ret(lr);
|
||||
}
|
||||
|
||||
// Handle copies less than 8 bytes.
|
||||
Label L_fill_2, L_fill_4, L_exit2;
|
||||
__ bind(L_fill_elements);
|
||||
switch (t) {
|
||||
case T_BYTE:
|
||||
__ tbz(count, 0, L_fill_2);
|
||||
__ strb(value, Address(__ post(to, 1)));
|
||||
__ bind(L_fill_2);
|
||||
__ tbz(count, 1, L_fill_4);
|
||||
__ strh(value, Address(__ post(to, 2)));
|
||||
__ bind(L_fill_4);
|
||||
__ tbz(count, 2, L_exit2);
|
||||
__ strw(value, Address(to));
|
||||
break;
|
||||
case T_SHORT:
|
||||
__ tbz(count, 0, L_fill_4);
|
||||
__ strh(value, Address(__ post(to, 2)));
|
||||
__ bind(L_fill_4);
|
||||
__ tbz(count, 1, L_exit2);
|
||||
__ strw(value, Address(to));
|
||||
break;
|
||||
case T_INT:
|
||||
__ cbzw(count, L_exit2);
|
||||
__ strw(value, Address(to));
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
__ bind(L_exit2);
|
||||
__ leave();
|
||||
__ ret(lr);
|
||||
return start;
|
||||
}
|
||||
|
||||
void generate_arraycopy_stubs() {
|
||||
address entry;
|
||||
address entry_jbyte_arraycopy;
|
||||
@ -2125,6 +2255,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
entry_jlong_arraycopy,
|
||||
entry_checkcast_arraycopy);
|
||||
|
||||
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
|
||||
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
|
||||
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
|
||||
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
|
||||
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
|
||||
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
|
||||
}
|
||||
|
||||
void generate_math_stubs() { Unimplemented(); }
|
||||
|
Loading…
x
Reference in New Issue
Block a user