8302780: Add support for vectorized arraycopy GC barriers

Co-authored-by: Yadong Wang <yadongwang@openjdk.org>
Reviewed-by: ayang, fyang, rcastanedalo, aph

commit 5f153e056b (parent d00a767047)
Changed paths: src/hotspot/cpu/{aarch64, riscv, x86}
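
For orientation (editor's sketch, not part of the commit): the patch routes every load
and store performed by the arraycopy stubs through a pair of new BarrierSetAssembler
hooks, copy_load_at() and copy_store_at(), so a collector can interpose on the copied
values. A minimal aarch64-flavored call against the signatures declared below (the
register choices here are hypothetical):

    BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
    // Copy one 8-byte element: load through the barrier, then store through it.
    bs->copy_load_at(_masm, decorators, T_OBJECT, 8,
                     r3, noreg, Address(s), rscratch1);   // dst1, dst2, src, tmp
    bs->copy_store_at(_masm, decorators, T_OBJECT, 8,
                      Address(d), r3, noreg,              // dst, src1, src2
                      rscratch1, rscratch2, r10);         // tmp1, tmp2, tmp3

The default implementations in the first hunk emit plain loads and stores, plus
compressed-oop decode/encode when the copy is a checkcast arraycopy.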
src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.cpp:

@@ -119,6 +119,111 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators
   }
 }
 
+void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
+                                       DecoratorSet decorators,
+                                       BasicType type,
+                                       size_t bytes,
+                                       Register dst1,
+                                       Register dst2,
+                                       Address src,
+                                       Register tmp) {
+  if (bytes == 1) {
+    assert(dst2 == noreg, "invariant");
+    __ ldrb(dst1, src);
+  } else if (bytes == 2) {
+    assert(dst2 == noreg, "invariant");
+    __ ldrh(dst1, src);
+  } else if (bytes == 4) {
+    assert(dst2 == noreg, "invariant");
+    __ ldrw(dst1, src);
+  } else if (bytes == 8) {
+    assert(dst2 == noreg, "invariant");
+    __ ldr(dst1, src);
+  } else if (bytes == 16) {
+    assert(dst2 != noreg, "invariant");
+    assert(dst2 != dst1, "invariant");
+    __ ldp(dst1, dst2, src);
+  } else {
+    // Not the right size
+    ShouldNotReachHere();
+  }
+  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
+    __ decode_heap_oop(dst1);
+  }
+}
+
+void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
+                                        DecoratorSet decorators,
+                                        BasicType type,
+                                        size_t bytes,
+                                        Address dst,
+                                        Register src1,
+                                        Register src2,
+                                        Register tmp1,
+                                        Register tmp2,
+                                        Register tmp3) {
+  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
+    __ encode_heap_oop(src1);
+  }
+  if (bytes == 1) {
+    assert(src2 == noreg, "invariant");
+    __ strb(src1, dst);
+  } else if (bytes == 2) {
+    assert(src2 == noreg, "invariant");
+    __ strh(src1, dst);
+  } else if (bytes == 4) {
+    assert(src2 == noreg, "invariant");
+    __ strw(src1, dst);
+  } else if (bytes == 8) {
+    assert(src2 == noreg, "invariant");
+    __ str(src1, dst);
+  } else if (bytes == 16) {
+    assert(src2 != noreg, "invariant");
+    assert(src2 != src1, "invariant");
+    __ stp(src1, src2, dst);
+  } else {
+    // Not the right size
+    ShouldNotReachHere();
+  }
+}
+
+void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
+                                       DecoratorSet decorators,
+                                       BasicType type,
+                                       size_t bytes,
+                                       FloatRegister dst1,
+                                       FloatRegister dst2,
+                                       Address src,
+                                       Register tmp1,
+                                       Register tmp2,
+                                       FloatRegister vec_tmp) {
+  if (bytes == 32) {
+    __ ldpq(dst1, dst2, src);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
+                                        DecoratorSet decorators,
+                                        BasicType type,
+                                        size_t bytes,
+                                        Address dst,
+                                        FloatRegister src1,
+                                        FloatRegister src2,
+                                        Register tmp1,
+                                        Register tmp2,
+                                        Register tmp3,
+                                        FloatRegister vec_tmp1,
+                                        FloatRegister vec_tmp2,
+                                        FloatRegister vec_tmp3) {
+  if (bytes == 32) {
+    __ stpq(src1, src2, dst);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
 void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                         Register obj, Register tmp, Label& slowpath) {
   // If mask changes we need to ensure that the inverse is still encodable as an immediate
src/hotspot/cpu/aarch64/gc/shared/barrierSetAssembler_aarch64.hpp:

@@ -48,6 +48,52 @@ public:
                                   Register src, Register dst, Register count, RegSet saved_regs) {}
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register start, Register count, Register tmp, RegSet saved_regs) {}
 
+  virtual void copy_load_at(MacroAssembler* masm,
+                            DecoratorSet decorators,
+                            BasicType type,
+                            size_t bytes,
+                            Register dst1,
+                            Register dst2,
+                            Address src,
+                            Register tmp);
+
+  virtual void copy_store_at(MacroAssembler* masm,
+                             DecoratorSet decorators,
+                             BasicType type,
+                             size_t bytes,
+                             Address dst,
+                             Register src1,
+                             Register src2,
+                             Register tmp1,
+                             Register tmp2,
+                             Register tmp3);
+
+  virtual void copy_load_at(MacroAssembler* masm,
+                            DecoratorSet decorators,
+                            BasicType type,
+                            size_t bytes,
+                            FloatRegister dst1,
+                            FloatRegister dst2,
+                            Address src,
+                            Register tmp1,
+                            Register tmp2,
+                            FloatRegister vec_tmp);
+
+  virtual void copy_store_at(MacroAssembler* masm,
+                             DecoratorSet decorators,
+                             BasicType type,
+                             size_t bytes,
+                             Address dst,
+                             FloatRegister src1,
+                             FloatRegister src2,
+                             Register tmp1,
+                             Register tmp2,
+                             Register tmp3,
+                             FloatRegister vec_tmp1,
+                             FloatRegister vec_tmp2,
+                             FloatRegister vec_tmp3);
+
   virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                        Register dst, Address src, Register tmp1, Register tmp2);
   virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp:

@@ -696,6 +696,79 @@ class StubGenerator: public StubCodeGenerator {
     copy_backwards = -1
   } copy_direction;
 
+  // Helper object to reduce noise when telling the GC barriers how to perform loads and stores
+  // for arraycopy stubs.
+  class ArrayCopyBarrierSetHelper : StackObj {
+    BarrierSetAssembler* _bs_asm;
+    MacroAssembler* _masm;
+    DecoratorSet _decorators;
+    BasicType _type;
+    Register _gct1;
+    Register _gct2;
+    Register _gct3;
+    FloatRegister _gcvt1;
+    FloatRegister _gcvt2;
+    FloatRegister _gcvt3;
+
+  public:
+    ArrayCopyBarrierSetHelper(MacroAssembler* masm,
+                              DecoratorSet decorators,
+                              BasicType type,
+                              Register gct1,
+                              Register gct2,
+                              Register gct3,
+                              FloatRegister gcvt1,
+                              FloatRegister gcvt2,
+                              FloatRegister gcvt3)
+      : _bs_asm(BarrierSet::barrier_set()->barrier_set_assembler()),
+        _masm(masm),
+        _decorators(decorators),
+        _type(type),
+        _gct1(gct1),
+        _gct2(gct2),
+        _gct3(gct3),
+        _gcvt1(gcvt1),
+        _gcvt2(gcvt2),
+        _gcvt3(gcvt3) {
+    }
+
+    void copy_load_at_32(FloatRegister dst1, FloatRegister dst2, Address src) {
+      _bs_asm->copy_load_at(_masm, _decorators, _type, 32,
+                            dst1, dst2, src,
+                            _gct1, _gct2, _gcvt1);
+    }
+
+    void copy_store_at_32(Address dst, FloatRegister src1, FloatRegister src2) {
+      _bs_asm->copy_store_at(_masm, _decorators, _type, 32,
+                             dst, src1, src2,
+                             _gct1, _gct2, _gct3, _gcvt1, _gcvt2, _gcvt3);
+    }
+
+    void copy_load_at_16(Register dst1, Register dst2, Address src) {
+      _bs_asm->copy_load_at(_masm, _decorators, _type, 16,
+                            dst1, dst2, src,
+                            _gct1);
+    }
+
+    void copy_store_at_16(Address dst, Register src1, Register src2) {
+      _bs_asm->copy_store_at(_masm, _decorators, _type, 16,
+                             dst, src1, src2,
+                             _gct1, _gct2, _gct3);
+    }
+
+    void copy_load_at_8(Register dst, Address src) {
+      _bs_asm->copy_load_at(_masm, _decorators, _type, 8,
+                            dst, noreg, src,
+                            _gct1);
+    }
+
+    void copy_store_at_8(Address dst, Register src) {
+      _bs_asm->copy_store_at(_masm, _decorators, _type, 8,
+                             dst, src, noreg,
+                             _gct1, _gct2, _gct3);
+    }
+  };
+
   // Bulk copy of blocks of 8 words.
   //
   // count is a count of words.
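
(Editor's note: the helper above exists purely to shorten the barrier calls inside the
copy loops that follow. Both forms below are equivalent; the first is the spelled-out
call the helper wraps, the second is what the loops actually write:)

    // Direct form:
    _bs_asm->copy_load_at(_masm, _decorators, _type, 16,
                          t0, t1, Address(s, 2 * unit),
                          _gct1);
    // Helper form, as used in generate_copy_longs below:
    bs.copy_load_at_16(t0, t1, Address(s, 2 * unit));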
@@ -709,17 +782,20 @@ class StubGenerator: public StubCodeGenerator {
   //
   // s and d are adjusted to point to the remaining words to copy
   //
-  void generate_copy_longs(Label &start, Register s, Register d, Register count,
+  void generate_copy_longs(DecoratorSet decorators, BasicType type, Label &start, Register s, Register d, Register count,
                            copy_direction direction) {
     int unit = wordSize * direction;
     int bias = (UseSIMDForMemoryOps ? 4:2) * wordSize;
 
     const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6,
-      t4 = r7, t5 = r10, t6 = r11, t7 = r12;
-    const Register stride = r13;
+      t4 = r7, t5 = r11, t6 = r12, t7 = r13;
+    const Register stride = r14;
+    const Register gct1 = rscratch1, gct2 = rscratch2, gct3 = r10;
+    const FloatRegister gcvt1 = v6, gcvt2 = v7, gcvt3 = v8;
+    ArrayCopyBarrierSetHelper bs(_masm, decorators, type, gct1, gct2, gct3, gcvt1, gcvt2, gcvt3);
 
-    assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7);
-    assert_different_registers(s, d, count, rscratch1);
+    assert_different_registers(rscratch1, rscratch2, t0, t1, t2, t3, t4, t5, t6, t7);
+    assert_different_registers(s, d, count, rscratch1, rscratch2);
 
     Label again, drain;
     const char *stub_name;
@@ -757,13 +833,13 @@ class StubGenerator: public StubCodeGenerator {
 
     // Fill 8 registers
     if (UseSIMDForMemoryOps) {
-      __ ldpq(v0, v1, Address(s, 4 * unit));
-      __ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
+      bs.copy_load_at_32(v0, v1, Address(s, 4 * unit));
+      bs.copy_load_at_32(v2, v3, Address(__ pre(s, 8 * unit)));
     } else {
-      __ ldp(t0, t1, Address(s, 2 * unit));
-      __ ldp(t2, t3, Address(s, 4 * unit));
-      __ ldp(t4, t5, Address(s, 6 * unit));
-      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+      bs.copy_load_at_16(t0, t1, Address(s, 2 * unit));
+      bs.copy_load_at_16(t2, t3, Address(s, 4 * unit));
+      bs.copy_load_at_16(t4, t5, Address(s, 6 * unit));
+      bs.copy_load_at_16(t6, t7, Address(__ pre(s, 8 * unit)));
     }
 
     __ subs(count, count, 16);
@@ -783,19 +859,19 @@ class StubGenerator: public StubCodeGenerator {
     __ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);
 
     if (UseSIMDForMemoryOps) {
-      __ stpq(v0, v1, Address(d, 4 * unit));
-      __ ldpq(v0, v1, Address(s, 4 * unit));
-      __ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
-      __ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
+      bs.copy_store_at_32(Address(d, 4 * unit), v0, v1);
+      bs.copy_load_at_32(v0, v1, Address(s, 4 * unit));
+      bs.copy_store_at_32(Address(__ pre(d, 8 * unit)), v2, v3);
+      bs.copy_load_at_32(v2, v3, Address(__ pre(s, 8 * unit)));
     } else {
-      __ stp(t0, t1, Address(d, 2 * unit));
-      __ ldp(t0, t1, Address(s, 2 * unit));
-      __ stp(t2, t3, Address(d, 4 * unit));
-      __ ldp(t2, t3, Address(s, 4 * unit));
-      __ stp(t4, t5, Address(d, 6 * unit));
-      __ ldp(t4, t5, Address(s, 6 * unit));
-      __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
-      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+      bs.copy_store_at_16(Address(d, 2 * unit), t0, t1);
+      bs.copy_load_at_16(t0, t1, Address(s, 2 * unit));
+      bs.copy_store_at_16(Address(d, 4 * unit), t2, t3);
+      bs.copy_load_at_16(t2, t3, Address(s, 4 * unit));
+      bs.copy_store_at_16(Address(d, 6 * unit), t4, t5);
+      bs.copy_load_at_16(t4, t5, Address(s, 6 * unit));
+      bs.copy_store_at_16(Address(__ pre(d, 8 * unit)), t6, t7);
+      bs.copy_load_at_16(t6, t7, Address(__ pre(s, 8 * unit)));
     }
 
     __ subs(count, count, 8);
@@ -804,26 +880,26 @@ class StubGenerator: public StubCodeGenerator {
     // Drain
     __ bind(drain);
     if (UseSIMDForMemoryOps) {
-      __ stpq(v0, v1, Address(d, 4 * unit));
-      __ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
+      bs.copy_store_at_32(Address(d, 4 * unit), v0, v1);
+      bs.copy_store_at_32(Address(__ pre(d, 8 * unit)), v2, v3);
     } else {
-      __ stp(t0, t1, Address(d, 2 * unit));
-      __ stp(t2, t3, Address(d, 4 * unit));
-      __ stp(t4, t5, Address(d, 6 * unit));
-      __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
+      bs.copy_store_at_16(Address(d, 2 * unit), t0, t1);
+      bs.copy_store_at_16(Address(d, 4 * unit), t2, t3);
+      bs.copy_store_at_16(Address(d, 6 * unit), t4, t5);
+      bs.copy_store_at_16(Address(__ pre(d, 8 * unit)), t6, t7);
     }
 
     {
       Label L1, L2;
       __ tbz(count, exact_log2(4), L1);
       if (UseSIMDForMemoryOps) {
-        __ ldpq(v0, v1, Address(__ pre(s, 4 * unit)));
-        __ stpq(v0, v1, Address(__ pre(d, 4 * unit)));
+        bs.copy_load_at_32(v0, v1, Address(__ pre(s, 4 * unit)));
+        bs.copy_store_at_32(Address(__ pre(d, 4 * unit)), v0, v1);
       } else {
-        __ ldp(t0, t1, Address(s, 2 * unit));
-        __ ldp(t2, t3, Address(__ pre(s, 4 * unit)));
-        __ stp(t0, t1, Address(d, 2 * unit));
-        __ stp(t2, t3, Address(__ pre(d, 4 * unit)));
+        bs.copy_load_at_16(t0, t1, Address(s, 2 * unit));
+        bs.copy_load_at_16(t2, t3, Address(__ pre(s, 4 * unit)));
+        bs.copy_store_at_16(Address(d, 2 * unit), t0, t1);
+        bs.copy_store_at_16(Address(__ pre(d, 4 * unit)), t2, t3);
       }
       __ bind(L1);
 
@@ -833,8 +909,8 @@ class StubGenerator: public StubCodeGenerator {
       }
 
       __ tbz(count, 1, L2);
-      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
+      bs.copy_load_at_16(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
+      bs.copy_store_at_16(Address(__ adjust(d, 2 * unit, direction == copy_backwards)), t0, t1);
       __ bind(L2);
     }
 
@@ -893,10 +969,10 @@ class StubGenerator: public StubCodeGenerator {
     // t4 at offset -48, t5 at offset -40
     // t6 at offset -64, t7 at offset -56
 
-    __ ldp(t0, t1, Address(s, 2 * unit));
-    __ ldp(t2, t3, Address(s, 4 * unit));
-    __ ldp(t4, t5, Address(s, 6 * unit));
-    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+    bs.copy_load_at_16(t0, t1, Address(s, 2 * unit));
+    bs.copy_load_at_16(t2, t3, Address(s, 4 * unit));
+    bs.copy_load_at_16(t4, t5, Address(s, 6 * unit));
+    bs.copy_load_at_16(t6, t7, Address(__ pre(s, 8 * unit)));
 
     __ subs(count, count, 16);
     __ br(Assembler::LO, drain);
@@ -925,15 +1001,15 @@ class StubGenerator: public StubCodeGenerator {
       // t5 at offset 40, t6 at offset 48
       // t7 at offset 56
 
-      __ str(t0, Address(d, 1 * unit));
-      __ stp(t1, t2, Address(d, 2 * unit));
-      __ ldp(t0, t1, Address(s, 2 * unit));
-      __ stp(t3, t4, Address(d, 4 * unit));
-      __ ldp(t2, t3, Address(s, 4 * unit));
-      __ stp(t5, t6, Address(d, 6 * unit));
-      __ ldp(t4, t5, Address(s, 6 * unit));
-      __ str(t7, Address(__ pre(d, 8 * unit)));
-      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+      bs.copy_store_at_8(Address(d, 1 * unit), t0);
+      bs.copy_store_at_16(Address(d, 2 * unit), t1, t2);
+      bs.copy_load_at_16(t0, t1, Address(s, 2 * unit));
+      bs.copy_store_at_16(Address(d, 4 * unit), t3, t4);
+      bs.copy_load_at_16(t2, t3, Address(s, 4 * unit));
+      bs.copy_store_at_16(Address(d, 6 * unit), t5, t6);
+      bs.copy_load_at_16(t4, t5, Address(s, 6 * unit));
+      bs.copy_store_at_8(Address(__ pre(d, 8 * unit)), t7);
+      bs.copy_load_at_16(t6, t7, Address(__ pre(s, 8 * unit)));
     } else {
       // d was not offset when we started so the registers are
       // written into the 64 bit block preceding d with the following
@@ -948,15 +1024,15 @@ class StubGenerator: public StubCodeGenerator {
       // note that this matches the offsets previously noted for the
       // loads
 
-      __ str(t1, Address(d, 1 * unit));
-      __ stp(t3, t0, Address(d, 3 * unit));
-      __ ldp(t0, t1, Address(s, 2 * unit));
-      __ stp(t5, t2, Address(d, 5 * unit));
-      __ ldp(t2, t3, Address(s, 4 * unit));
-      __ stp(t7, t4, Address(d, 7 * unit));
-      __ ldp(t4, t5, Address(s, 6 * unit));
-      __ str(t6, Address(__ pre(d, 8 * unit)));
-      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+      bs.copy_store_at_8(Address(d, 1 * unit), t1);
+      bs.copy_store_at_16(Address(d, 3 * unit), t3, t0);
+      bs.copy_load_at_16(t0, t1, Address(s, 2 * unit));
+      bs.copy_store_at_16(Address(d, 5 * unit), t5, t2);
+      bs.copy_load_at_16(t2, t3, Address(s, 4 * unit));
+      bs.copy_store_at_16(Address(d, 7 * unit), t7, t4);
+      bs.copy_load_at_16(t4, t5, Address(s, 6 * unit));
+      bs.copy_store_at_8(Address(__ pre(d, 8 * unit)), t6);
+      bs.copy_load_at_16(t6, t7, Address(__ pre(s, 8 * unit)));
     }
 
     __ subs(count, count, 8);
@@ -968,17 +1044,17 @@ class StubGenerator: public StubCodeGenerator {
     // as above
     __ bind(drain);
     if (direction == copy_forwards) {
-      __ str(t0, Address(d, 1 * unit));
-      __ stp(t1, t2, Address(d, 2 * unit));
-      __ stp(t3, t4, Address(d, 4 * unit));
-      __ stp(t5, t6, Address(d, 6 * unit));
-      __ str(t7, Address(__ pre(d, 8 * unit)));
+      bs.copy_store_at_8(Address(d, 1 * unit), t0);
+      bs.copy_store_at_16(Address(d, 2 * unit), t1, t2);
+      bs.copy_store_at_16(Address(d, 4 * unit), t3, t4);
+      bs.copy_store_at_16(Address(d, 6 * unit), t5, t6);
+      bs.copy_store_at_8(Address(__ pre(d, 8 * unit)), t7);
     } else {
-      __ str(t1, Address(d, 1 * unit));
-      __ stp(t3, t0, Address(d, 3 * unit));
-      __ stp(t5, t2, Address(d, 5 * unit));
-      __ stp(t7, t4, Address(d, 7 * unit));
-      __ str(t6, Address(__ pre(d, 8 * unit)));
+      bs.copy_store_at_8(Address(d, 1 * unit), t1);
+      bs.copy_store_at_16(Address(d, 3 * unit), t3, t0);
+      bs.copy_store_at_16(Address(d, 5 * unit), t5, t2);
+      bs.copy_store_at_16(Address(d, 7 * unit), t7, t4);
+      bs.copy_store_at_8(Address(__ pre(d, 8 * unit)), t6);
     }
     // now we need to copy any remaining part block which may
     // include a 4 word block subblock and/or a 2 word subblock.
@@ -991,16 +1067,16 @@ class StubGenerator: public StubCodeGenerator {
       // with only one intervening stp between the str instructions
       // but note that the offsets and registers still follow the
      // same pattern
-      __ ldp(t0, t1, Address(s, 2 * unit));
-      __ ldp(t2, t3, Address(__ pre(s, 4 * unit)));
+      bs.copy_load_at_16(t0, t1, Address(s, 2 * unit));
+      bs.copy_load_at_16(t2, t3, Address(__ pre(s, 4 * unit)));
       if (direction == copy_forwards) {
-        __ str(t0, Address(d, 1 * unit));
-        __ stp(t1, t2, Address(d, 2 * unit));
-        __ str(t3, Address(__ pre(d, 4 * unit)));
+        bs.copy_store_at_8(Address(d, 1 * unit), t0);
+        bs.copy_store_at_16(Address(d, 2 * unit), t1, t2);
+        bs.copy_store_at_8(Address(__ pre(d, 4 * unit)), t3);
       } else {
-        __ str(t1, Address(d, 1 * unit));
-        __ stp(t3, t0, Address(d, 3 * unit));
-        __ str(t2, Address(__ pre(d, 4 * unit)));
+        bs.copy_store_at_8(Address(d, 1 * unit), t1);
+        bs.copy_store_at_16(Address(d, 3 * unit), t3, t0);
+        bs.copy_store_at_8(Address(__ pre(d, 4 * unit)), t2);
       }
       __ bind(L1);
 
@@ -1009,13 +1085,13 @@ class StubGenerator: public StubCodeGenerator {
       // there is no intervening stp between the str instructions
       // but note that the offset and register patterns are still
       // the same
-      __ ldp(t0, t1, Address(__ pre(s, 2 * unit)));
+      bs.copy_load_at_16(t0, t1, Address(__ pre(s, 2 * unit)));
       if (direction == copy_forwards) {
-        __ str(t0, Address(d, 1 * unit));
-        __ str(t1, Address(__ pre(d, 2 * unit)));
+        bs.copy_store_at_8(Address(d, 1 * unit), t0);
+        bs.copy_store_at_8(Address(__ pre(d, 2 * unit)), t1);
       } else {
-        __ str(t1, Address(d, 1 * unit));
-        __ str(t0, Address(__ pre(d, 2 * unit)));
+        bs.copy_store_at_8(Address(d, 1 * unit), t1);
+        bs.copy_store_at_8(Address(__ pre(d, 2 * unit)), t0);
       }
       __ bind(L2);
 
@@ -1038,18 +1114,19 @@ class StubGenerator: public StubCodeGenerator {
   // NB: Ignores all of the bits of count which represent more than 15
   // bytes, so a caller doesn't have to mask them.
 
-  void copy_memory_small(Register s, Register d, Register count, Register tmp, int step) {
+  void copy_memory_small(DecoratorSet decorators, BasicType type, Register s, Register d, Register count, int step) {
     bool is_backwards = step < 0;
     size_t granularity = uabs(step);
     int direction = is_backwards ? -1 : 1;
-    int unit = wordSize * direction;
 
     Label Lword, Lint, Lshort, Lbyte;
 
     assert(granularity
            && granularity <= sizeof (jlong), "Impossible granularity in copy_memory_small");
 
-    const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6;
+    const Register t0 = r3;
+    const Register gct1 = rscratch1, gct2 = rscratch2, gct3 = r10;
+    ArrayCopyBarrierSetHelper bs(_masm, decorators, type, gct1, gct2, gct3, fnoreg, fnoreg, fnoreg);
 
     // ??? I don't know if this bit-test-and-branch is the right thing
     // to do. It does a lot of jumping, resulting in several
@@ -1057,33 +1134,35 @@ class StubGenerator: public StubCodeGenerator {
     // with something like Duff's device with a single computed branch.
 
     __ tbz(count, 3 - exact_log2(granularity), Lword);
-    __ ldr(tmp, Address(__ adjust(s, unit, is_backwards)));
-    __ str(tmp, Address(__ adjust(d, unit, is_backwards)));
+    bs.copy_load_at_8(t0, Address(__ adjust(s, direction * wordSize, is_backwards)));
+    bs.copy_store_at_8(Address(__ adjust(d, direction * wordSize, is_backwards)), t0);
     __ bind(Lword);
 
     if (granularity <= sizeof (jint)) {
       __ tbz(count, 2 - exact_log2(granularity), Lint);
-      __ ldrw(tmp, Address(__ adjust(s, sizeof (jint) * direction, is_backwards)));
-      __ strw(tmp, Address(__ adjust(d, sizeof (jint) * direction, is_backwards)));
+      __ ldrw(t0, Address(__ adjust(s, sizeof (jint) * direction, is_backwards)));
+      __ strw(t0, Address(__ adjust(d, sizeof (jint) * direction, is_backwards)));
       __ bind(Lint);
     }
 
     if (granularity <= sizeof (jshort)) {
       __ tbz(count, 1 - exact_log2(granularity), Lshort);
-      __ ldrh(tmp, Address(__ adjust(s, sizeof (jshort) * direction, is_backwards)));
-      __ strh(tmp, Address(__ adjust(d, sizeof (jshort) * direction, is_backwards)));
+      __ ldrh(t0, Address(__ adjust(s, sizeof (jshort) * direction, is_backwards)));
+      __ strh(t0, Address(__ adjust(d, sizeof (jshort) * direction, is_backwards)));
       __ bind(Lshort);
     }
 
     if (granularity <= sizeof (jbyte)) {
       __ tbz(count, 0, Lbyte);
-      __ ldrb(tmp, Address(__ adjust(s, sizeof (jbyte) * direction, is_backwards)));
-      __ strb(tmp, Address(__ adjust(d, sizeof (jbyte) * direction, is_backwards)));
+      __ ldrb(t0, Address(__ adjust(s, sizeof (jbyte) * direction, is_backwards)));
+      __ strb(t0, Address(__ adjust(d, sizeof (jbyte) * direction, is_backwards)));
       __ bind(Lbyte);
     }
   }
 
   Label copy_f, copy_b;
+  Label copy_obj_f, copy_obj_b;
+  Label copy_obj_uninit_f, copy_obj_uninit_b;
 
   // All-singing all-dancing memory copy.
   //
@@ -1092,8 +1171,8 @@ class StubGenerator: public StubCodeGenerator {
   // of copy. If is_aligned is false, we align the source address.
   //
 
-  void copy_memory(bool is_aligned, Register s, Register d,
-                   Register count, Register tmp, int step) {
+  void copy_memory(DecoratorSet decorators, BasicType type, bool is_aligned,
+                   Register s, Register d, Register count, int step) {
     copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
     bool is_backwards = step < 0;
     unsigned int granularity = uabs(step);
@@ -1102,9 +1181,12 @@ class StubGenerator: public StubCodeGenerator {
     // <= 80 (or 96 for SIMD) bytes do inline. Direction doesn't matter because we always
     // load all the data before writing anything
     Label copy4, copy8, copy16, copy32, copy80, copy_big, finish;
-    const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
-    const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
+    const Register t2 = r5, t3 = r6, t4 = r7, t5 = r11;
+    const Register t6 = r12, t7 = r13, t8 = r14, t9 = r15;
     const Register send = r17, dend = r16;
+    const Register gct1 = rscratch1, gct2 = rscratch2, gct3 = r10;
+    const FloatRegister gcvt1 = v6, gcvt2 = v7, gcvt3 = v8;
+    ArrayCopyBarrierSetHelper bs(_masm, decorators, type, gct1, gct2, gct3, gcvt1, gcvt2, gcvt3);
 
     if (PrefetchCopyIntervalInBytes > 0)
       __ prfm(Address(s, 0), PLDL1KEEP);
@@ -1125,37 +1207,38 @@ class StubGenerator: public StubCodeGenerator {
 
     // 33..64 bytes
     if (UseSIMDForMemoryOps) {
-      __ ldpq(v0, v1, Address(s, 0));
-      __ ldpq(v2, v3, Address(send, -32));
-      __ stpq(v0, v1, Address(d, 0));
-      __ stpq(v2, v3, Address(dend, -32));
+      bs.copy_load_at_32(v0, v1, Address(s, 0));
+      bs.copy_load_at_32(v2, v3, Address(send, -32));
+      bs.copy_store_at_32(Address(d, 0), v0, v1);
+      bs.copy_store_at_32(Address(dend, -32), v2, v3);
     } else {
-      __ ldp(t0, t1, Address(s, 0));
-      __ ldp(t2, t3, Address(s, 16));
-      __ ldp(t4, t5, Address(send, -32));
-      __ ldp(t6, t7, Address(send, -16));
+      bs.copy_load_at_16(t0, t1, Address(s, 0));
+      bs.copy_load_at_16(t2, t3, Address(s, 16));
+      bs.copy_load_at_16(t4, t5, Address(send, -32));
+      bs.copy_load_at_16(t6, t7, Address(send, -16));
 
-      __ stp(t0, t1, Address(d, 0));
-      __ stp(t2, t3, Address(d, 16));
-      __ stp(t4, t5, Address(dend, -32));
-      __ stp(t6, t7, Address(dend, -16));
+      bs.copy_store_at_16(Address(d, 0), t0, t1);
+      bs.copy_store_at_16(Address(d, 16), t2, t3);
+      bs.copy_store_at_16(Address(dend, -32), t4, t5);
+      bs.copy_store_at_16(Address(dend, -16), t6, t7);
     }
     __ b(finish);
 
     // 17..32 bytes
     __ bind(copy32);
-    __ ldp(t0, t1, Address(s, 0));
-    __ ldp(t2, t3, Address(send, -16));
-    __ stp(t0, t1, Address(d, 0));
-    __ stp(t2, t3, Address(dend, -16));
+    bs.copy_load_at_16(t0, t1, Address(s, 0));
+    bs.copy_load_at_16(t6, t7, Address(send, -16));
+
+    bs.copy_store_at_16(Address(d, 0), t0, t1);
+    bs.copy_store_at_16(Address(dend, -16), t6, t7);
     __ b(finish);
 
     // 65..80/96 bytes
     // (96 bytes if SIMD because we do 32 bytes per instruction)
     __ bind(copy80);
     if (UseSIMDForMemoryOps) {
-      __ ldpq(v0, v1, Address(s, 0));
-      __ ldpq(v2, v3, Address(s, 32));
+      bs.copy_load_at_32(v0, v1, Address(s, 0));
+      bs.copy_load_at_32(v2, v3, Address(s, 32));
       // Unaligned pointers can be an issue for copying.
       // The issue has more chances to happen when granularity of data is
       // less than 4(sizeof(jint)). Pointers for arrays of jint are at least
@@ -1167,32 +1250,34 @@ class StubGenerator: public StubCodeGenerator {
       Label copy96;
       __ cmp(count, u1(80/granularity));
       __ br(Assembler::HI, copy96);
-      __ ldp(t0, t1, Address(send, -16));
+      bs.copy_load_at_16(t0, t1, Address(send, -16));
 
-      __ stpq(v0, v1, Address(d, 0));
-      __ stpq(v2, v3, Address(d, 32));
-      __ stp(t0, t1, Address(dend, -16));
+      bs.copy_store_at_32(Address(d, 0), v0, v1);
+      bs.copy_store_at_32(Address(d, 32), v2, v3);
+
+      bs.copy_store_at_16(Address(dend, -16), t0, t1);
       __ b(finish);
 
       __ bind(copy96);
     }
-    __ ldpq(v4, v5, Address(send, -32));
+    bs.copy_load_at_32(v4, v5, Address(send, -32));
 
-    __ stpq(v0, v1, Address(d, 0));
-    __ stpq(v2, v3, Address(d, 32));
-    __ stpq(v4, v5, Address(dend, -32));
+    bs.copy_store_at_32(Address(d, 0), v0, v1);
+    bs.copy_store_at_32(Address(d, 32), v2, v3);
+
+    bs.copy_store_at_32(Address(dend, -32), v4, v5);
     } else {
-      __ ldp(t0, t1, Address(s, 0));
-      __ ldp(t2, t3, Address(s, 16));
-      __ ldp(t4, t5, Address(s, 32));
-      __ ldp(t6, t7, Address(s, 48));
-      __ ldp(t8, t9, Address(send, -16));
+      bs.copy_load_at_16(t0, t1, Address(s, 0));
+      bs.copy_load_at_16(t2, t3, Address(s, 16));
+      bs.copy_load_at_16(t4, t5, Address(s, 32));
+      bs.copy_load_at_16(t6, t7, Address(s, 48));
+      bs.copy_load_at_16(t8, t9, Address(send, -16));
 
-      __ stp(t0, t1, Address(d, 0));
-      __ stp(t2, t3, Address(d, 16));
-      __ stp(t4, t5, Address(d, 32));
-      __ stp(t6, t7, Address(d, 48));
-      __ stp(t8, t9, Address(dend, -16));
+      bs.copy_store_at_16(Address(d, 0), t0, t1);
+      bs.copy_store_at_16(Address(d, 16), t2, t3);
+      bs.copy_store_at_16(Address(d, 32), t4, t5);
+      bs.copy_store_at_16(Address(d, 48), t6, t7);
+      bs.copy_store_at_16(Address(dend, -16), t8, t9);
     }
     __ b(finish);
 
@@ -1202,10 +1287,10 @@ class StubGenerator: public StubCodeGenerator {
     __ br(Assembler::LO, copy8);
 
     // 8..16 bytes
-    __ ldr(t0, Address(s, 0));
-    __ ldr(t1, Address(send, -8));
-    __ str(t0, Address(d, 0));
-    __ str(t1, Address(dend, -8));
+    bs.copy_load_at_8(t0, Address(s, 0));
+    bs.copy_load_at_8(t1, Address(send, -8));
+    bs.copy_store_at_8(Address(d, 0), t0);
+    bs.copy_store_at_8(Address(dend, -8), t1);
     __ b(finish);
 
     if (granularity < 8) {
@@ -1252,26 +1337,31 @@ class StubGenerator: public StubCodeGenerator {
     // Now we've got the small case out of the way we can align the
     // source address on a 2-word boundary.
 
+    // Here we will materialize a count in r15, which is used by copy_memory_small
+    // and the various generate_copy_longs stubs that we use for 2 word aligned bytes.
+    // Up until here, we have used t9, which aliases r15, but from here on, that register
+    // can not be used as a temp register, as it contains the count.
+
     Label aligned;
 
     if (is_aligned) {
       // We may have to adjust by 1 word to get s 2-word-aligned.
       __ tbz(s, exact_log2(wordSize), aligned);
-      __ ldr(tmp, Address(__ adjust(s, direction * wordSize, is_backwards)));
-      __ str(tmp, Address(__ adjust(d, direction * wordSize, is_backwards)));
+      bs.copy_load_at_8(t0, Address(__ adjust(s, direction * wordSize, is_backwards)));
+      bs.copy_store_at_8(Address(__ adjust(d, direction * wordSize, is_backwards)), t0);
       __ sub(count, count, wordSize/granularity);
     } else {
       if (is_backwards) {
-        __ andr(rscratch2, s, 2 * wordSize - 1);
+        __ andr(r15, s, 2 * wordSize - 1);
       } else {
-        __ neg(rscratch2, s);
-        __ andr(rscratch2, rscratch2, 2 * wordSize - 1);
+        __ neg(r15, s);
+        __ andr(r15, r15, 2 * wordSize - 1);
       }
-      // rscratch2 is the byte adjustment needed to align s.
-      __ cbz(rscratch2, aligned);
+      // r15 is the byte adjustment needed to align s.
+      __ cbz(r15, aligned);
       int shift = exact_log2(granularity);
-      if (shift) __ lsr(rscratch2, rscratch2, shift);
-      __ sub(count, count, rscratch2);
+      if (shift) __ lsr(r15, r15, shift);
+      __ sub(count, count, r15);
 
 #if 0
     // ?? This code is only correct for a disjoint copy. It may or
@@ -1283,14 +1373,14 @@ class StubGenerator: public StubCodeGenerator {
 
     // Align s and d, adjust count
     if (is_backwards) {
-      __ sub(s, s, rscratch2);
-      __ sub(d, d, rscratch2);
+      __ sub(s, s, r15);
+      __ sub(d, d, r15);
     } else {
-      __ add(s, s, rscratch2);
-      __ add(d, d, rscratch2);
+      __ add(s, s, r15);
+      __ add(d, d, r15);
     }
 #else
-      copy_memory_small(s, d, rscratch2, rscratch1, step);
+      copy_memory_small(decorators, type, s, d, r15, step);
 #endif
     }
 
@@ -1300,14 +1390,27 @@ class StubGenerator: public StubCodeGenerator {
 
     // We have a count of units and some trailing bytes. Adjust the
     // count and do a bulk copy of words.
-    __ lsr(rscratch2, count, exact_log2(wordSize/granularity));
-    if (direction == copy_forwards)
-      __ bl(copy_f);
-    else
-      __ bl(copy_b);
+    __ lsr(r15, count, exact_log2(wordSize/granularity));
+    if (direction == copy_forwards) {
+      if (type != T_OBJECT) {
+        __ bl(copy_f);
+      } else if ((decorators & IS_DEST_UNINITIALIZED) != 0) {
+        __ bl(copy_obj_uninit_f);
+      } else {
+        __ bl(copy_obj_f);
+      }
+    } else {
+      if (type != T_OBJECT) {
+        __ bl(copy_b);
+      } else if ((decorators & IS_DEST_UNINITIALIZED) != 0) {
+        __ bl(copy_obj_uninit_b);
+      } else {
+        __ bl(copy_obj_b);
+      }
+    }
 
     // And the tail.
-    copy_memory_small(s, d, count, tmp, step);
+    copy_memory_small(decorators, type, s, d, count, step);
 
     if (granularity >= 8) __ bind(copy8);
     if (granularity >= 4) __ bind(copy4);
@@ -1402,7 +1505,7 @@ class StubGenerator: public StubCodeGenerator {
       // UnsafeCopyMemory page error: continue after ucm
       bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
       UnsafeCopyMemoryMark ucmm(this, add_entry, true);
-      copy_memory(aligned, s, d, count, rscratch1, size);
+      copy_memory(decorators, is_oop ? T_OBJECT : T_BYTE, aligned, s, d, count, size);
     }
 
     if (is_oop) {
@@ -1473,7 +1576,7 @@ class StubGenerator: public StubCodeGenerator {
       // UnsafeCopyMemory page error: continue after ucm
       bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
       UnsafeCopyMemoryMark ucmm(this, add_entry, true);
-      copy_memory(aligned, s, d, count, rscratch1, -size);
+      copy_memory(decorators, is_oop ? T_OBJECT : T_BYTE, aligned, s, d, count, -size);
     }
     if (is_oop) {
       __ pop(RegSet::of(d, count), sp);
@@ -1764,6 +1867,9 @@ class StubGenerator: public StubCodeGenerator {
     const Register start_to    = r20;  // destination array start address
     const Register r19_klass   = r19;  // oop._klass
 
+    // Registers used as gc temps (r5, r6, r7 are save-on-call)
+    const Register gct1 = r5, gct2 = r6, gct3 = r7;
+
     //---------------------------------------------------------------
     // Assembler stub will be used for this call to arraycopy
     // if the two arrays are subtypes of Object[] but the
@@ -1816,6 +1922,7 @@ class StubGenerator: public StubCodeGenerator {
 
     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
     bool is_oop = true;
+    int element_size = UseCompressedOops ? 4 : 8;
     if (dest_uninitialized) {
       decorators |= IS_DEST_UNINITIALIZED;
     }
@@ -1841,13 +1948,17 @@ class StubGenerator: public StubCodeGenerator {
     __ align(OptoLoopAlignment);
 
     __ BIND(L_store_element);
-    __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop
+    bs->copy_store_at(_masm, decorators, T_OBJECT, element_size,
+                      __ post(to, element_size), copied_oop, noreg,
+                      gct1, gct2, gct3);
     __ sub(count, count, 1);
     __ cbz(count, L_do_card_marks);
 
     // ======== loop entry is here ========
     __ BIND(L_load_element);
-    __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8), noreg, noreg, AS_RAW); // load the oop
+    bs->copy_load_at(_masm, decorators, T_OBJECT, element_size,
+                     copied_oop, noreg, __ post(from, element_size),
+                     gct1);
     __ cbz(copied_oop, L_store_element);
 
     __ load_klass(r19_klass, copied_oop);// query the object klass
@@ -2444,8 +2555,14 @@ class StubGenerator: public StubCodeGenerator {
     address entry_jlong_arraycopy;
     address entry_checkcast_arraycopy;
 
-    generate_copy_longs(copy_f, r0, r1, rscratch2, copy_forwards);
-    generate_copy_longs(copy_b, r0, r1, rscratch2, copy_backwards);
+    generate_copy_longs(IN_HEAP | IS_ARRAY, T_BYTE, copy_f, r0, r1, r15, copy_forwards);
+    generate_copy_longs(IN_HEAP | IS_ARRAY, T_BYTE, copy_b, r0, r1, r15, copy_backwards);
+
+    generate_copy_longs(IN_HEAP | IS_ARRAY, T_OBJECT, copy_obj_f, r0, r1, r15, copy_forwards);
+    generate_copy_longs(IN_HEAP | IS_ARRAY, T_OBJECT, copy_obj_b, r0, r1, r15, copy_backwards);
+
+    generate_copy_longs(IN_HEAP | IS_ARRAY | IS_DEST_UNINITIALIZED, T_OBJECT, copy_obj_uninit_f, r0, r1, r15, copy_forwards);
+    generate_copy_longs(IN_HEAP | IS_ARRAY | IS_DEST_UNINITIALIZED, T_OBJECT, copy_obj_uninit_b, r0, r1, r15, copy_backwards);
 
     StubRoutines::aarch64::_zero_blocks = generate_zero_blocks();
 
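
(Editor's note: the point of funneling the stubs through these virtual hooks is that a
collector can override them. A hypothetical override on aarch64 might look like the
sketch below — MyBarrierSetAssembler and the barrier step are invented; only the
signature and the call to the default implementation come from this patch:)

    void MyBarrierSetAssembler::copy_load_at(MacroAssembler* masm,
                                             DecoratorSet decorators,
                                             BasicType type,
                                             size_t bytes,
                                             Register dst1,
                                             Register dst2,
                                             Address src,
                                             Register tmp) {
      // Emit the plain ldrb/ldrh/ldrw/ldr/ldp load first.
      BarrierSetAssembler::copy_load_at(masm, decorators, type, bytes,
                                        dst1, dst2, src, tmp);
      if (is_reference_type(type)) {
        // A real collector would emit its load barrier on dst1/dst2 here.
      }
    }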
src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.cpp:

@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -119,6 +119,57 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators
 
 }
 
+void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
+                                       DecoratorSet decorators,
+                                       BasicType type,
+                                       size_t bytes,
+                                       Register dst,
+                                       Address src,
+                                       Register tmp) {
+  if (bytes == 1) {
+    __ lbu(dst, src);
+  } else if (bytes == 2) {
+    __ lhu(dst, src);
+  } else if (bytes == 4) {
+    __ lwu(dst, src);
+  } else if (bytes == 8) {
+    __ ld(dst, src);
+  } else {
+    // Not the right size
+    ShouldNotReachHere();
+  }
+  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
+    __ decode_heap_oop(dst);
+  }
+}
+
+void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
+                                        DecoratorSet decorators,
+                                        BasicType type,
+                                        size_t bytes,
+                                        Address dst,
+                                        Register src,
+                                        Register tmp1,
+                                        Register tmp2,
+                                        Register tmp3) {
+  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
+    __ encode_heap_oop(src);
+  }
+
+  if (bytes == 1) {
+    __ sb(src, dst);
+  } else if (bytes == 2) {
+    __ sh(src, dst);
+  } else if (bytes == 4) {
+    __ sw(src, dst);
+  } else if (bytes == 8) {
+    __ sd(src, dst);
+  } else {
+    // Not the right size
+    ShouldNotReachHere();
+  }
+}
+
 void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                         Register obj, Register tmp, Label& slowpath) {
   // If mask changes we need to ensure that the inverse is still encodable as an immediate
src/hotspot/cpu/riscv/gc/shared/barrierSetAssembler_riscv.hpp:

@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
+ * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it

@@ -49,6 +49,27 @@ public:
                                   Register src, Register dst, Register count, RegSet saved_regs) {}
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register start, Register count, Register tmp, RegSet saved_regs) {}
 
+  virtual void copy_load_at(MacroAssembler* masm,
+                            DecoratorSet decorators,
+                            BasicType type,
+                            size_t bytes,
+                            Register dst,
+                            Address src,
+                            Register tmp);
+
+  virtual void copy_store_at(MacroAssembler* masm,
+                             DecoratorSet decorators,
+                             BasicType type,
+                             size_t bytes,
+                             Address dst,
+                             Register src,
+                             Register tmp1,
+                             Register tmp2,
+                             Register tmp3);
+
+  virtual bool supports_rvv_arraycopy() { return true; }
+
   virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                        Register dst, Address src, Register tmp1, Register tmp2);
   virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
src/hotspot/cpu/riscv/stubGenerator_riscv.cpp:

@@ -941,9 +941,10 @@ class StubGenerator: public StubCodeGenerator {
     }
   }
 
-  void copy_memory(bool is_aligned, Register s, Register d,
-                   Register count, Register tmp, int step) {
-    if (UseRVV) {
+  void copy_memory(DecoratorSet decorators, BasicType type, bool is_aligned,
+                   Register s, Register d, Register count, Register tmp, int step) {
+    BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
+    if (UseRVV && (!is_reference_type(type) || bs_asm->supports_rvv_arraycopy())) {
       return copy_memory_v(s, d, count, tmp, step);
     }
 
@@ -951,32 +952,11 @@ class StubGenerator: public StubCodeGenerator {
     int granularity = uabs(step);
 
     const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17, tmp5 = x14, tmp6 = x13;
+    const Register gct1 = x28, gct2 = x29, gct3 = t2;
 
     Label same_aligned;
     Label copy_big, copy32_loop, copy8_loop, copy_small, done;
 
-    copy_insn ld_arr = NULL, st_arr = NULL;
-    switch (granularity) {
-      case 1 :
-        ld_arr = (copy_insn)&MacroAssembler::lbu;
-        st_arr = (copy_insn)&MacroAssembler::sb;
-        break;
-      case 2 :
-        ld_arr = (copy_insn)&MacroAssembler::lhu;
-        st_arr = (copy_insn)&MacroAssembler::sh;
-        break;
-      case 4 :
-        ld_arr = (copy_insn)&MacroAssembler::lwu;
-        st_arr = (copy_insn)&MacroAssembler::sw;
-        break;
-      case 8 :
-        ld_arr = (copy_insn)&MacroAssembler::ld;
-        st_arr = (copy_insn)&MacroAssembler::sd;
-        break;
-      default :
-        ShouldNotReachHere();
-    }
-
     __ beqz(count, done);
     __ slli(cnt, count, exact_log2(granularity));
     if (is_backwards) {
@@ -1008,8 +988,8 @@ class StubGenerator: public StubCodeGenerator {
       __ addi(src, src, step);
       __ addi(dst, dst, step);
     }
-    (_masm->*ld_arr)(tmp3, Address(src), t0);
-    (_masm->*st_arr)(tmp3, Address(dst), t0);
+    bs_asm->copy_load_at(_masm, decorators, type, granularity, tmp3, Address(src), gct1);
+    bs_asm->copy_store_at(_masm, decorators, type, granularity, Address(dst), tmp3, gct1, gct2, gct3);
     if (!is_backwards) {
       __ addi(src, src, step);
       __ addi(dst, dst, step);
@@ -1028,14 +1008,15 @@ class StubGenerator: public StubCodeGenerator {
       __ addi(dst, dst, -wordSize * 4);
     }
     // we first load 32 bytes, then write it, so the direction here doesn't matter
-    __ ld(tmp3, Address(src));
-    __ ld(tmp4, Address(src, 8));
-    __ ld(tmp5, Address(src, 16));
-    __ ld(tmp6, Address(src, 24));
-    __ sd(tmp3, Address(dst));
-    __ sd(tmp4, Address(dst, 8));
-    __ sd(tmp5, Address(dst, 16));
-    __ sd(tmp6, Address(dst, 24));
+    bs_asm->copy_load_at(_masm, decorators, type, 8, tmp3, Address(src), gct1);
+    bs_asm->copy_load_at(_masm, decorators, type, 8, tmp4, Address(src, 8), gct1);
+    bs_asm->copy_load_at(_masm, decorators, type, 8, tmp5, Address(src, 16), gct1);
+    bs_asm->copy_load_at(_masm, decorators, type, 8, tmp6, Address(src, 24), gct1);
+
+    bs_asm->copy_store_at(_masm, decorators, type, 8, Address(dst), tmp3, gct1, gct2, gct3);
+    bs_asm->copy_store_at(_masm, decorators, type, 8, Address(dst, 8), tmp4, gct1, gct2, gct3);
+    bs_asm->copy_store_at(_masm, decorators, type, 8, Address(dst, 16), tmp5, gct1, gct2, gct3);
+    bs_asm->copy_store_at(_masm, decorators, type, 8, Address(dst, 24), tmp6, gct1, gct2, gct3);
 
     if (!is_backwards) {
       __ addi(src, src, wordSize * 4);
@@ -1055,8 +1036,9 @@ class StubGenerator: public StubCodeGenerator {
       __ addi(src, src, -wordSize);
       __ addi(dst, dst, -wordSize);
     }
-    __ ld(tmp3, Address(src));
-    __ sd(tmp3, Address(dst));
+    bs_asm->copy_load_at(_masm, decorators, type, 8, tmp3, Address(src), gct1);
+    bs_asm->copy_store_at(_masm, decorators, type, 8, Address(dst), tmp3, gct1, gct2, gct3);
+
     if (!is_backwards) {
       __ addi(src, src, wordSize);
       __ addi(dst, dst, wordSize);
@@ -1072,8 +1054,10 @@ class StubGenerator: public StubCodeGenerator {
       __ addi(src, src, step);
       __ addi(dst, dst, step);
     }
-    (_masm->*ld_arr)(tmp3, Address(src), t0);
-    (_masm->*st_arr)(tmp3, Address(dst), t0);
+
+    bs_asm->copy_load_at(_masm, decorators, type, granularity, tmp3, Address(src), gct1);
+    bs_asm->copy_store_at(_masm, decorators, type, granularity, Address(dst), tmp3, gct1, gct2, gct3);
+
     if (!is_backwards) {
       __ addi(src, src, step);
       __ addi(dst, dst, step);
@@ -1160,7 +1144,7 @@ class StubGenerator: public StubCodeGenerator {
       // UnsafeCopyMemory page error: continue after ucm
       bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
       UnsafeCopyMemoryMark ucmm(this, add_entry, true);
-      copy_memory(aligned, s, d, count, t0, size);
+      copy_memory(decorators, is_oop ? T_OBJECT : T_BYTE, aligned, s, d, count, t0, size);
     }
 
     if (is_oop) {
@@ -1211,7 +1195,10 @@ class StubGenerator: public StubCodeGenerator {
     // use fwd copy when (d-s) above_equal (count*size)
     __ sub(t0, d, s);
     __ slli(t1, count, exact_log2(size));
-    __ bgeu(t0, t1, nooverlap_target);
+    Label L_continue;
+    __ bltu(t0, t1, L_continue);
+    __ j(nooverlap_target);
+    __ bind(L_continue);
 
     DecoratorSet decorators = IN_HEAP | IS_ARRAY;
     if (dest_uninitialized) {
@@ -1233,7 +1220,7 @@ class StubGenerator: public StubCodeGenerator {
       // UnsafeCopyMemory page error: continue after ucm
       bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
       UnsafeCopyMemoryMark ucmm(this, add_entry, true);
-      copy_memory(aligned, s, d, count, t0, -size);
+      copy_memory(decorators, is_oop ? T_OBJECT : T_BYTE, aligned, s, d, count, t0, -size);
     }
 
     if (is_oop) {
@@ -1523,6 +1510,9 @@ class StubGenerator: public StubCodeGenerator {
     const Register copied_oop = x7;  // actual oop copied
     const Register r9_klass   = x9;  // oop._klass
 
+    // Registers used as gc temps (x15, x16, x17 are save-on-call)
+    const Register gct1 = x15, gct2 = x16, gct3 = x17;
+
     //---------------------------------------------------------------
     // Assembler stub will be used for this call to arraycopy
     // if the two arrays are subtypes of Object[] but the
@@ -1564,11 +1554,13 @@ class StubGenerator: public StubCodeGenerator {
 #endif //ASSERT
 
     DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT;
-    bool is_oop = true;
     if (dest_uninitialized) {
       decorators |= IS_DEST_UNINITIALIZED;
     }
 
+    bool is_oop = true;
+    int element_size = UseCompressedOops ? 4 : 8;
+
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, is_oop, from, to, count, wb_pre_saved_regs);
 
@@ -1591,14 +1583,18 @@ class StubGenerator: public StubCodeGenerator {
     __ align(OptoLoopAlignment);
 
     __ BIND(L_store_element);
-    __ store_heap_oop(Address(to, 0), copied_oop, noreg, noreg, noreg, AS_RAW); // store the oop
+    bs->copy_store_at(_masm, decorators, T_OBJECT, element_size,
+                      Address(to, 0), copied_oop,
+                      gct1, gct2, gct3);
     __ add(to, to, UseCompressedOops ? 4 : 8);
     __ sub(count, count, 1);
     __ beqz(count, L_do_card_marks);
 
     // ======== loop entry is here ========
     __ BIND(L_load_element);
-    __ load_heap_oop(copied_oop, Address(from, 0), noreg, noreg, AS_RAW); // load the oop
+    bs->copy_load_at(_masm, decorators, T_OBJECT, element_size,
+                     copied_oop, Address(from, 0),
+                     gct1);
     __ add(from, from, UseCompressedOops ? 4 : 8);
     __ beqz(copied_oop, L_store_element);
 
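
(Editor's note: supports_rvv_arraycopy(), declared in the RISC-V header above, lets a
barrier set veto the vectorized RVV path — copy_memory() only calls copy_memory_v()
for reference arrays when the barrier set allows it. A collector whose barriers cannot
run under RVV would override it; sketch with an invented class name:)

    class MyBarrierSetAssembler : public BarrierSetAssembler {
      // Send oop arraycopy down the scalar loop so copy_load_at()/copy_store_at()
      // barriers are applied to each element.
      virtual bool supports_rvv_arraycopy() { return false; }
    };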
src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp:

@@ -195,6 +195,113 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators
   }
 }
 
+void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
+                                       DecoratorSet decorators,
+                                       BasicType type,
+                                       size_t bytes,
+                                       Register dst,
+                                       Address src,
+                                       Register tmp) {
+  assert(bytes <= 8, "can only deal with non-vector registers");
+  switch (bytes) {
+  case 1:
+    __ movb(dst, src);
+    break;
+  case 2:
+    __ movw(dst, src);
+    break;
+  case 4:
+    __ movl(dst, src);
+    break;
+  case 8:
+#ifdef _LP64
+    __ movq(dst, src);
+#else
+    fatal("No support for 8 bytes copy");
+#endif
+    break;
+  default:
+    fatal("Unexpected size");
+  }
+#ifdef _LP64
+  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
+    __ decode_heap_oop(dst);
+  }
+#endif
+}
+
+void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
+                                        DecoratorSet decorators,
+                                        BasicType type,
+                                        size_t bytes,
+                                        Address dst,
+                                        Register src,
+                                        Register tmp) {
+#ifdef _LP64
+  if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) {
+    __ encode_heap_oop(src);
+  }
+#endif
+  assert(bytes <= 8, "can only deal with non-vector registers");
+  switch (bytes) {
+  case 1:
+    __ movb(dst, src);
+    break;
+  case 2:
+    __ movw(dst, src);
+    break;
+  case 4:
+    __ movl(dst, src);
+    break;
+  case 8:
+#ifdef _LP64
+    __ movq(dst, src);
+#else
+    fatal("No support for 8 bytes copy");
+#endif
+    break;
+  default:
+    fatal("Unexpected size");
+  }
+}
+
+void BarrierSetAssembler::copy_load_at(MacroAssembler* masm,
+                                       DecoratorSet decorators,
+                                       BasicType type,
+                                       size_t bytes,
+                                       XMMRegister dst,
+                                       Address src,
+                                       Register tmp,
+                                       XMMRegister xmm_tmp) {
+  assert(bytes > 8, "can only deal with vector registers");
+  if (bytes == 16) {
+    __ movdqu(dst, src);
+  } else if (bytes == 32) {
+    __ vmovdqu(dst, src);
+  } else {
+    fatal("No support for >32 bytes copy");
+  }
+}
+
+void BarrierSetAssembler::copy_store_at(MacroAssembler* masm,
+                                        DecoratorSet decorators,
+                                        BasicType type,
+                                        size_t bytes,
+                                        Address dst,
+                                        XMMRegister src,
+                                        Register tmp1,
+                                        Register tmp2,
+                                        XMMRegister xmm_tmp) {
+  assert(bytes > 8, "can only deal with vector registers");
+  if (bytes == 16) {
+    __ movdqu(dst, src);
+  } else if (bytes == 32) {
+    __ vmovdqu(dst, src);
+  } else {
+    fatal("No support for >32 bytes copy");
+  }
+}
+
 void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                                         Register obj, Register tmp, Label& slowpath) {
   __ clear_jobject_tag(obj);
src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp:

@@ -49,6 +49,46 @@ public:
   virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                         Address dst, Register val, Register tmp1, Register tmp2, Register tmp3);
 
+  // The copy_[load/store]_at functions are used by arraycopy stubs. Be careful to only use
+  // r10 (aka rscratch1) in a context where restore_arg_regs_using_thread has been used instead
+  // of the looser setup_arg_regs. Currently this is done when using type T_OBJECT.
+  virtual void copy_load_at(MacroAssembler* masm,
+                            DecoratorSet decorators,
+                            BasicType type,
+                            size_t bytes,
+                            Register dst,
+                            Address src,
+                            Register tmp);
+
+  virtual void copy_store_at(MacroAssembler* masm,
+                             DecoratorSet decorators,
+                             BasicType type,
+                             size_t bytes,
+                             Address dst,
+                             Register src,
+                             Register tmp);
+
+  virtual void copy_load_at(MacroAssembler* masm,
+                            DecoratorSet decorators,
+                            BasicType type,
+                            size_t bytes,
+                            XMMRegister dst,
+                            Address src,
+                            Register tmp,
+                            XMMRegister xmm_tmp);
+
+  virtual void copy_store_at(MacroAssembler* masm,
+                             DecoratorSet decorators,
+                             BasicType type,
+                             size_t bytes,
+                             Address dst,
+                             XMMRegister src,
+                             Register tmp1,
+                             Register tmp2,
+                             XMMRegister xmm_tmp);
+
+  virtual bool supports_avx3_masked_arraycopy() { return true; }
+
   // Support for jniFastGetField to try resolving a jobject/jweak in native
   virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
                                              Register obj, Register tmp, Label& slowpath);
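
(Editor's note on the r10 caveat above: the arraycopy stubs pick their argument setup
by element type. The body of setup_argument_regs() is not shown in this diff; a
plausible shape — an assumption, based only on the comment above — is:)

    void StubGenerator::setup_argument_regs(BasicType type) {
      if (type == T_OBJECT) {
        setup_arg_regs_using_thread();  // keeps r10 (rscratch1) safe for GC temps
      } else {
        setup_arg_regs();
      }
    }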
src/hotspot/cpu/x86/stubGenerator_x86_64.cpp:

@@ -1162,15 +1162,17 @@ void StubGenerator::setup_arg_regs(int nargs) {
 #ifdef _WIN64
   assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,
          "unexpected argument registers");
-  if (nargs >= 4)
+  if (nargs == 4) {
     __ mov(rax, r9);  // r9 is also saved_rdi
+  }
   __ movptr(saved_rdi, rdi);
   __ movptr(saved_rsi, rsi);
   __ mov(rdi, rcx); // c_rarg0
   __ mov(rsi, rdx); // c_rarg1
   __ mov(rdx, r8);  // c_rarg2
-  if (nargs >= 4)
+  if (nargs == 4) {
     __ mov(rcx, rax); // c_rarg3 (via rax)
+  }
 #else
   assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
          "unexpected argument registers");

@@ -1192,9 +1194,13 @@ void StubGenerator::restore_arg_regs() {
 
 // This is used in places where r10 is a scratch register, and can
 // be adapted if r9 is needed also.
-void StubGenerator::setup_arg_regs_using_thread() {
+void StubGenerator::setup_arg_regs_using_thread(int nargs) {
   const Register saved_r15 = r9;
+  assert(nargs == 3 || nargs == 4, "else fix");
 #ifdef _WIN64
+  if (nargs == 4) {
+    __ mov(rax, r9);  // r9 is also saved_r15
+  }
   __ mov(saved_r15, r15);  // r15 is callee saved and needs to be restored
   __ get_thread(r15_thread);
   assert(c_rarg0 == rcx && c_rarg1 == rdx && c_rarg2 == r8 && c_rarg3 == r9,

@@ -1205,6 +1211,9 @@ void StubGenerator::setup_arg_regs_using_thread() {
   __ mov(rdi, rcx); // c_rarg0
   __ mov(rsi, rdx); // c_rarg1
   __ mov(rdx, r8);  // c_rarg2
+  if (nargs == 4) {
+    __ mov(rcx, rax); // c_rarg3 (via rax)
+  }
 #else
   assert(c_rarg0 == rdi && c_rarg1 == rsi && c_rarg2 == rdx && c_rarg3 == rcx,
          "unexpected argument registers");
src/hotspot/cpu/x86/stubGenerator_x86_64.hpp:

@@ -140,19 +140,23 @@ class StubGenerator: public StubCodeGenerator {
 
   // This is used in places where r10 is a scratch register, and can
   // be adapted if r9 is needed also.
-  void setup_arg_regs_using_thread();
+  void setup_arg_regs_using_thread(int nargs = 3);
 
   void restore_arg_regs_using_thread();
 
   // Copy big chunks forward
   void copy_bytes_forward(Register end_from, Register end_to,
-                          Register qword_count, Register to,
-                          Label& L_copy_bytes, Label& L_copy_8_bytes);
+                          Register qword_count, Register tmp1,
+                          Register tmp2, Label& L_copy_bytes,
+                          Label& L_copy_8_bytes, DecoratorSet decorators,
+                          BasicType type);
 
   // Copy big chunks backward
   void copy_bytes_backward(Register from, Register dest,
-                           Register qword_count, Register to,
-                           Label& L_copy_bytes, Label& L_copy_8_bytes);
+                           Register qword_count, Register tmp1,
+                           Register tmp2, Label& L_copy_bytes,
+                           Label& L_copy_8_bytes, DecoratorSet decorators,
+                           BasicType type);
 
   void setup_argument_regs(BasicType type);
 

@ -231,13 +231,16 @@ void StubGenerator::array_overlap_test(address no_overlap_target, Label* NOLp, A
// end_from - source arrays end address
// end_to - destination array end address
// qword_count - 64-bits element count, negative
// to - scratch
// tmp1 - scratch
// L_copy_bytes - entry label
// L_copy_8_bytes - exit label
//
void StubGenerator::copy_bytes_forward(Register end_from, Register end_to,
Register qword_count, Register to,
Label& L_copy_bytes, Label& L_copy_8_bytes) {
Register qword_count, Register tmp1,
Register tmp2, Label& L_copy_bytes,
Label& L_copy_8_bytes, DecoratorSet decorators,
BasicType type) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
DEBUG_ONLY(__ stop("enter at entry label, not here"));
Label L_loop;
__ align(OptoLoopAlignment);

@ -245,49 +248,102 @@ void StubGenerator::copy_bytes_forward(Register end_from, Register end_to,
Label L_end;
__ BIND(L_loop);
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
__ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
bs->copy_load_at(_masm, decorators, type, 32,
xmm0, Address(end_from, qword_count, Address::times_8, -56),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 32,
Address(end_to, qword_count, Address::times_8, -56), xmm0,
tmp1, tmp2, xmm1);

bs->copy_load_at(_masm, decorators, type, 32,
xmm0, Address(end_from, qword_count, Address::times_8, -24),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 32,
Address(end_to, qword_count, Address::times_8, -24), xmm0,
tmp1, tmp2, xmm1);
} else {
__ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
__ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
__ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
__ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
__ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
__ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
__ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
__ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(end_from, qword_count, Address::times_8, -56),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(end_to, qword_count, Address::times_8, -56), xmm0,
tmp1, tmp2, xmm1);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(end_from, qword_count, Address::times_8, -40),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(end_to, qword_count, Address::times_8, -40), xmm0,
tmp1, tmp2, xmm1);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(end_from, qword_count, Address::times_8, -24),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(end_to, qword_count, Address::times_8, -24), xmm0,
tmp1, tmp2, xmm1);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(end_from, qword_count, Address::times_8, -8),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(end_to, qword_count, Address::times_8, -8), xmm0,
tmp1, tmp2, xmm1);
}

__ BIND(L_copy_bytes);
__ addptr(qword_count, 8);
__ jcc(Assembler::lessEqual, L_loop);
__ subptr(qword_count, 4); // sub(8) and add(4)
__ jccb(Assembler::greater, L_end);
__ jcc(Assembler::greater, L_end);
// Copy trailing 32 bytes
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
bs->copy_load_at(_masm, decorators, type, 32,
xmm0, Address(end_from, qword_count, Address::times_8, -24),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 32,
Address(end_to, qword_count, Address::times_8, -24), xmm0,
tmp1, tmp2, xmm1);
} else {
__ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
__ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0);
__ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8));
__ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(end_from, qword_count, Address::times_8, -24),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(end_to, qword_count, Address::times_8, -24), xmm0,
tmp1, tmp2, xmm1);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(end_from, qword_count, Address::times_8, -8),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(end_to, qword_count, Address::times_8, -8), xmm0,
tmp1, tmp2, xmm1);
}
__ addptr(qword_count, 4);
__ BIND(L_end);
} else {
// Copy 32-bytes per iteration
__ BIND(L_loop);
__ movq(to, Address(end_from, qword_count, Address::times_8, -24));
__ movq(Address(end_to, qword_count, Address::times_8, -24), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, -16));
__ movq(Address(end_to, qword_count, Address::times_8, -16), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 8));
__ movq(Address(end_to, qword_count, Address::times_8, - 8), to);
__ movq(to, Address(end_from, qword_count, Address::times_8, - 0));
__ movq(Address(end_to, qword_count, Address::times_8, - 0), to);
bs->copy_load_at(_masm, decorators, type, 8,
tmp1, Address(end_from, qword_count, Address::times_8, -24),
tmp2);
bs->copy_store_at(_masm, decorators, type, 8,
Address(end_to, qword_count, Address::times_8, -24), tmp1,
tmp2);
bs->copy_load_at(_masm, decorators, type, 8,
tmp1, Address(end_from, qword_count, Address::times_8, -16),
tmp2);
bs->copy_store_at(_masm, decorators, type, 8,
Address(end_to, qword_count, Address::times_8, -16), tmp1,
tmp2);
bs->copy_load_at(_masm, decorators, type, 8,
tmp1, Address(end_from, qword_count, Address::times_8, -8),
tmp2);
bs->copy_store_at(_masm, decorators, type, 8,
Address(end_to, qword_count, Address::times_8, -8), tmp1,
tmp2);
bs->copy_load_at(_masm, decorators, type, 8,
tmp1, Address(end_from, qword_count, Address::times_8, 0),
tmp2);
bs->copy_store_at(_masm, decorators, type, 8,
Address(end_to, qword_count, Address::times_8, 0), tmp1,
tmp2);

__ BIND(L_copy_bytes);
__ addptr(qword_count, 4);
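Two details in the hunk above are easy to miss. First, jccb becomes jcc: with barrier code expanded between the branch and L_end, the displacement can presumably outgrow the byte-sized branch's reach of roughly +/-127 bytes. Second, for a barrier set with no special copy semantics the XMM flavor of the hooks can reduce to the plain vector moves it replaces. A minimal sketch under that assumption, with the usual #define __ masm-> and a signature inferred from the call sites rather than quoted from the patch:

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm, DecoratorSet decorators,
                                       BasicType type, size_t bytes,
                                       XMMRegister dst, Address src,
                                       Register tmp, XMMRegister xmm_tmp) {
  if (bytes == 32) {
    __ vmovdqu(dst, src);  // same instruction the loop used before the hook
  } else if (bytes == 16) {
    __ movdqu(dst, src);
  } else {
    ShouldNotReachHere();  // sketch covers only the vector sizes used here
  }
}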

@ -304,13 +360,16 @@ void StubGenerator::copy_bytes_forward(Register end_from, Register end_to,
// from - source arrays address
// dest - destination array address
// qword_count - 64-bits element count
// to - scratch
// tmp1 - scratch
// L_copy_bytes - entry label
// L_copy_8_bytes - exit label
//
void StubGenerator::copy_bytes_backward(Register from, Register dest,
Register qword_count, Register to,
Label& L_copy_bytes, Label& L_copy_8_bytes) {
Register qword_count, Register tmp1,
Register tmp2, Label& L_copy_bytes,
Label& L_copy_8_bytes, DecoratorSet decorators,
BasicType type) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
DEBUG_ONLY(__ stop("enter at entry label, not here"));
Label L_loop;
__ align(OptoLoopAlignment);

@ -318,19 +377,43 @@ void StubGenerator::copy_bytes_backward(Register from, Register dest,
Label L_end;
__ BIND(L_loop);
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
__ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
__ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
__ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
bs->copy_load_at(_masm, decorators, type, 32,
xmm0, Address(from, qword_count, Address::times_8, 32),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 32,
Address(dest, qword_count, Address::times_8, 32), xmm0,
tmp1, tmp2, xmm1);
bs->copy_load_at(_masm, decorators, type, 32,
xmm0, Address(from, qword_count, Address::times_8, 0),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 32,
Address(dest, qword_count, Address::times_8, 0), xmm0,
tmp1, tmp2, xmm1);
} else {
__ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
__ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
__ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
__ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
__ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
__ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
__ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
__ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(from, qword_count, Address::times_8, 48),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(dest, qword_count, Address::times_8, 48), xmm0,
tmp1, tmp2, xmm1);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(from, qword_count, Address::times_8, 32),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(dest, qword_count, Address::times_8, 32), xmm0,
tmp1, tmp2, xmm1);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(from, qword_count, Address::times_8, 16),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(dest, qword_count, Address::times_8, 16), xmm0,
tmp1, tmp2, xmm1);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(from, qword_count, Address::times_8, 0),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(dest, qword_count, Address::times_8, 0), xmm0,
tmp1, tmp2, xmm1);
}

__ BIND(L_copy_bytes);

@ -338,30 +421,58 @@ void StubGenerator::copy_bytes_backward(Register from, Register dest,
__ jcc(Assembler::greaterEqual, L_loop);

__ addptr(qword_count, 4); // add(8) and sub(4)
__ jccb(Assembler::less, L_end);
__ jcc(Assembler::less, L_end);
// Copy trailing 32 bytes
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
__ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0);
bs->copy_load_at(_masm, decorators, type, 32,
xmm0, Address(from, qword_count, Address::times_8, 0),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 32,
Address(dest, qword_count, Address::times_8, 0), xmm0,
tmp1, tmp2, xmm1);
} else {
__ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16));
__ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0);
__ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
__ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(from, qword_count, Address::times_8, 16),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(dest, qword_count, Address::times_8, 16), xmm0,
tmp1, tmp2, xmm1);
bs->copy_load_at(_masm, decorators, type, 16,
xmm0, Address(from, qword_count, Address::times_8, 0),
tmp1, xmm1);
bs->copy_store_at(_masm, decorators, type, 16,
Address(dest, qword_count, Address::times_8, 0), xmm0,
tmp1, tmp2, xmm1);
}
__ subptr(qword_count, 4);
__ BIND(L_end);
} else {
// Copy 32-bytes per iteration
__ BIND(L_loop);
__ movq(to, Address(from, qword_count, Address::times_8, 24));
__ movq(Address(dest, qword_count, Address::times_8, 24), to);
__ movq(to, Address(from, qword_count, Address::times_8, 16));
__ movq(Address(dest, qword_count, Address::times_8, 16), to);
__ movq(to, Address(from, qword_count, Address::times_8, 8));
__ movq(Address(dest, qword_count, Address::times_8, 8), to);
__ movq(to, Address(from, qword_count, Address::times_8, 0));
__ movq(Address(dest, qword_count, Address::times_8, 0), to);
bs->copy_load_at(_masm, decorators, type, 8,
tmp1, Address(from, qword_count, Address::times_8, 24),
tmp2);
bs->copy_store_at(_masm, decorators, type, 8,
Address(dest, qword_count, Address::times_8, 24), tmp1,
tmp2);
bs->copy_load_at(_masm, decorators, type, 8,
tmp1, Address(from, qword_count, Address::times_8, 16),
tmp2);
bs->copy_store_at(_masm, decorators, type, 8,
Address(dest, qword_count, Address::times_8, 16), tmp1,
tmp2);
bs->copy_load_at(_masm, decorators, type, 8,
tmp1, Address(from, qword_count, Address::times_8, 8),
tmp2);
bs->copy_store_at(_masm, decorators, type, 8,
Address(dest, qword_count, Address::times_8, 8), tmp1,
tmp2);
bs->copy_load_at(_masm, decorators, type, 8,
tmp1, Address(from, qword_count, Address::times_8, 0),
tmp2);
bs->copy_store_at(_masm, decorators, type, 8,
Address(dest, qword_count, Address::times_8, 0), tmp1,
tmp2);

__ BIND(L_copy_bytes);
__ subptr(qword_count, 4);

@ -1024,6 +1135,7 @@ address StubGenerator::generate_disjoint_byte_copy(bool aligned, address* entry,
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;

Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
Label L_copy_byte, L_exit;

@ -1108,7 +1220,7 @@ __ BIND(L_exit);
{
UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
// Copy in multi-bytes chunks
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
copy_bytes_forward(end_from, end_to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, T_BYTE);
__ jmp(L_copy_4_bytes);
}
return start;

@ -1141,6 +1253,7 @@ address StubGenerator::generate_conjoint_byte_copy(bool aligned, address nooverl
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
DecoratorSet decorators = IN_HEAP | IS_ARRAY;

Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes, L_copy_2_bytes;
const Register from = rdi; // source array address

@ -1211,7 +1324,7 @@ address StubGenerator::generate_conjoint_byte_copy(bool aligned, address nooverl
// UnsafeCopyMemory page error: continue after ucm
UnsafeCopyMemoryMark ucmm(this, !aligned, true);
// Copy in multi-bytes chunks
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
copy_bytes_backward(from, to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, T_BYTE);
}
restore_arg_regs();
INC_COUNTER_NP(SharedRuntime::_jbyte_array_copy_ctr, rscratch1); // Update counter after rscratch1 is free

@ -1254,6 +1367,7 @@ address StubGenerator::generate_disjoint_short_copy(bool aligned, address *entry
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;

Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes,L_copy_2_bytes,L_exit;
const Register from = rdi; // source array address

@ -1330,7 +1444,7 @@ __ BIND(L_exit);
{
UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
// Copy in multi-bytes chunks
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
copy_bytes_forward(end_from, end_to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, T_SHORT);
__ jmp(L_copy_4_bytes);
}

@ -1388,6 +1502,7 @@ address StubGenerator::generate_conjoint_short_copy(bool aligned, address noover
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
DecoratorSet decorators = IN_HEAP | IS_ARRAY;

Label L_copy_bytes, L_copy_8_bytes, L_copy_4_bytes;
const Register from = rdi; // source array address

@ -1450,7 +1565,7 @@ address StubGenerator::generate_conjoint_short_copy(bool aligned, address noover
// UnsafeCopyMemory page error: continue after ucm
UnsafeCopyMemoryMark ucmm(this, !aligned, true);
// Copy in multi-bytes chunks
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
copy_bytes_backward(from, to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, T_SHORT);
}
restore_arg_regs();
INC_COUNTER_NP(SharedRuntime::_jshort_array_copy_ctr, rscratch1); // Update counter after rscratch1 is free

@ -1484,8 +1599,9 @@ address StubGenerator::generate_conjoint_short_copy(bool aligned, address noover
//
address StubGenerator::generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
const char *name, bool dest_uninitialized) {
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
#if COMPILER2_OR_JVMCI
if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
if ((!is_oop || bs->supports_avx3_masked_arraycopy()) && VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_disjoint_copy_avx3_masked(entry, "jint_disjoint_arraycopy_avx3", 2,
aligned, is_oop, dest_uninitialized);
}
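The same widened guard recurs in every int/long variant below. Read it as: use the masked AVX3 stub only when the hardware supports it and, for oop arrays, only when the barrier set has not vetoed it. A hypothetical helper naming that predicate (illustration only, not in the patch):

static bool use_avx3_masked_arraycopy(bool is_oop, BarrierSetAssembler* bs) {
  // Oop copies may need GC barriers that the masked stubs cannot express,
  // so they additionally require the barrier set's consent.
  return (!is_oop || bs->supports_avx3_masked_arraycopy())
      && VM_Version::supports_avx512vlbw()
      && VM_Version::supports_bmi2()
      && MaxVectorSize >= 32;
}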

@ -1527,7 +1643,6 @@ address StubGenerator::generate_disjoint_int_oop_copy(bool aligned, bool is_oop,
}

BasicType type = is_oop ? T_OBJECT : T_INT;
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->arraycopy_prologue(_masm, decorators, type, from, to, count);

{

@ -1570,7 +1685,7 @@ __ BIND(L_exit);
{
UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, false, ucme_exit_pc);
// Copy in multi-bytes chunks
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
copy_bytes_forward(end_from, end_to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, is_oop ? T_OBJECT : T_INT);
__ jmp(L_copy_4_bytes);
}

@ -1596,8 +1711,9 @@ __ BIND(L_exit);
address StubGenerator::generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
address *entry, const char *name,
bool dest_uninitialized) {
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
#if COMPILER2_OR_JVMCI
if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
if ((!is_oop || bs->supports_avx3_masked_arraycopy()) && VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_conjoint_copy_avx3_masked(entry, "jint_conjoint_arraycopy_avx3", 2,
nooverlap_target, aligned, is_oop, dest_uninitialized);
}

@ -1635,7 +1751,6 @@ address StubGenerator::generate_conjoint_int_oop_copy(bool aligned, bool is_oop,
}

BasicType type = is_oop ? T_OBJECT : T_INT;
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
// no registers are destroyed by this call
bs->arraycopy_prologue(_masm, decorators, type, from, to, count);

@ -1677,7 +1792,7 @@ address StubGenerator::generate_conjoint_int_oop_copy(bool aligned, bool is_oop,
// UnsafeCopyMemory page error: continue after ucm
UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
// Copy in multi-bytes chunks
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
copy_bytes_backward(from, to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, is_oop ? T_OBJECT : T_INT);
}

__ BIND(L_exit);

@ -1710,8 +1825,9 @@ __ BIND(L_exit);
//
address StubGenerator::generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
const char *name, bool dest_uninitialized) {
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
#if COMPILER2_OR_JVMCI
if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
if ((!is_oop || bs->supports_avx3_masked_arraycopy()) && VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_disjoint_copy_avx3_masked(entry, "jlong_disjoint_arraycopy_avx3", 3,
aligned, is_oop, dest_uninitialized);
}

@ -1753,7 +1869,6 @@ address StubGenerator::generate_disjoint_long_oop_copy(bool aligned, bool is_oop
}

BasicType type = is_oop ? T_OBJECT : T_LONG;
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
{
// UnsafeCopyMemory page error: continue after ucm

@ -1767,8 +1882,12 @@ address StubGenerator::generate_disjoint_long_oop_copy(bool aligned, bool is_oop

// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
__ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
bs->copy_load_at(_masm, decorators, type, 8,
rax, Address(end_from, qword_count, Address::times_8, 8),
r10);
bs->copy_store_at(_masm, decorators, type, 8,
Address(end_to, qword_count, Address::times_8, 8), rax,
r10);
__ increment(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
}
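The trailing-qword path above now routes through the Register flavor of the hooks, with r10 as the spare temp. For a barrier set without copy barriers this presumably reduces to the movq pair it replaces; a sketch under that assumption (signature inferred from the call sites, with the usual #define __ masm->):

void BarrierSetAssembler::copy_load_at(MacroAssembler* masm, DecoratorSet decorators,
                                       BasicType type, size_t bytes,
                                       Register dst, Address src, Register tmp) {
  if (bytes == 8) {
    __ movq(dst, src);     // the pre-patch instruction, now behind the hook
  } else {
    ShouldNotReachHere();  // qword case only; a real override handles more sizes
  }
}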

@ -1787,7 +1906,7 @@ address StubGenerator::generate_disjoint_long_oop_copy(bool aligned, bool is_oop
// UnsafeCopyMemory page error: continue after ucm
UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
// Copy in multi-bytes chunks
copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
copy_bytes_forward(end_from, end_to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, is_oop ? T_OBJECT : T_LONG);
}

__ BIND(L_exit);

@ -1819,8 +1938,9 @@ address StubGenerator::generate_disjoint_long_oop_copy(bool aligned, bool is_oop
address StubGenerator::generate_conjoint_long_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
address *entry, const char *name,
bool dest_uninitialized) {
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
#if COMPILER2_OR_JVMCI
if (VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
if ((!is_oop || bs->supports_avx3_masked_arraycopy()) && VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2() && MaxVectorSize >= 32) {
return generate_conjoint_copy_avx3_masked(entry, "jlong_conjoint_arraycopy_avx3", 3,
nooverlap_target, aligned, is_oop, dest_uninitialized);
}

@ -1858,7 +1978,6 @@ address StubGenerator::generate_conjoint_long_oop_copy(bool aligned, bool is_oop
}

BasicType type = is_oop ? T_OBJECT : T_LONG;
BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
{
// UnsafeCopyMemory page error: continue after ucm

@ -1868,8 +1987,12 @@ address StubGenerator::generate_conjoint_long_oop_copy(bool aligned, bool is_oop

// Copy trailing qwords
__ BIND(L_copy_8_bytes);
__ movq(rax, Address(from, qword_count, Address::times_8, -8));
__ movq(Address(to, qword_count, Address::times_8, -8), rax);
bs->copy_load_at(_masm, decorators, type, 8,
rax, Address(from, qword_count, Address::times_8, -8),
r10);
bs->copy_store_at(_masm, decorators, type, 8,
Address(to, qword_count, Address::times_8, -8), rax,
r10);
__ decrement(qword_count);
__ jcc(Assembler::notZero, L_copy_8_bytes);
}

@ -1888,7 +2011,7 @@ address StubGenerator::generate_conjoint_long_oop_copy(bool aligned, bool is_oop
UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);

// Copy in multi-bytes chunks
copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
copy_bytes_backward(from, to, qword_count, rax, r10, L_copy_bytes, L_copy_8_bytes, decorators, is_oop ? T_OBJECT : T_LONG);
}
__ BIND(L_exit);
bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);

@ -1987,9 +2110,9 @@ address StubGenerator::generate_checkcast_copy(const char *name, address *entry,
}
#endif //ASSERT

setup_arg_regs(4); // from => rdi, to => rsi, length => rdx
// ckoff => rcx, ckval => r8
// r9 and r10 may be used to save non-volatile registers
setup_arg_regs_using_thread(4); // from => rdi, to => rsi, length => rdx
// ckoff => rcx, ckval => r8
// r9 is used to save r15_thread
#ifdef _WIN64
// last argument (#4) is on stack on Win64
__ movptr(ckval, Address(rsp, 6 * wordSize));

@ -2052,6 +2175,8 @@ address StubGenerator::generate_checkcast_copy(const char *name, address *entry,
}

BasicType type = T_OBJECT;
size_t element_size = UseCompressedOops ? 4 : 8;

BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->arraycopy_prologue(_masm, decorators, type, from, to, count);

@ -2075,13 +2200,25 @@ address StubGenerator::generate_checkcast_copy(const char *name, address *entry,
__ align(OptoLoopAlignment);

__ BIND(L_store_element);
__ store_heap_oop(to_element_addr, rax_oop, noreg, noreg, noreg, AS_RAW); // store the oop
bs->copy_store_at(_masm,
decorators,
type,
element_size,
to_element_addr,
rax_oop,
r10);
__ increment(count); // increment the count toward zero
__ jcc(Assembler::zero, L_do_card_marks);

// ======== loop entry is here ========
__ BIND(L_load_element);
__ load_heap_oop(rax_oop, from_element_addr, noreg, noreg, AS_RAW); // load the oop
bs->copy_load_at(_masm,
decorators,
type,
element_size,
rax_oop,
from_element_addr,
r10);
__ testptr(rax_oop, rax_oop);
__ jcc(Assembler::zero, L_store_element);
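In the checkcast loop the raw load_heap_oop/store_heap_oop pair with AS_RAW gives way to the sized hooks, with element_size fixed up front as 4 or 8 bytes. Since the loop null-tests rax_oop as a full pointer, the 4-byte (compressed oops) case presumably has to widen on load and re-narrow on store; an illustrative sketch of that shape, not quoted from the patch:

__ movl(rax_oop, from_element_addr);  // load the narrow oop
__ decode_heap_oop(rax_oop);          // widen it so the null test below works
// ... checkcast ...
__ encode_heap_oop(rax_oop);          // re-narrow before the 4-byte store
__ movl(to_element_addr, rax_oop);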

@ -2113,7 +2250,7 @@ address StubGenerator::generate_checkcast_copy(const char *name, address *entry,
__ movptr(r13, Address(rsp, saved_r13_offset * wordSize));
__ movptr(r14, Address(rsp, saved_r14_offset * wordSize));
__ movptr(r10, Address(rsp, saved_r10_offset * wordSize));
restore_arg_regs();
restore_arg_regs_using_thread();
INC_COUNTER_NP(SharedRuntime::_checkcast_array_copy_ctr, rscratch1); // Update counter after rscratch1 is free
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);

@ -2529,7 +2666,7 @@ __ BIND(L_checkcast_copy);
// the checkcast_copy loop needs two extra arguments:
assert(c_rarg3 == sco_temp, "#3 already in place");
// Set up arguments for checkcast_copy_entry.
setup_arg_regs(4);
setup_arg_regs_using_thread(4);
__ movptr(r8, r11_dst_klass); // dst.klass.element_klass, r8 is c_rarg4 on Linux/Solaris
__ jump(RuntimeAddress(checkcast_copy_entry));
}