8296602: RISC-V: improve performance of copy_memory stub
Reviewed-by: fyang
parent 4527dc67be
commit bd57e2138f
@@ -881,7 +881,11 @@ class StubGenerator: public StubCodeGenerator {
   //
   /*
    * if (is_aligned) {
-   *   goto copy_8_bytes;
+   *   if (count >= 32)
+   *     goto copy32_loop;
+   *   if (count >= 8)
+   *     goto copy8_loop;
+   *   goto copy_small;
    * }
    * bool is_backwards = step < 0;
    * int granularity = uabs(step);
@@ -899,9 +903,12 @@ class StubGenerator: public StubCodeGenerator {
   *
   * if ((dst % 8) == (src % 8)) {
   *   aligned;
-  *   goto copy8;
+  *   goto copy_big;
   * }
   *
+  * copy_big:
+  *   if the amount to copy is more than (or equal to) 32 bytes goto copy32_loop
+  *   else goto copy8_loop
   * copy_small:
   *   load element one by one;
   *   done;
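For reference, the dispatch described by the updated comment can be modelled in plain C++ roughly as follows. This is an illustrative sketch only, not the stub itself: the helper name copy_memory_sketch is made up, it handles forward, non-overlapping copies only, and it leaves out the backward/overlap handling the real stub performs.

#include <cstddef>
#include <cstdint>
#include <cstring>

// Sketch of the policy: when src and dst share the same misalignment modulo 8,
// align them byte-by-byte, then prefer 32-byte blocks, then 8-byte words;
// everything else (and any leftover tail) goes through the byte loop.
static void copy_memory_sketch(uint8_t* dst, const uint8_t* src, size_t count) {
  if ((reinterpret_cast<uintptr_t>(src) % 8) == (reinterpret_cast<uintptr_t>(dst) % 8)) {
    while (count > 0 && (reinterpret_cast<uintptr_t>(src) % 8) != 0) {
      *dst++ = *src++;            // same_aligned: copy until an 8-byte boundary is reached
      count--;
    }
    while (count >= 32) {         // copy32_loop: four 64-bit words per iteration
      std::memcpy(dst, src, 32);
      src += 32; dst += 32; count -= 32;
    }
    while (count >= 8) {          // copy8_loop: one 64-bit word per iteration
      std::memcpy(dst, src, 8);
      src += 8; dst += 8; count -= 8;
    }
  }
  while (count > 0) {             // copy_small: remaining bytes one element at a time
    *dst++ = *src++;
    count--;
  }
}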
@@ -962,10 +969,10 @@ class StubGenerator: public StubCodeGenerator {
     bool is_backwards = step < 0;
     int granularity = uabs(step);
 
-    const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17;
+    const Register src = x30, dst = x31, cnt = x15, tmp3 = x16, tmp4 = x17, tmp5 = x14, tmp6 = x13;
 
     Label same_aligned;
-    Label copy8, copy_small, done;
+    Label copy_big, copy32_loop, copy8_loop, copy_small, done;
 
     copy_insn ld_arr = NULL, st_arr = NULL;
     switch (granularity) {
@@ -1000,36 +1007,69 @@ class StubGenerator: public StubCodeGenerator {
     }
 
     if (is_aligned) {
+      __ addi(tmp, cnt, -32);
+      __ bgez(tmp, copy32_loop);
       __ addi(tmp, cnt, -8);
-      __ bgez(tmp, copy8);
+      __ bgez(tmp, copy8_loop);
       __ j(copy_small);
+    } else {
+      __ mv(tmp, 16);
+      __ blt(cnt, tmp, copy_small);
+
+      __ xorr(tmp, src, dst);
+      __ andi(tmp, tmp, 0b111);
+      __ bnez(tmp, copy_small);
+
+      __ bind(same_aligned);
+      __ andi(tmp, src, 0b111);
+      __ beqz(tmp, copy_big);
+      if (is_backwards) {
+        __ addi(src, src, step);
+        __ addi(dst, dst, step);
+      }
+      (_masm->*ld_arr)(tmp3, Address(src), t0);
+      (_masm->*st_arr)(tmp3, Address(dst), t0);
+      if (!is_backwards) {
+        __ addi(src, src, step);
+        __ addi(dst, dst, step);
+      }
+      __ addi(cnt, cnt, -granularity);
+      __ beqz(cnt, done);
+      __ j(same_aligned);
+
+      __ bind(copy_big);
+      __ mv(tmp, 32);
+      __ blt(cnt, tmp, copy8_loop);
     }
 
-    __ mv(tmp, 16);
-    __ blt(cnt, tmp, copy_small);
-
-    __ xorr(tmp, src, dst);
-    __ andi(tmp, tmp, 0b111);
-    __ bnez(tmp, copy_small);
-
-    __ bind(same_aligned);
-    __ andi(tmp, src, 0b111);
-    __ beqz(tmp, copy8);
+    __ bind(copy32_loop);
     if (is_backwards) {
-      __ addi(src, src, step);
-      __ addi(dst, dst, step);
+      __ addi(src, src, -wordSize * 4);
+      __ addi(dst, dst, -wordSize * 4);
     }
-    (_masm->*ld_arr)(tmp3, Address(src), t0);
-    (_masm->*st_arr)(tmp3, Address(dst), t0);
-    if (!is_backwards) {
-      __ addi(src, src, step);
-      __ addi(dst, dst, step);
-    }
-    __ addi(cnt, cnt, -granularity);
-    __ beqz(cnt, done);
-    __ j(same_aligned);
+    // we first load 32 bytes, then write it, so the direction here doesn't matter
+    __ ld(tmp3, Address(src));
+    __ ld(tmp4, Address(src, 8));
+    __ ld(tmp5, Address(src, 16));
+    __ ld(tmp6, Address(src, 24));
+    __ sd(tmp3, Address(dst));
+    __ sd(tmp4, Address(dst, 8));
+    __ sd(tmp5, Address(dst, 16));
+    __ sd(tmp6, Address(dst, 24));
-
-    __ bind(copy8);
+    if (!is_backwards) {
+      __ addi(src, src, wordSize * 4);
+      __ addi(dst, dst, wordSize * 4);
+    }
+    __ addi(tmp, cnt, -(32 + wordSize * 4));
+    __ addi(cnt, cnt, -wordSize * 4);
+    __ bgez(tmp, copy32_loop); // cnt >= 32, do next loop
+
+    __ beqz(cnt, done); // if that's all - done
+
+    __ addi(tmp, cnt, -8); // if not - copy the remainder
+    __ bltz(tmp, copy_small); // cnt < 8, go to copy_small, else fall through to copy8_loop
+
+    __ bind(copy8_loop);
     if (is_backwards) {
       __ addi(src, src, -wordSize);
       __ addi(dst, dst, -wordSize);
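The new copy32_loop reads all four doublewords into tmp3 through tmp6 before any of them is stored, which is why the in-line comment can say the direction does not matter within a block: even for a backward, overlapping copy the whole 32-byte block is in registers before the first store. The loop-back test addi(tmp, cnt, -(32 + wordSize * 4)) is computed against the pre-decrement count, so the loop repeats exactly while at least 32 bytes will remain after the current block. A rough scalar C++ model of that loop (illustrative only; the stub emits RISC-V ld/sd instructions, and wordSize is 8 on RV64):

#include <cstdint>

// Model of copy32_loop: src/dst point at the next block boundary, cnt counts bytes.
static void copy32_blocks(uint64_t*& src, uint64_t*& dst, int64_t& cnt, bool is_backwards) {
  while (cnt >= 32) {
    if (is_backwards) {
      src -= 4;                   // addi(src, src, -wordSize * 4)
      dst -= 4;                   // addi(dst, dst, -wordSize * 4)
    }
    // Load the whole 32-byte block before writing any of it back.
    uint64_t a = src[0], b = src[1], c = src[2], d = src[3];
    dst[0] = a; dst[1] = b; dst[2] = c; dst[3] = d;
    if (!is_backwards) {
      src += 4;                   // addi(src, src, wordSize * 4)
      dst += 4;                   // addi(dst, dst, wordSize * 4)
    }
    cnt -= 32;                    // addi(cnt, cnt, -wordSize * 4)
  }
}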
@@ -1040,11 +1080,11 @@ class StubGenerator: public StubCodeGenerator {
       __ addi(src, src, wordSize);
       __ addi(dst, dst, wordSize);
     }
+    __ addi(tmp, cnt, -(8 + wordSize));
     __ addi(cnt, cnt, -wordSize);
-    __ addi(tmp4, cnt, -8);
-    __ bgez(tmp4, copy8); // cnt >= 8, do next loop
+    __ bgez(tmp, copy8_loop); // cnt >= 8, do next loop
 
-    __ beqz(cnt, done);
+    __ beqz(cnt, done); // if that's all - done
 
     __ bind(copy_small);
     if (is_backwards) {
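The copy8_loop change is bookkeeping rather than behaviour: the old code decremented cnt and then derived cnt - 8 into a second scratch register (tmp4), while the new code computes the loop-back condition up front as cnt - (8 + wordSize) and reuses tmp, since (cnt - wordSize) - 8 >= 0 is the same test as cnt - (8 + wordSize) >= 0 when wordSize is 8. A scalar model of the resulting word loop, with the byte tail left to copy_small (illustrative only, not the stub):

#include <cstdint>

// Model of copy8_loop: one 64-bit word per iteration, in either direction.
static void copy8_words(uint64_t*& src, uint64_t*& dst, int64_t& cnt, bool is_backwards) {
  while (cnt >= 8) {
    if (is_backwards) { --src; --dst; }   // addi(src/dst, -wordSize)
    *dst = *src;                          // ld(tmp3, Address(src)) / sd(tmp3, Address(dst))
    if (!is_backwards) { ++src; ++dst; }  // addi(src/dst, wordSize)
    cnt -= 8;                             // addi(cnt, cnt, -wordSize)
  }
  // Any remaining 1..7 bytes are handled by copy_small.
}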