8142362: Lots of code duplication in Copy class

Reviewed-by: kvn, hseigel
This commit is contained in:
Coleen Phillimore 2022-01-10 14:34:04 +00:00
parent 11d88ce82e
commit 76477f8cdb
11 changed files with 466 additions and 1177 deletions

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -52,4 +52,166 @@ static void pd_zero_to_bytes(void* to, size_t count) {
(void)memset(to, 0, count);
}
#ifndef _WINDOWS
// Copy exactly `count` HeapWords (0 <= count <= 8) from `from` to `to`.
// Dispatches with a computed branch (`add ..., lsl #5`) into a table of
// 32-byte-aligned (.align 5) copy stubs, one stub per word count, so small
// copies run straight-line with no loop overhead.
// NOTE(review): every stub must fit its 32-byte slot, or the computed
// branch lands mid-stub; the odd-count stubs for 7 words reuse label 2
// of the 6-word stub after a one-word prologue.
#define COPY_SMALL(from, to, count)                                     \
{                                                                       \
  long tmp0, tmp1, tmp2, tmp3;                                          \
  long tmp4, tmp5, tmp6, tmp7;                                          \
  __asm volatile(                                                       \
" adr %[t0], 0f;\n"                                                     \
" add %[t0], %[t0], %[cnt], lsl #5;\n"                                  \
" br %[t0];\n"                                                          \
" .align 5;\n"                                                          \
"0:"                                                                    \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldr %[t0], [%[s], #0];\n"                                             \
" str %[t0], [%[d], #0];\n"                                             \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" ldr %[t2], [%[s], #16];\n"                                            \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" str %[t2], [%[d], #16];\n"                                            \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" ldp %[t2], %[t3], [%[s], #16];\n"                                     \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" stp %[t2], %[t3], [%[d], #16];\n"                                     \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" ldp %[t2], %[t3], [%[s], #16];\n"                                     \
" ldr %[t4], [%[s], #32];\n"                                            \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" stp %[t2], %[t3], [%[d], #16];\n"                                     \
" str %[t4], [%[d], #32];\n"                                            \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" ldp %[t2], %[t3], [%[s], #16];\n"                                     \
" ldp %[t4], %[t5], [%[s], #32];\n"                                     \
"2:"                                                                    \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" stp %[t2], %[t3], [%[d], #16];\n"                                     \
" stp %[t4], %[t5], [%[d], #32];\n"                                     \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldr %[t6], [%[s], #0];\n"                                             \
" ldp %[t0], %[t1], [%[s], #8];\n"                                      \
" ldp %[t2], %[t3], [%[s], #24];\n"                                     \
" ldp %[t4], %[t5], [%[s], #40];\n"                                     \
" str %[t6], [%[d]], #8;\n"                                             \
" b 2b;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" ldp %[t2], %[t3], [%[s], #16];\n"                                     \
" ldp %[t4], %[t5], [%[s], #32];\n"                                     \
" ldp %[t6], %[t7], [%[s], #48];\n"                                     \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" stp %[t2], %[t3], [%[d], #16];\n"                                     \
" stp %[t4], %[t5], [%[d], #32];\n"                                     \
" stp %[t6], %[t7], [%[d], #48];\n"                                     \
"1:"                                                                    \
                                                                        \
  : [s]"+r"(from), [d]"+r"(to), [cnt]"+r"(count),                       \
    [t0]"=&r"(tmp0), [t1]"=&r"(tmp1), [t2]"=&r"(tmp2), [t3]"=&r"(tmp3), \
    [t4]"=&r"(tmp4), [t5]"=&r"(tmp5), [t6]"=&r"(tmp6), [t7]"=&r"(tmp7)  \
  :                                                                     \
  : "memory", "cc");                                                    \
}
// Conjoint (possibly overlapping) HeapWord copy.
// Prefetches the source line, copies up to eight words inline via
// COPY_SMALL, and defers larger counts to the assembler stub.
static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
  if (__builtin_expect(count <= 8, 1)) {
    COPY_SMALL(from, to, count);
    return;
  }
  _Copy_conjoint_words(from, to, count);
}

// Disjoint (non-overlapping) HeapWord copy.
// A compile-time-constant count lets the compiler expand memcpy inline;
// otherwise small copies use COPY_SMALL and large ones the assembler stub.
static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  if (__builtin_constant_p(count)) {
    memcpy(to, from, count * sizeof(HeapWord));
    return;
  }
  __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
  if (__builtin_expect(count <= 8, 1)) {
    COPY_SMALL(from, to, count);
    return;
  }
  _Copy_disjoint_words(from, to, count);
}

// Disjoint HeapWord copy with per-word atomicity.
// NOTE(review): unlike pd_disjoint_words there is no constant-count memcpy
// shortcut -- presumably because memcpy gives no word-atomicity guarantee;
// confirm against the Copy class contract.
static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
  __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
  if (__builtin_expect(count <= 8, 1)) {
    COPY_SMALL(from, to, count);
    return;
  }
  _Copy_disjoint_words(from, to, count);
}
// "Aligned" variants simply forward to the general word-copy routines.
static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_words(from, to, count);
}

static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_disjoint_words(from, to, count);
}
// Conjoint byte copy: memmove tolerates overlapping source/destination,
// which is exactly the "conjoint" contract.
static void pd_conjoint_bytes(const void* from, void* to, size_t count) {
  memmove(to, from, count);
}

// Individual bytes are inherently atomic, so the plain byte copy suffices.
static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
  pd_conjoint_bytes(from, to, count);
}
// Element-atomic primitive copies delegate to the assembler stubs.
static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
  _Copy_conjoint_jshorts_atomic(from, to, count);
}

static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
  _Copy_conjoint_jints_atomic(from, to, count);
}

static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
  _Copy_conjoint_jlongs_atomic(from, to, count);
}

// Oops are copied as jlongs; the assert pins the size assumption.
static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
}

// arrayof_* variants: operands are array elements, copied via stubs.
static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_bytes(from, to, count);
}

static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jshorts(from, to, count);
}

static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jints(from, to, count);
}

static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jlongs(from, to, count);
}

// Only valid for uncompressed oops (first assert); copied as jlongs.
static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {
  assert(!UseCompressedOops, "foo!");
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_arrayof_conjoint_jlongs(from, to, count);
}
#endif // _WINDOWS
#endif // CPU_AARCH64_COPY_AARCH64_HPP

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -59,4 +59,293 @@ static void pd_zero_to_bytes(void* to, size_t count) {
(void)memset(to, 0, count);
}
// Conjoint (possibly overlapping) HeapWord copy.
// 64-bit and Windows use memmove; 32-bit non-Windows uses hand-written
// string-op assembly that picks a forward or backward (std/cld) copy
// depending on the overlap direction.  `temp` is scratch for the asm.
static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
#if defined AMD64 || defined _WINDOWS
  (void)memmove(to, from, count * HeapWordSize);
#else
  // Includes a zero-count check.
  intx temp = 0;
  __asm__ volatile(" testl %6,%6 ;"
                   " jz 7f ;"
                   " cmpl %4,%5 ;"
                   " leal -4(%4,%6,4),%3;"
                   " jbe 1f ;"
                   " cmpl %7,%5 ;"
                   " jbe 4f ;"
                   "1: cmpl $32,%6 ;"
                   " ja 3f ;"
                   " subl %4,%1 ;"
                   "2: movl (%4),%3 ;"
                   " movl %7,(%5,%4,1) ;"
                   " addl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 2b ;"
                   " jmp 7f ;"
                   "3: rep; smovl ;"
                   " jmp 7f ;"
                   "4: cmpl $32,%2 ;"
                   " movl %7,%0 ;"
                   " leal -4(%5,%6,4),%1;"
                   " ja 6f ;"
                   " subl %4,%1 ;"
                   "5: movl (%4),%3 ;"
                   " movl %7,(%5,%4,1) ;"
                   " subl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 5b ;"
                   " jmp 7f ;"
                   "6: std ;"
                   " rep; smovl ;"
                   " cld ;"
                   "7: nop "
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0" (from), "1" (to), "2" (count), "3" (temp)
                   : "memory", "flags");
#endif // AMD64
}
// Disjoint (non-overlapping) HeapWord copy.
// AMD64 expands counts 0..8 via an intentionally falling-through switch
// and uses memcpy above that (safe: ranges do not overlap).  32-bit
// Windows uses memcpy; 32-bit non-Windows uses string-op assembly.
static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  switch (count) {
  case 8:  to[7] = from[7];   // fallthrough
  case 7:  to[6] = from[6];   // fallthrough
  case 6:  to[5] = from[5];   // fallthrough
  case 5:  to[4] = from[4];   // fallthrough
  case 4:  to[3] = from[3];   // fallthrough
  case 3:  to[2] = from[2];   // fallthrough
  case 2:  to[1] = from[1];   // fallthrough
  case 1:  to[0] = from[0];   // fallthrough
  case 0:  break;
  default:
    (void)memcpy(to, from, count * HeapWordSize);
    break;
  }
#else
#if defined _WINDOWS
  (void)memcpy(to, from, count * HeapWordSize);
#else
  // Includes a zero-count check.
  intx temp = 0;
  __asm__ volatile(" testl %6,%6 ;"
                   " jz 3f ;"
                   " cmpl $32,%6 ;"
                   " ja 2f ;"
                   " subl %4,%1 ;"
                   "1: movl (%4),%3 ;"
                   " movl %7,(%5,%4,1);"
                   " addl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 1b ;"
                   " jmp 3f ;"
                   "2: rep; smovl ;"
                   "3: nop "
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0" (from), "1" (to), "2" (count), "3" (temp)
                   : "memory", "cc");
#endif // _WINDOWS
#endif // AMD64
}

// Disjoint HeapWord copy with per-word atomicity.
// AMD64: the default branch copies one word at a time (rather than memcpy)
// -- presumably so each HeapWord store stays a single access; confirm
// against the Copy class contract.  The switch falls through as above.
static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  switch (count) {
  case 8:  to[7] = from[7];   // fallthrough
  case 7:  to[6] = from[6];   // fallthrough
  case 6:  to[5] = from[5];   // fallthrough
  case 5:  to[4] = from[4];   // fallthrough
  case 4:  to[3] = from[3];   // fallthrough
  case 3:  to[2] = from[2];   // fallthrough
  case 2:  to[1] = from[1];   // fallthrough
  case 1:  to[0] = from[0];   // fallthrough
  case 0:  break;
  default:
    while (count-- > 0) {
      *to++ = *from++;
    }
    break;
  }
#else
  // pd_disjoint_words is word-atomic in this implementation.
  pd_disjoint_words(from, to, count);
#endif // AMD64
}
// "Aligned" variants simply forward to the general word-copy routines.
static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_words(from, to, count);
}

static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_disjoint_words(from, to, count);
}
// Conjoint byte copy (ranges may overlap).
// 64-bit and Windows use memmove; 32-bit non-Windows uses string-op
// assembly that aligns to dword boundaries, bulk-copies dwords, handles
// prefix/suffix bytes, and copies backwards (std) for overlapping moves.
static void pd_conjoint_bytes(const void* from, void* to, size_t count) {
#if defined AMD64 || defined _WINDOWS
  (void)memmove(to, from, count);
#else
  // Includes a zero-count check.
  intx temp = 0;
  __asm__ volatile(" testl %6,%6 ;"
                   " jz 13f ;"
                   " cmpl %4,%5 ;"
                   " leal -1(%4,%6),%3 ;"
                   " jbe 1f ;"
                   " cmpl %7,%5 ;"
                   " jbe 8f ;"
                   "1: cmpl $3,%6 ;"
                   " jbe 6f ;"
                   " movl %6,%3 ;"
                   " movl $4,%2 ;"
                   " subl %4,%2 ;"
                   " andl $3,%2 ;"
                   " jz 2f ;"
                   " subl %6,%3 ;"
                   " rep; smovb ;"
                   "2: movl %7,%2 ;"
                   " shrl $2,%2 ;"
                   " jz 5f ;"
                   " cmpl $32,%2 ;"
                   " ja 4f ;"
                   " subl %4,%1 ;"
                   "3: movl (%4),%%edx ;"
                   " movl %%edx,(%5,%4,1);"
                   " addl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 3b ;"
                   " addl %4,%1 ;"
                   " jmp 5f ;"
                   "4: rep; smovl ;"
                   "5: movl %7,%2 ;"
                   " andl $3,%2 ;"
                   " jz 13f ;"
                   "6: xorl %7,%3 ;"
                   "7: movb (%4,%7,1),%%dl ;"
                   " movb %%dl,(%5,%7,1) ;"
                   " addl $1,%3 ;"
                   " subl $1,%2 ;"
                   " jnz 7b ;"
                   " jmp 13f ;"
                   "8: std ;"
                   " cmpl $12,%2 ;"
                   " ja 9f ;"
                   " movl %7,%0 ;"
                   " leal -1(%6,%5),%1 ;"
                   " jmp 11f ;"
                   "9: xchgl %3,%2 ;"
                   " movl %6,%0 ;"
                   " addl $1,%2 ;"
                   " leal -1(%7,%5),%1 ;"
                   " andl $3,%2 ;"
                   " jz 10f ;"
                   " subl %6,%3 ;"
                   " rep; smovb ;"
                   "10: movl %7,%2 ;"
                   " subl $3,%0 ;"
                   " shrl $2,%2 ;"
                   " subl $3,%1 ;"
                   " rep; smovl ;"
                   " andl $3,%3 ;"
                   " jz 12f ;"
                   " movl %7,%2 ;"
                   " addl $3,%0 ;"
                   " addl $3,%1 ;"
                   "11: rep; smovb ;"
                   "12: cld ;"
                   "13: nop ;"
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0" (from), "1" (to), "2" (count), "3" (temp)
                   : "memory", "flags", "%edx");
#endif // AMD64
}

// Individual bytes are inherently atomic; reuse the plain byte copy.
static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
  pd_conjoint_bytes(from, to, count);
}
// Windows has a different implementation
#ifndef _WINDOWS
// Element-atomic short copy; delegates to the assembler stub.
static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
  _Copy_conjoint_jshorts_atomic(from, to, count);
}

// Element-atomic int copy.  On ia32 a HeapWord is one jint, so the
// word-atomic pd_conjoint_words suffices (size pinned by the assert).
static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
#ifdef AMD64
  _Copy_conjoint_jints_atomic(from, to, count);
#else
  assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size");
  // pd_conjoint_words is word-atomic in this implementation.
  pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count);
#endif // AMD64
}

// Element-atomic long copy.  On ia32, fildll/fistpll performs the 8-byte
// load/store as one FPU access; the backward loop (count pre-decremented,
// so (%0,%2,8) addresses element `count`) keeps overlapping moves safe.
static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
#ifdef AMD64
  _Copy_conjoint_jlongs_atomic(from, to, count);
#else
  // Guarantee use of fild/fistp or xmm regs via some asm code, because compilers won't.
  if (from > to) {
    while (count-- > 0) {
      __asm__ volatile("fildll (%0); fistpll (%1)"
                       :
                       : "r" (from), "r" (to)
                       : "memory" );
      ++from;
      ++to;
    }
  } else {
    while (count-- > 0) {
      __asm__ volatile("fildll (%0,%2,8); fistpll (%1,%2,8)"
                       :
                       : "r" (from), "r" (to), "r" (count)
                       : "memory" );
    }
  }
#endif // AMD64
}

// Element-atomic oop copy; oops are jlong-sized on AMD64 and
// HeapWord-sized on ia32 (asserts pin both assumptions).
static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {
#ifdef AMD64
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
#else
  assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size");
  // pd_conjoint_words is word-atomic in this implementation.
  pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count);
#endif // AMD64
}

// arrayof_* variants: stubs on AMD64, element-atomic fallbacks on ia32.
static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_bytes(from, to, count);
}

static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jshorts(from, to, count);
}

static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  _Copy_arrayof_conjoint_jints(from, to, count);
#else
  pd_conjoint_jints_atomic((const jint*)from, (jint*)to, count);
#endif // AMD64
}

static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  _Copy_arrayof_conjoint_jlongs(from, to, count);
#else
  pd_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
#endif // AMD64
}

static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_arrayof_conjoint_jlongs(from, to, count);
#else
  pd_conjoint_oops_atomic((const oop*)from, (oop*)to, count);
#endif // AMD64
}
#endif // _WINDOWS
#endif // CPU_X86_COPY_X86_HPP

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2021, Azul Systems, Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@ -27,163 +27,6 @@
#ifndef OS_CPU_BSD_AARCH64_COPY_BSD_AARCH64_HPP
#define OS_CPU_BSD_AARCH64_COPY_BSD_AARCH64_HPP
// Copy exactly `count` HeapWords (0 <= count <= 8) from `from` to `to`.
// Dispatches with a computed branch into a table of 32-byte-aligned copy
// stubs, one stub per word count.  (Removed by JDK-8142362: duplicate of
// the cpu/aarch64 version.)
#define COPY_SMALL(from, to, count)                                     \
{                                                                       \
  long tmp0, tmp1, tmp2, tmp3;                                          \
  long tmp4, tmp5, tmp6, tmp7;                                          \
  __asm volatile(                                                       \
" adr %[t0], 0f;\n"                                                     \
" add %[t0], %[t0], %[cnt], lsl #5;\n"                                  \
" br %[t0];\n"                                                          \
" .align 5;\n"                                                          \
"0:"                                                                    \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldr %[t0], [%[s], #0];\n"                                             \
" str %[t0], [%[d], #0];\n"                                             \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" ldr %[t2], [%[s], #16];\n"                                            \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" str %[t2], [%[d], #16];\n"                                            \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" ldp %[t2], %[t3], [%[s], #16];\n"                                     \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" stp %[t2], %[t3], [%[d], #16];\n"                                     \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" ldp %[t2], %[t3], [%[s], #16];\n"                                     \
" ldr %[t4], [%[s], #32];\n"                                            \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" stp %[t2], %[t3], [%[d], #16];\n"                                     \
" str %[t4], [%[d], #32];\n"                                            \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" ldp %[t2], %[t3], [%[s], #16];\n"                                     \
" ldp %[t4], %[t5], [%[s], #32];\n"                                     \
"2:"                                                                    \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" stp %[t2], %[t3], [%[d], #16];\n"                                     \
" stp %[t4], %[t5], [%[d], #32];\n"                                     \
" b 1f;\n"                                                              \
" .align 5;\n"                                                          \
" ldr %[t6], [%[s], #0];\n"                                             \
" ldp %[t0], %[t1], [%[s], #8];\n"                                      \
" ldp %[t2], %[t3], [%[s], #24];\n"                                     \
" ldp %[t4], %[t5], [%[s], #40];\n"                                     \
" str %[t6], [%[d]], #8;\n"                                             \
" b 2b;\n"                                                              \
" .align 5;\n"                                                          \
" ldp %[t0], %[t1], [%[s], #0];\n"                                      \
" ldp %[t2], %[t3], [%[s], #16];\n"                                     \
" ldp %[t4], %[t5], [%[s], #32];\n"                                     \
" ldp %[t6], %[t7], [%[s], #48];\n"                                     \
" stp %[t0], %[t1], [%[d], #0];\n"                                      \
" stp %[t2], %[t3], [%[d], #16];\n"                                     \
" stp %[t4], %[t5], [%[d], #32];\n"                                     \
" stp %[t6], %[t7], [%[d], #48];\n"                                     \
"1:"                                                                    \
                                                                        \
  : [s]"+r"(from), [d]"+r"(to), [cnt]"+r"(count),                       \
    [t0]"=&r"(tmp0), [t1]"=&r"(tmp1), [t2]"=&r"(tmp2), [t3]"=&r"(tmp3), \
    [t4]"=&r"(tmp4), [t5]"=&r"(tmp5), [t6]"=&r"(tmp6), [t7]"=&r"(tmp7)  \
  :                                                                     \
  : "memory", "cc");                                                    \
}
// Conjoint (possibly overlapping) HeapWord copy: prefetch, small copies
// inline via COPY_SMALL, larger ones via the assembler stub.
static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
  if (__builtin_expect(count <= 8, 1)) {
    COPY_SMALL(from, to, count);
    return;
  }
  _Copy_conjoint_words(from, to, count);
}

// Disjoint HeapWord copy; constant counts expand to inline memcpy.
static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  if (__builtin_constant_p(count)) {
    memcpy(to, from, count * sizeof(HeapWord));
    return;
  }
  __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
  if (__builtin_expect(count <= 8, 1)) {
    COPY_SMALL(from, to, count);
    return;
  }
  _Copy_disjoint_words(from, to, count);
}

// Disjoint HeapWord copy with per-word atomicity (no memcpy shortcut).
static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
  __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
  if (__builtin_expect(count <= 8, 1)) {
    COPY_SMALL(from, to, count);
    return;
  }
  _Copy_disjoint_words(from, to, count);
}

// "Aligned" variants simply forward to the general word-copy routines.
static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_words(from, to, count);
}

static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_disjoint_words(from, to, count);
}

// Conjoint byte copy; memmove handles overlap.
static void pd_conjoint_bytes(const void* from, void* to, size_t count) {
  (void)memmove(to, from, count);
}

// Individual bytes are inherently atomic.
static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
  pd_conjoint_bytes(from, to, count);
}

// Element-atomic primitive copies delegate to the assembler stubs.
static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
  _Copy_conjoint_jshorts_atomic(from, to, count);
}

static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
  _Copy_conjoint_jints_atomic(from, to, count);
}

static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
  _Copy_conjoint_jlongs_atomic(from, to, count);
}

// Oops are copied as jlongs; the assert pins the size assumption.
static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
}

static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_bytes(from, to, count);
}

static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jshorts(from, to, count);
}

static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jints(from, to, count);
}

static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jlongs(from, to, count);
}

// Only valid for uncompressed oops.
static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {
  assert(!UseCompressedOops, "foo!");
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_arrayof_conjoint_jlongs(from, to, count);
}
// Empty for build system
#endif // OS_CPU_BSD_AARCH64_COPY_BSD_AARCH64_HPP

@ -1,5 +1,5 @@
#
# Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2004, 2022, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -39,7 +39,6 @@
# point or use it in the same manner as does the server
# compiler.
.globl SYMBOL(_Copy_conjoint_bytes)
.globl SYMBOL(_Copy_arrayof_conjoint_bytes)
.globl SYMBOL(_Copy_conjoint_jshorts_atomic)
.globl SYMBOL(_Copy_arrayof_conjoint_jshorts)
@ -72,117 +71,10 @@ SYMBOL(SpinPause):
movl $1, %eax
ret
# Support for void Copy::conjoint_bytes(void* from,
#                                       void* to,
#                                       size_t count)
#
# Forward path (cb_CopyRight) aligns the source to a 4-byte boundary,
# bulk-moves dwords (rep smovl above 32 dwords), then copies the byte
# suffix.  Backward path (cb_CopyLeft) sets the direction flag (std) so
# overlapping destinations above the source are safe, and clears it (cld)
# before returning.
        .p2align 4,,15
	ELF_TYPE(_Copy_conjoint_bytes,@function)
SYMBOL(_Copy_conjoint_bytes):
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      cb_CopyRight
        cmpl     %eax,%edi
        jbe      cb_CopyLeft
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        subl     %esi,%edi
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx            # byte count less prefix
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        xorl     %eax,%eax
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
cb_CopyLeft:
        std
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        subl     %esi,%edi
        addl     $3,%esi
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld
        popl     %edi
        popl     %esi
        ret
# Support for void Copy::arrayof_conjoint_bytes(void* from,
# void* to,
# size_t count)
#
# Same as _Copy_conjoint_bytes, except no source alignment check.
.p2align 4,,15
ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function)
SYMBOL(_Copy_arrayof_conjoint_bytes):

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,285 +25,6 @@
#ifndef OS_CPU_BSD_X86_COPY_BSD_X86_HPP
#define OS_CPU_BSD_X86_COPY_BSD_X86_HPP
// Conjoint (possibly overlapping) HeapWord copy.
// AMD64 uses memmove; ia32 uses string-op assembly that picks a forward
// or backward (std/cld) copy depending on the overlap direction.
static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  (void)memmove(to, from, count * HeapWordSize);
#else
  // Includes a zero-count check.
  // Fix: initialize temp -- it is passed to the asm as input operand
  // "3" (temp) before ever being written, so reading it uninitialized
  // is undefined behavior (matches the consolidated cpu/x86 version).
  intx temp = 0;
  __asm__ volatile(" testl %6,%6 ;"
                   " jz 7f ;"
                   " cmpl %4,%5 ;"
                   " leal -4(%4,%6,4),%3;"
                   " jbe 1f ;"
                   " cmpl %7,%5 ;"
                   " jbe 4f ;"
                   "1: cmpl $32,%6 ;"
                   " ja 3f ;"
                   " subl %4,%1 ;"
                   "2: movl (%4),%3 ;"
                   " movl %7,(%5,%4,1) ;"
                   " addl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 2b ;"
                   " jmp 7f ;"
                   "3: rep; smovl ;"
                   " jmp 7f ;"
                   "4: cmpl $32,%2 ;"
                   " movl %7,%0 ;"
                   " leal -4(%5,%6,4),%1;"
                   " ja 6f ;"
                   " subl %4,%1 ;"
                   "5: movl (%4),%3 ;"
                   " movl %7,(%5,%4,1) ;"
                   " subl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 5b ;"
                   " jmp 7f ;"
                   "6: std ;"
                   " rep; smovl ;"
                   " cld ;"
                   "7: nop "
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0" (from), "1" (to), "2" (count), "3" (temp)
                   : "memory", "flags");
#endif // AMD64
}
// Disjoint (non-overlapping) HeapWord copy.
// AMD64 expands counts 0..8 via an intentionally falling-through switch
// and uses memcpy above that; ia32 uses string-op assembly.
static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  switch (count) {
  case 8:  to[7] = from[7];   // fallthrough
  case 7:  to[6] = from[6];   // fallthrough
  case 6:  to[5] = from[5];   // fallthrough
  case 5:  to[4] = from[4];   // fallthrough
  case 4:  to[3] = from[3];   // fallthrough
  case 3:  to[2] = from[2];   // fallthrough
  case 2:  to[1] = from[1];   // fallthrough
  case 1:  to[0] = from[0];   // fallthrough
  case 0:  break;
  default:
    (void)memcpy(to, from, count * HeapWordSize);
    break;
  }
#else
  // Includes a zero-count check.
  // Fix: initialize temp -- it is passed to the asm as input operand
  // "3" (temp) before being written; reading it uninitialized is UB
  // (matches the consolidated cpu/x86 version).
  intx temp = 0;
  __asm__ volatile(" testl %6,%6 ;"
                   " jz 3f ;"
                   " cmpl $32,%6 ;"
                   " ja 2f ;"
                   " subl %4,%1 ;"
                   "1: movl (%4),%3 ;"
                   " movl %7,(%5,%4,1);"
                   " addl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 1b ;"
                   " jmp 3f ;"
                   "2: rep; smovl ;"
                   "3: nop "
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0" (from), "1" (to), "2" (count), "3" (temp)
                   : "memory", "cc");
#endif // AMD64
}

// Disjoint HeapWord copy with per-word atomicity: on AMD64 the default
// branch copies one word at a time instead of memcpy.
static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  switch (count) {
  case 8:  to[7] = from[7];   // fallthrough
  case 7:  to[6] = from[6];   // fallthrough
  case 6:  to[5] = from[5];   // fallthrough
  case 5:  to[4] = from[4];   // fallthrough
  case 4:  to[3] = from[3];   // fallthrough
  case 3:  to[2] = from[2];   // fallthrough
  case 2:  to[1] = from[1];   // fallthrough
  case 1:  to[0] = from[0];   // fallthrough
  case 0:  break;
  default:
    while (count-- > 0) {
      *to++ = *from++;
    }
    break;
  }
#else
  // pd_disjoint_words is word-atomic in this implementation.
  pd_disjoint_words(from, to, count);
#endif // AMD64
}
// "Aligned" variants simply forward to the general word-copy routines.
static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_words(from, to, count);
}

static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_disjoint_words(from, to, count);
}

// Conjoint byte copy (ranges may overlap).  AMD64 uses memmove; ia32 uses
// string-op assembly with dword alignment, prefix/suffix byte handling,
// and a backward (std) path for overlapping moves.
static void pd_conjoint_bytes(const void* from, void* to, size_t count) {
#ifdef AMD64
  (void)memmove(to, from, count);
#else
  // Includes a zero-count check.
  // Fix: initialize temp -- it is passed to the asm as input operand
  // "3" (temp) before being written; reading it uninitialized is UB
  // (matches the consolidated cpu/x86 version).
  intx temp = 0;
  __asm__ volatile(" testl %6,%6 ;"
                   " jz 13f ;"
                   " cmpl %4,%5 ;"
                   " leal -1(%4,%6),%3 ;"
                   " jbe 1f ;"
                   " cmpl %7,%5 ;"
                   " jbe 8f ;"
                   "1: cmpl $3,%6 ;"
                   " jbe 6f ;"
                   " movl %6,%3 ;"
                   " movl $4,%2 ;"
                   " subl %4,%2 ;"
                   " andl $3,%2 ;"
                   " jz 2f ;"
                   " subl %6,%3 ;"
                   " rep; smovb ;"
                   "2: movl %7,%2 ;"
                   " shrl $2,%2 ;"
                   " jz 5f ;"
                   " cmpl $32,%2 ;"
                   " ja 4f ;"
                   " subl %4,%1 ;"
                   "3: movl (%4),%%edx ;"
                   " movl %%edx,(%5,%4,1);"
                   " addl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 3b ;"
                   " addl %4,%1 ;"
                   " jmp 5f ;"
                   "4: rep; smovl ;"
                   "5: movl %7,%2 ;"
                   " andl $3,%2 ;"
                   " jz 13f ;"
                   "6: xorl %7,%3 ;"
                   "7: movb (%4,%7,1),%%dl ;"
                   " movb %%dl,(%5,%7,1) ;"
                   " addl $1,%3 ;"
                   " subl $1,%2 ;"
                   " jnz 7b ;"
                   " jmp 13f ;"
                   "8: std ;"
                   " cmpl $12,%2 ;"
                   " ja 9f ;"
                   " movl %7,%0 ;"
                   " leal -1(%6,%5),%1 ;"
                   " jmp 11f ;"
                   "9: xchgl %3,%2 ;"
                   " movl %6,%0 ;"
                   " addl $1,%2 ;"
                   " leal -1(%7,%5),%1 ;"
                   " andl $3,%2 ;"
                   " jz 10f ;"
                   " subl %6,%3 ;"
                   " rep; smovb ;"
                   "10: movl %7,%2 ;"
                   " subl $3,%0 ;"
                   " shrl $2,%2 ;"
                   " subl $3,%1 ;"
                   " rep; smovl ;"
                   " andl $3,%3 ;"
                   " jz 12f ;"
                   " movl %7,%2 ;"
                   " addl $3,%0 ;"
                   " addl $3,%1 ;"
                   "11: rep; smovb ;"
                   "12: cld ;"
                   "13: nop ;"
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0" (from), "1" (to), "2" (count), "3" (temp)
                   : "memory", "flags", "%edx");
#endif // AMD64
}
// Individual bytes are inherently atomic; reuse the plain byte copy.
static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
  pd_conjoint_bytes(from, to, count);
}

// Element-atomic short copy; delegates to the assembler stub.
static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
  _Copy_conjoint_jshorts_atomic(from, to, count);
}

// Element-atomic int copy; on ia32 a HeapWord is one jint, so the
// word-atomic pd_conjoint_words suffices (size pinned by the assert).
static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
#ifdef AMD64
  _Copy_conjoint_jints_atomic(from, to, count);
#else
  assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size");
  // pd_conjoint_words is word-atomic in this implementation.
  pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count);
#endif // AMD64
}

// Element-atomic long copy; ia32 uses fildll/fistpll so each 8-byte
// element is one FPU load/store.  The backward loop (count is
// pre-decremented, so (%0,%2,8) addresses element `count`) keeps
// overlapping moves safe.
static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
#ifdef AMD64
  _Copy_conjoint_jlongs_atomic(from, to, count);
#else
  // Guarantee use of fild/fistp or xmm regs via some asm code, because compilers won't.
  if (from > to) {
    while (count-- > 0) {
      __asm__ volatile("fildll (%0); fistpll (%1)"
                       :
                       : "r" (from), "r" (to)
                       : "memory" );
      ++from;
      ++to;
    }
  } else {
    while (count-- > 0) {
      __asm__ volatile("fildll (%0,%2,8); fistpll (%1,%2,8)"
                       :
                       : "r" (from), "r" (to), "r" (count)
                       : "memory" );
    }
  }
#endif // AMD64
}

// Element-atomic oop copy; size assumptions pinned by the asserts.
static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {
#ifdef AMD64
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
#else
  assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size");
  // pd_conjoint_words is word-atomic in this implementation.
  pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count);
#endif // AMD64
}

// arrayof_* variants: stubs on AMD64, element-atomic fallbacks on ia32.
static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_bytes(from, to, count);
}

static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jshorts(from, to, count);
}

static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  _Copy_arrayof_conjoint_jints(from, to, count);
#else
  pd_conjoint_jints_atomic((const jint*)from, (jint*)to, count);
#endif // AMD64
}

static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  _Copy_arrayof_conjoint_jlongs(from, to, count);
#else
  pd_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
#endif // AMD64
}

static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_arrayof_conjoint_jlongs(from, to, count);
#else
  pd_conjoint_oops_atomic((const oop*)from, (oop*)to, count);
#endif // AMD64
}
// Empty for build system
#endif // OS_CPU_BSD_X86_COPY_BSD_X86_HPP

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -26,163 +26,6 @@
#ifndef OS_CPU_LINUX_AARCH64_COPY_LINUX_AARCH64_HPP
#define OS_CPU_LINUX_AARCH64_COPY_LINUX_AARCH64_HPP
// Copy exactly `count` HeapWords (0 <= count <= 8) from `from` to `to`
// via a computed branch into 32-byte-aligned copy stubs.  (Removed by
// JDK-8142362: duplicate of the cpu/aarch64 version; note this variant's
// asm strings end in ";" with no "\n".)
#define COPY_SMALL(from, to, count)                                     \
{                                                                       \
  long tmp0, tmp1, tmp2, tmp3;                                          \
  long tmp4, tmp5, tmp6, tmp7;                                          \
  __asm volatile(                                                       \
" adr %[t0], 0f;"                                                       \
" add %[t0], %[t0], %[cnt], lsl #5;"                                    \
" br %[t0];"                                                            \
" .align 5;"                                                            \
"0:"                                                                    \
" b 1f;"                                                                \
" .align 5;"                                                            \
" ldr %[t0], [%[s], #0];"                                               \
" str %[t0], [%[d], #0];"                                               \
" b 1f;"                                                                \
" .align 5;"                                                            \
" ldp %[t0], %[t1], [%[s], #0];"                                        \
" stp %[t0], %[t1], [%[d], #0];"                                        \
" b 1f;"                                                                \
" .align 5;"                                                            \
" ldp %[t0], %[t1], [%[s], #0];"                                        \
" ldr %[t2], [%[s], #16];"                                              \
" stp %[t0], %[t1], [%[d], #0];"                                        \
" str %[t2], [%[d], #16];"                                              \
" b 1f;"                                                                \
" .align 5;"                                                            \
" ldp %[t0], %[t1], [%[s], #0];"                                        \
" ldp %[t2], %[t3], [%[s], #16];"                                       \
" stp %[t0], %[t1], [%[d], #0];"                                        \
" stp %[t2], %[t3], [%[d], #16];"                                       \
" b 1f;"                                                                \
" .align 5;"                                                            \
" ldp %[t0], %[t1], [%[s], #0];"                                        \
" ldp %[t2], %[t3], [%[s], #16];"                                       \
" ldr %[t4], [%[s], #32];"                                              \
" stp %[t0], %[t1], [%[d], #0];"                                        \
" stp %[t2], %[t3], [%[d], #16];"                                       \
" str %[t4], [%[d], #32];"                                              \
" b 1f;"                                                                \
" .align 5;"                                                            \
" ldp %[t0], %[t1], [%[s], #0];"                                        \
" ldp %[t2], %[t3], [%[s], #16];"                                       \
" ldp %[t4], %[t5], [%[s], #32];"                                       \
"2:"                                                                    \
" stp %[t0], %[t1], [%[d], #0];"                                        \
" stp %[t2], %[t3], [%[d], #16];"                                       \
" stp %[t4], %[t5], [%[d], #32];"                                       \
" b 1f;"                                                                \
" .align 5;"                                                            \
" ldr %[t6], [%[s], #0];"                                               \
" ldp %[t0], %[t1], [%[s], #8];"                                        \
" ldp %[t2], %[t3], [%[s], #24];"                                       \
" ldp %[t4], %[t5], [%[s], #40];"                                       \
" str %[t6], [%[d]], #8;"                                               \
" b 2b;"                                                                \
" .align 5;"                                                            \
" ldp %[t0], %[t1], [%[s], #0];"                                        \
" ldp %[t2], %[t3], [%[s], #16];"                                       \
" ldp %[t4], %[t5], [%[s], #32];"                                       \
" ldp %[t6], %[t7], [%[s], #48];"                                       \
" stp %[t0], %[t1], [%[d], #0];"                                        \
" stp %[t2], %[t3], [%[d], #16];"                                       \
" stp %[t4], %[t5], [%[d], #32];"                                       \
" stp %[t6], %[t7], [%[d], #48];"                                       \
"1:"                                                                    \
                                                                        \
  : [s]"+r"(from), [d]"+r"(to), [cnt]"+r"(count),                       \
    [t0]"=&r"(tmp0), [t1]"=&r"(tmp1), [t2]"=&r"(tmp2), [t3]"=&r"(tmp3), \
    [t4]"=&r"(tmp4), [t5]"=&r"(tmp5), [t6]"=&r"(tmp6), [t7]"=&r"(tmp7)  \
  :                                                                     \
  : "memory", "cc");                                                    \
}
static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
__asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
if (__builtin_expect(count <= 8, 1)) {
COPY_SMALL(from, to, count);
return;
}
_Copy_conjoint_words(from, to, count);
}
static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
if (__builtin_constant_p(count)) {
memcpy(to, from, count * sizeof(HeapWord));
return;
}
__asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
if (__builtin_expect(count <= 8, 1)) {
COPY_SMALL(from, to, count);
return;
}
_Copy_disjoint_words(from, to, count);
}
static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
__asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
if (__builtin_expect(count <= 8, 1)) {
COPY_SMALL(from, to, count);
return;
}
_Copy_disjoint_words(from, to, count);
}
static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
pd_conjoint_words(from, to, count);
}
static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
pd_disjoint_words(from, to, count);
}
static void pd_conjoint_bytes(const void* from, void* to, size_t count) {
(void)memmove(to, from, count);
}
static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
pd_conjoint_bytes(from, to, count);
}
// Element-atomic conjoint copy of jshorts; implemented by the assembly stub.
static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
  _Copy_conjoint_jshorts_atomic(from, to, count);
}
// Element-atomic conjoint copy of jints; implemented by the assembly stub.
static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
  _Copy_conjoint_jints_atomic(from, to, count);
}
// Element-atomic conjoint copy of jlongs; implemented by the assembly stub.
static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
  _Copy_conjoint_jlongs_atomic(from, to, count);
}
// Element-atomic conjoint copy of oops, done by reusing the 64-bit jlong
// stub; valid only because oops and jlongs are the same size (asserted).
static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
}
// arrayof variant: conjoint byte copy via the assembly stub.
static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_bytes(from, to, count);
}
// arrayof variant: conjoint jshort copy via the assembly stub.
static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jshorts(from, to, count);
}
// arrayof variant: conjoint jint copy via the assembly stub.
static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jints(from, to, count);
}
// arrayof variant: conjoint jlong copy via the assembly stub.
static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jlongs(from, to, count);
}
// arrayof variant: conjoint oop copy, done by reusing the 64-bit jlong stub.
// Only valid for full-width (uncompressed) oops of jlong size, as asserted.
static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {
  // The original assert message here was the placeholder "foo!"; give it a
  // message that actually explains the precondition being checked.
  assert(!UseCompressedOops, "copying full-width oops; compressed oops not expected here");
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_arrayof_conjoint_jlongs(from, to, count);
}
// Empty for build system
#endif // OS_CPU_LINUX_AARCH64_COPY_LINUX_AARCH64_HPP

@ -1,5 +1,5 @@
#
# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -58,12 +58,6 @@ to .req r1
SpinPause:
bx LR
# Support for void Copy::conjoint_bytes(void* from,
# void* to,
# size_t count)
_Copy_conjoint_bytes:
swi 0x9f0001
# Support for void Copy::arrayof_conjoint_bytes(void* from,
# void* to,
# size_t count)

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,285 +25,6 @@
#ifndef OS_CPU_LINUX_X86_COPY_LINUX_X86_HPP
#define OS_CPU_LINUX_X86_COPY_LINUX_X86_HPP
// Copy count HeapWords; source and destination may overlap ("conjoint").
static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  // memmove copes with overlap in either direction.
  (void)memmove(to, from, count * HeapWordSize);
#else
  // Includes a zero-count check.
  // Hand-written 32-bit copy: labels 1-3 copy forward (explicit loop for
  // <= 32 words, rep/smovl otherwise); labels 4-6 copy backward with the
  // direction flag set (std/cld) when the regions overlap upward.
  intx temp = 0;
  __asm__ volatile(" testl %6,%6 ;"
                   " jz 7f ;"
                   " cmpl %4,%5 ;"
                   " leal -4(%4,%6,4),%3;"
                   " jbe 1f ;"
                   " cmpl %7,%5 ;"
                   " jbe 4f ;"
                   "1: cmpl $32,%6 ;"
                   " ja 3f ;"
                   " subl %4,%1 ;"
                   "2: movl (%4),%3 ;"
                   " movl %7,(%5,%4,1) ;"
                   " addl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 2b ;"
                   " jmp 7f ;"
                   "3: rep; smovl ;"
                   " jmp 7f ;"
                   "4: cmpl $32,%2 ;"
                   " movl %7,%0 ;"
                   " leal -4(%5,%6,4),%1;"
                   " ja 6f ;"
                   " subl %4,%1 ;"
                   "5: movl (%4),%3 ;"
                   " movl %7,(%5,%4,1) ;"
                   " subl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 5b ;"
                   " jmp 7f ;"
                   "6: std ;"
                   " rep; smovl ;"
                   " cld ;"
                   "7: nop "
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0" (from), "1" (to), "2" (count), "3" (temp)
                   : "memory", "flags");
#endif // AMD64
}
// Copy count HeapWords between non-overlapping regions.
static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  // Unrolled copy for counts up to 8; each case intentionally falls
  // through to the next so exactly 'count' words are moved.
  switch (count) {
  case 8: to[7] = from[7];
  case 7: to[6] = from[6];
  case 6: to[5] = from[5];
  case 5: to[4] = from[4];
  case 4: to[3] = from[3];
  case 3: to[2] = from[2];
  case 2: to[1] = from[1];
  case 1: to[0] = from[0];
  case 0: break;
  default:
    (void)memcpy(to, from, count * HeapWordSize);
    break;
  }
#else
  // Includes a zero-count check.
  // Forward-only copy: explicit loop for <= 32 words, rep/smovl otherwise.
  intx temp = 0;
  __asm__ volatile(" testl %6,%6 ;"
                   " jz 3f ;"
                   " cmpl $32,%6 ;"
                   " ja 2f ;"
                   " subl %4,%1 ;"
                   "1: movl (%4),%3 ;"
                   " movl %7,(%5,%4,1);"
                   " addl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 1b ;"
                   " jmp 3f ;"
                   "2: rep; smovl ;"
                   "3: nop "
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0" (from), "1" (to), "2" (count), "3" (temp)
                   : "memory", "cc");
#endif // AMD64
}
// Word-atomic disjoint copy: every HeapWord is moved with a single
// word-sized assignment (no memcpy, which gives no such guarantee).
static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  // Unrolled for counts up to 8; cases intentionally fall through.
  switch (count) {
  case 8: to[7] = from[7];
  case 7: to[6] = from[6];
  case 6: to[5] = from[5];
  case 5: to[4] = from[4];
  case 4: to[3] = from[3];
  case 3: to[2] = from[2];
  case 2: to[1] = from[1];
  case 1: to[0] = from[0];
  case 0: break;
  default:
    // Word-at-a-time loop keeps each store atomic.
    while (count-- > 0) {
      *to++ = *from++;
    }
    break;
  }
#else
  // pd_disjoint_words is word-atomic in this implementation.
  pd_disjoint_words(from, to, count);
#endif // AMD64
}
// Aligned variant: no alignment-specific fast path on this platform,
// so forward to the general conjoint word copy.
static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_conjoint_words(from, to, count);
}
// Aligned variant: no alignment-specific fast path on this platform,
// so forward to the general disjoint word copy.
static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_disjoint_words(from, to, count);
}
// Copy count bytes; the regions may overlap ("conjoint").
static void pd_conjoint_bytes(const void* from, void* to, size_t count) {
#ifdef AMD64
  // memmove copes with overlap in either direction.
  (void)memmove(to, from, count);
#else
  // Includes a zero-count check.
  // Hand-written 32-bit overlap-aware byte copy:
  //   labels 1-7  : forward copy — align the source to a dword boundary
  //                 (byte prefix), bulk-move dwords (loop or rep/smovl),
  //                 then copy the byte suffix;
  //   labels 8-12 : backward copy with the direction flag set (std/cld)
  //                 for regions that overlap upward, same prefix/dword/
  //                 suffix structure in reverse;
  //   label 13    : done.
  intx temp = 0;
  __asm__ volatile(" testl %6,%6 ;"
                   " jz 13f ;"
                   " cmpl %4,%5 ;"
                   " leal -1(%4,%6),%3 ;"
                   " jbe 1f ;"
                   " cmpl %7,%5 ;"
                   " jbe 8f ;"
                   "1: cmpl $3,%6 ;"
                   " jbe 6f ;"
                   " movl %6,%3 ;"
                   " movl $4,%2 ;"
                   " subl %4,%2 ;"
                   " andl $3,%2 ;"
                   " jz 2f ;"
                   " subl %6,%3 ;"
                   " rep; smovb ;"
                   "2: movl %7,%2 ;"
                   " shrl $2,%2 ;"
                   " jz 5f ;"
                   " cmpl $32,%2 ;"
                   " ja 4f ;"
                   " subl %4,%1 ;"
                   "3: movl (%4),%%edx ;"
                   " movl %%edx,(%5,%4,1);"
                   " addl $4,%0 ;"
                   " subl $1,%2 ;"
                   " jnz 3b ;"
                   " addl %4,%1 ;"
                   " jmp 5f ;"
                   "4: rep; smovl ;"
                   "5: movl %7,%2 ;"
                   " andl $3,%2 ;"
                   " jz 13f ;"
                   "6: xorl %7,%3 ;"
                   "7: movb (%4,%7,1),%%dl ;"
                   " movb %%dl,(%5,%7,1) ;"
                   " addl $1,%3 ;"
                   " subl $1,%2 ;"
                   " jnz 7b ;"
                   " jmp 13f ;"
                   "8: std ;"
                   " cmpl $12,%2 ;"
                   " ja 9f ;"
                   " movl %7,%0 ;"
                   " leal -1(%6,%5),%1 ;"
                   " jmp 11f ;"
                   "9: xchgl %3,%2 ;"
                   " movl %6,%0 ;"
                   " addl $1,%2 ;"
                   " leal -1(%7,%5),%1 ;"
                   " andl $3,%2 ;"
                   " jz 10f ;"
                   " subl %6,%3 ;"
                   " rep; smovb ;"
                   "10: movl %7,%2 ;"
                   " subl $3,%0 ;"
                   " shrl $2,%2 ;"
                   " subl $3,%1 ;"
                   " rep; smovl ;"
                   " andl $3,%3 ;"
                   " jz 12f ;"
                   " movl %7,%2 ;"
                   " addl $3,%0 ;"
                   " addl $3,%1 ;"
                   "11: rep; smovb ;"
                   "12: cld ;"
                   "13: nop ;"
                   : "=S" (from), "=D" (to), "=c" (count), "=r" (temp)
                   : "0" (from), "1" (to), "2" (count), "3" (temp)
                   : "memory", "flags", "%edx");
#endif // AMD64
}
// Byte-granularity copies need no special atomicity handling here;
// delegate to the plain conjoint byte copy.
static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
  pd_conjoint_bytes(from, to, count);
}
// Element-atomic conjoint copy of jshorts; implemented by the assembly stub.
static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
  _Copy_conjoint_jshorts_atomic(from, to, count);
}
// Element-atomic conjoint copy of jints.
static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) {
#ifdef AMD64
  _Copy_conjoint_jints_atomic(from, to, count);
#else
  // On 32-bit, a jint is exactly one HeapWord (asserted below), so the
  // word copy provides the required per-element atomicity.
  assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size");
  // pd_conjoint_words is word-atomic in this implementation.
  pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count);
#endif // AMD64
}
// Element-atomic conjoint copy of jlongs. On 32-bit x86 an ordinary
// 64-bit copy would be two 32-bit moves, so the x87 fildll/fistpll pair
// is used to load and store each jlong as one 64-bit access.
static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) {
#ifdef AMD64
  _Copy_conjoint_jlongs_atomic(from, to, count);
#else
  // Guarantee use of fild/fistp or xmm regs via some asm code, because compilers won't.
  if (from > to) {
    // Destination below source: copy upward, element by element.
    while (count-- > 0) {
      __asm__ volatile("fildll (%0); fistpll (%1)"
                       :
                       : "r" (from), "r" (to)
                       : "memory" );
      ++from;
      ++to;
    }
  } else {
    // Destination at or above source: copy downward (highest index first,
    // since count is decremented before it is used as the index) so an
    // overlapping region is not clobbered before it is read.
    while (count-- > 0) {
      __asm__ volatile("fildll (%0,%2,8); fistpll (%1,%2,8)"
                       :
                       : "r" (from), "r" (to), "r" (count)
                       : "memory" );
    }
  }
#endif // AMD64
}
// Element-atomic conjoint copy of oops, reusing whichever copy matches the
// platform oop size (asserted in each branch).
static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) {
#ifdef AMD64
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
#else
  assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size");
  // pd_conjoint_words is word-atomic in this implementation.
  pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count);
#endif // AMD64
}
// arrayof variant: conjoint byte copy via the assembly stub.
static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_bytes(from, to, count);
}
// arrayof variant: conjoint jshort copy via the assembly stub.
static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) {
  _Copy_arrayof_conjoint_jshorts(from, to, count);
}
// arrayof variant of the conjoint jint copy: dedicated stub on AMD64,
// otherwise reuse the element-atomic jint copy.
static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  _Copy_arrayof_conjoint_jints(from, to, count);
#else
  pd_conjoint_jints_atomic((const jint*)from, (jint*)to, count);
#endif // AMD64
}
// arrayof variant of the conjoint jlong copy: dedicated stub on AMD64,
// otherwise reuse the element-atomic jlong copy.
static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  _Copy_arrayof_conjoint_jlongs(from, to, count);
#else
  pd_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count);
#endif // AMD64
}
// arrayof variant of the conjoint oop copy: on AMD64 oops are jlong-sized
// (asserted) and go through the jlong stub; otherwise reuse the
// element-atomic oop copy.
static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size");
  _Copy_arrayof_conjoint_jlongs(from, to, count);
#else
  pd_conjoint_oops_atomic((const oop*)from, (oop*)to, count);
#endif // AMD64
}
// Empty for build system
#endif // OS_CPU_LINUX_X86_COPY_LINUX_X86_HPP

@ -1,5 +1,5 @@
#
# Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2004, 2022, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@ -28,7 +28,6 @@
# point or use it in the same manner as does the server
# compiler.
.globl _Copy_conjoint_bytes
.globl _Copy_arrayof_conjoint_bytes
.globl _Copy_conjoint_jshorts_atomic
.globl _Copy_arrayof_conjoint_jshorts
@ -51,117 +50,10 @@ SpinPause:
movl $1, %eax
ret
# Support for void Copy::conjoint_bytes(void* from,
# void* to,
# size_t count)
        # void _Copy_conjoint_bytes(void* from, void* to, size_t count)
        # Overlap-tolerant byte copy (cdecl: args at 4/8/12(%esp) before the
        # two register pushes). Copies low-to-high (cb_CopyRight) when that is
        # safe, high-to-low with the direction flag set (cb_CopyLeft) when the
        # destination overlaps the source from above. Both directions move a
        # byte prefix/suffix and bulk-move dwords with a loop or rep/smovl.
        .p2align 4,,15
        .type    _Copy_conjoint_bytes,@function
_Copy_conjoint_bytes:
        pushl    %esi
        movl     4+12(%esp),%ecx      # count
        pushl    %edi
        movl     8+ 4(%esp),%esi      # from
        movl     8+ 8(%esp),%edi      # to
        cmpl     %esi,%edi
        leal     -1(%esi,%ecx),%eax   # from + count - 1
        jbe      cb_CopyRight
        cmpl     %eax,%edi
        jbe      cb_CopyLeft
        # copy from low to high
cb_CopyRight:
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
        # align source address at dword address boundary
        movl     %ecx,%eax            # original count
        movl     $4,%ecx
        subl     %esi,%ecx
        andl     $3,%ecx              # prefix byte count
        jz       1f                   # no prefix
        subl     %ecx,%eax            # byte count less prefix
        # copy prefix
        subl     %esi,%edi
0:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        addl     $1,%esi
        subl     $1,%ecx
        jnz      0b
        addl     %esi,%edi
1:      movl     %eax,%ecx            # byte count less prefix
        shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        jbe      2f                   # <= 32 dwords
        # copy aligned dwords
        rep;     smovl
        jmp      4f
        # copy aligned dwords
2:      subl     %esi,%edi
        .p2align 4,,15
3:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        addl     $4,%esi
        subl     $1,%ecx
        jnz      3b
        addl     %esi,%edi
4:      movl     %eax,%ecx            # byte count less prefix
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        xorl     %eax,%eax
6:      movb     (%esi,%eax,1),%dl
        movb     %dl,(%edi,%eax,1)
        addl     $1,%eax
        subl     $1,%ecx
        jnz      6b
7:      popl     %edi
        popl     %esi
        ret
        # copy from high to low
        # (direction flag is set here and cleared again at label 7 below)
cb_CopyLeft:
        std
        leal     -4(%edi,%ecx),%edi   # to + count - 4
        movl     %eax,%esi            # from + count - 1
        movl     %ecx,%eax
        subl     $3,%esi              # from + count - 4
        cmpl     $3,%ecx
        jbe      5f                   # <= 3 bytes
1:      shrl     $2,%ecx              # dword count
        jz       4f                   # no dwords to move
        cmpl     $32,%ecx
        ja       3f                   # > 32 dwords
        # copy dwords, aligned or not
        subl     %esi,%edi
        .p2align 4,,15
2:      movl     (%esi),%edx
        movl     %edx,(%edi,%esi,1)
        subl     $4,%esi
        subl     $1,%ecx
        jnz      2b
        addl     %esi,%edi
        jmp      4f
        # copy dwords, aligned or not
3:      rep;     smovl
4:      movl     %eax,%ecx            # byte count
5:      andl     $3,%ecx              # suffix byte count
        jz       7f                   # no suffix
        # copy suffix
        subl     %esi,%edi
        addl     $3,%esi
6:      movb     (%esi),%dl
        movb     %dl,(%edi,%esi,1)
        subl     $1,%esi
        subl     $1,%ecx
        jnz      6b
7:      cld
        popl     %edi
        popl     %esi
        ret
# Support for void Copy::arrayof_conjoint_bytes(void* from,
# void* to,
# size_t count)
#
# Same as _Copy_conjoint_bytes, except no source alignment check.
.p2align 4,,15
.type _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,64 +25,6 @@
#ifndef OS_CPU_WINDOWS_X86_COPY_WINDOWS_X86_HPP
#define OS_CPU_WINDOWS_X86_COPY_WINDOWS_X86_HPP
// Conjoint (possibly overlapping) HeapWord copy; memmove handles overlap
// in either direction.
static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  memmove(to, from, count * HeapWordSize);
}
// Copy count HeapWords between non-overlapping regions.
static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
#ifdef AMD64
  // Unrolled copy for counts up to 8; each case intentionally falls
  // through to the next so exactly 'count' words are moved.
  switch (count) {
  case 8: to[7] = from[7];
  case 7: to[6] = from[6];
  case 6: to[5] = from[5];
  case 5: to[4] = from[4];
  case 4: to[3] = from[3];
  case 3: to[2] = from[2];
  case 2: to[1] = from[1];
  case 1: to[0] = from[0];
  case 0: break;
  default:
    (void)memcpy(to, from, count * HeapWordSize);
    break;
  }
#else
  (void)memcpy(to, from, count * HeapWordSize);
#endif // AMD64
}
// Word-atomic disjoint copy: every HeapWord is moved with a single
// word-sized assignment (no memcpy, which gives no such guarantee).
static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) {
  // Unrolled for counts up to 8; cases intentionally fall through.
  switch (count) {
  case 8: to[7] = from[7];
  case 7: to[6] = from[6];
  case 6: to[5] = from[5];
  case 5: to[4] = from[4];
  case 4: to[3] = from[3];
  case 3: to[2] = from[2];
  case 2: to[1] = from[1];
  case 1: to[0] = from[0];
  case 0: break;
  default: while (count-- > 0) {
             *to++ = *from++;
           }
           break;
  }
}
// Aligned conjoint word copy; alignment gets no special treatment here,
// memmove (overlap-safe) does the work directly.
static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  memmove(to, from, count * HeapWordSize);
}
// Aligned variant: no alignment-specific fast path on this platform,
// so forward to the general disjoint word copy.
static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) {
  pd_disjoint_words(from, to, count);
}
// Copy count bytes; the regions may overlap, so memmove (which handles
// overlap in either direction) does exactly the right thing.
static void pd_conjoint_bytes(const void* from, void* to, size_t count) {
  memmove(to, from, count);
}
// Byte-granularity copies need no special atomicity handling here;
// delegate to the plain conjoint byte copy.
static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) {
  pd_conjoint_bytes(from, to, count);
}
static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) {
if (from > to) {

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -37,15 +37,6 @@ extern "C" {
void _Copy_conjoint_words(const HeapWord* from, HeapWord* to, size_t count);
void _Copy_disjoint_words(const HeapWord* from, HeapWord* to, size_t count);
void _Copy_conjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count);
void _Copy_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count);
void _Copy_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count);
void _Copy_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count);
void _Copy_conjoint_bytes(const void* from, void* to, size_t count);
void _Copy_conjoint_bytes_atomic (const void* from, void* to, size_t count);
void _Copy_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count);
void _Copy_conjoint_jints_atomic (const jint* from, jint* to, size_t count);
void _Copy_conjoint_jlongs_atomic (const jlong* from, jlong* to, size_t count);
@ -55,7 +46,6 @@ extern "C" {
void _Copy_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count);
void _Copy_arrayof_conjoint_jints (const HeapWord* from, HeapWord* to, size_t count);
void _Copy_arrayof_conjoint_jlongs (const HeapWord* from, HeapWord* to, size_t count);
void _Copy_arrayof_conjoint_oops (const HeapWord* from, HeapWord* to, size_t count);
}
class Copy : AllStatic {