# # Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. # # This code is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License version 2 only, as # published by the Free Software Foundation. # # This code is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # version 2 for more details (a copy is included in the LICENSE file that # accompanied this code). # # You should have received a copy of the GNU General Public License version # 2 along with this work; if not, write to the Free Software Foundation, # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. # # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA # or visit www.oracle.com if you need additional information or have any # questions. # #ifdef __APPLE__ # Darwin uses _ prefixed global symbols #define SYMBOL(s) _ ## s #define ELF_TYPE(name, description) #else #define SYMBOL(s) s #define ELF_TYPE(name, description) .type name,description #endif # NOTE WELL! The _Copy functions are called directly # from server-compiler-generated code via CallLeafNoFP, # which means that they *must* either not use floating # point or use it in the same manner as does the server # compiler. .globl SYMBOL(_Copy_arrayof_conjoint_bytes) .globl SYMBOL(_Copy_arrayof_conjoint_jshorts) .globl SYMBOL(_Copy_conjoint_jshorts_atomic) .globl SYMBOL(_Copy_arrayof_conjoint_jints) .globl SYMBOL(_Copy_conjoint_jints_atomic) .globl SYMBOL(_Copy_arrayof_conjoint_jlongs) .globl SYMBOL(_Copy_conjoint_jlongs_atomic) .text .globl SYMBOL(SpinPause) .p2align 4,,15 ELF_TYPE(SpinPause,@function) SYMBOL(SpinPause): rep nop movq $1, %rax ret # Support for void Copy::arrayof_conjoint_bytes(void* from, # void* to, # size_t count) # rdi - from # rsi - to # rdx - count, treated as ssize_t # .p2align 4,,15 ELF_TYPE(_Copy_arrayof_conjoint_bytes,@function) SYMBOL(_Copy_arrayof_conjoint_bytes): movq %rdx,%r8 # byte count shrq $3,%rdx # qword count cmpq %rdi,%rsi leaq -1(%rdi,%r8,1),%rax # from + bcount*1 - 1 jbe acb_CopyRight cmpq %rax,%rsi jbe acb_CopyLeft acb_CopyRight: leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8 leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8 negq %rdx jmp 7f .p2align 4,,15 1: movq 8(%rax,%rdx,8),%rsi movq %rsi,8(%rcx,%rdx,8) addq $1,%rdx jnz 1b 2: testq $4,%r8 # check for trailing dword jz 3f movl 8(%rax),%esi # copy trailing dword movl %esi,8(%rcx) addq $4,%rax addq $4,%rcx # original %rsi is trashed, so we # can't use it as a base register 3: testq $2,%r8 # check for trailing word jz 4f movw 8(%rax),%si # copy trailing word movw %si,8(%rcx) addq $2,%rcx 4: testq $1,%r8 # check for trailing byte jz 5f movb -1(%rdi,%r8,1),%al # copy trailing byte movb %al,8(%rcx) 5: ret .p2align 4,,15 6: movq -24(%rax,%rdx,8),%rsi movq %rsi,-24(%rcx,%rdx,8) movq -16(%rax,%rdx,8),%rsi movq %rsi,-16(%rcx,%rdx,8) movq -8(%rax,%rdx,8),%rsi movq %rsi,-8(%rcx,%rdx,8) movq (%rax,%rdx,8),%rsi movq %rsi,(%rcx,%rdx,8) 7: addq $4,%rdx jle 6b subq $4,%rdx jl 1b jmp 2b acb_CopyLeft: testq $1,%r8 # check for trailing byte jz 1f movb -1(%rdi,%r8,1),%cl # copy trailing byte movb %cl,-1(%rsi,%r8,1) subq $1,%r8 # adjust for possible trailing word 1: testq $2,%r8 # check for trailing word jz 2f movw -2(%rdi,%r8,1),%cx # copy trailing word movw %cx,-2(%rsi,%r8,1) 2: testq $4,%r8 # check for trailing dword jz 5f movl (%rdi,%rdx,8),%ecx # copy trailing dword movl %ecx,(%rsi,%rdx,8) jmp 5f .p2align 4,,15 3: movq -8(%rdi,%rdx,8),%rcx movq %rcx,-8(%rsi,%rdx,8) subq $1,%rdx jnz 3b ret .p2align 4,,15 4: movq 24(%rdi,%rdx,8),%rcx movq %rcx,24(%rsi,%rdx,8) movq 16(%rdi,%rdx,8),%rcx movq %rcx,16(%rsi,%rdx,8) movq 8(%rdi,%rdx,8),%rcx movq %rcx,8(%rsi,%rdx,8) movq (%rdi,%rdx,8),%rcx movq %rcx,(%rsi,%rdx,8) 5: subq $4,%rdx jge 4b addq $4,%rdx jg 3b ret # Support for void Copy::arrayof_conjoint_jshorts(void* from, # void* to, # size_t count) # Equivalent to # conjoint_jshorts_atomic # # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we # let the hardware handle it. The tow or four words within dwords # or qwords that span cache line boundaries will still be loaded # and stored atomically. # # rdi - from # rsi - to # rdx - count, treated as ssize_t # .p2align 4,,15 ELF_TYPE(_Copy_arrayof_conjoint_jshorts,@function) ELF_TYPE(_Copy_conjoint_jshorts_atomic,@function) SYMBOL(_Copy_arrayof_conjoint_jshorts): SYMBOL(_Copy_conjoint_jshorts_atomic): movq %rdx,%r8 # word count shrq $2,%rdx # qword count cmpq %rdi,%rsi leaq -2(%rdi,%r8,2),%rax # from + wcount*2 - 2 jbe acs_CopyRight cmpq %rax,%rsi jbe acs_CopyLeft acs_CopyRight: leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8 leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8 negq %rdx jmp 6f 1: movq 8(%rax,%rdx,8),%rsi movq %rsi,8(%rcx,%rdx,8) addq $1,%rdx jnz 1b 2: testq $2,%r8 # check for trailing dword jz 3f movl 8(%rax),%esi # copy trailing dword movl %esi,8(%rcx) addq $4,%rcx # original %rsi is trashed, so we # can't use it as a base register 3: testq $1,%r8 # check for trailing word jz 4f movw -2(%rdi,%r8,2),%si # copy trailing word movw %si,8(%rcx) 4: ret .p2align 4,,15 5: movq -24(%rax,%rdx,8),%rsi movq %rsi,-24(%rcx,%rdx,8) movq -16(%rax,%rdx,8),%rsi movq %rsi,-16(%rcx,%rdx,8) movq -8(%rax,%rdx,8),%rsi movq %rsi,-8(%rcx,%rdx,8) movq (%rax,%rdx,8),%rsi movq %rsi,(%rcx,%rdx,8) 6: addq $4,%rdx jle 5b subq $4,%rdx jl 1b jmp 2b acs_CopyLeft: testq $1,%r8 # check for trailing word jz 1f movw -2(%rdi,%r8,2),%cx # copy trailing word movw %cx,-2(%rsi,%r8,2) 1: testq $2,%r8 # check for trailing dword jz 4f movl (%rdi,%rdx,8),%ecx # copy trailing dword movl %ecx,(%rsi,%rdx,8) jmp 4f 2: movq -8(%rdi,%rdx,8),%rcx movq %rcx,-8(%rsi,%rdx,8) subq $1,%rdx jnz 2b ret .p2align 4,,15 3: movq 24(%rdi,%rdx,8),%rcx movq %rcx,24(%rsi,%rdx,8) movq 16(%rdi,%rdx,8),%rcx movq %rcx,16(%rsi,%rdx,8) movq 8(%rdi,%rdx,8),%rcx movq %rcx,8(%rsi,%rdx,8) movq (%rdi,%rdx,8),%rcx movq %rcx,(%rsi,%rdx,8) 4: subq $4,%rdx jge 3b addq $4,%rdx jg 2b ret # Support for void Copy::arrayof_conjoint_jints(jint* from, # jint* to, # size_t count) # Equivalent to # conjoint_jints_atomic # # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let # the hardware handle it. The two dwords within qwords that span # cache line boundaries will still be loaded and stored atomically. # # rdi - from # rsi - to # rdx - count, treated as ssize_t # .p2align 4,,15 ELF_TYPE(_Copy_arrayof_conjoint_jints,@function) ELF_TYPE(_Copy_conjoint_jints_atomic,@function) SYMBOL(_Copy_arrayof_conjoint_jints): SYMBOL(_Copy_conjoint_jints_atomic): movq %rdx,%r8 # dword count shrq %rdx # qword count cmpq %rdi,%rsi leaq -4(%rdi,%r8,4),%rax # from + dcount*4 - 4 jbe aci_CopyRight cmpq %rax,%rsi jbe aci_CopyLeft aci_CopyRight: leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8 leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8 negq %rdx jmp 5f .p2align 4,,15 1: movq 8(%rax,%rdx,8),%rsi movq %rsi,8(%rcx,%rdx,8) addq $1,%rdx jnz 1b 2: testq $1,%r8 # check for trailing dword jz 3f movl 8(%rax),%esi # copy trailing dword movl %esi,8(%rcx) 3: ret .p2align 4,,15 4: movq -24(%rax,%rdx,8),%rsi movq %rsi,-24(%rcx,%rdx,8) movq -16(%rax,%rdx,8),%rsi movq %rsi,-16(%rcx,%rdx,8) movq -8(%rax,%rdx,8),%rsi movq %rsi,-8(%rcx,%rdx,8) movq (%rax,%rdx,8),%rsi movq %rsi,(%rcx,%rdx,8) 5: addq $4,%rdx jle 4b subq $4,%rdx jl 1b jmp 2b aci_CopyLeft: testq $1,%r8 # check for trailing dword jz 3f movl -4(%rdi,%r8,4),%ecx # copy trailing dword movl %ecx,-4(%rsi,%r8,4) jmp 3f 1: movq -8(%rdi,%rdx,8),%rcx movq %rcx,-8(%rsi,%rdx,8) subq $1,%rdx jnz 1b ret .p2align 4,,15 2: movq 24(%rdi,%rdx,8),%rcx movq %rcx,24(%rsi,%rdx,8) movq 16(%rdi,%rdx,8),%rcx movq %rcx,16(%rsi,%rdx,8) movq 8(%rdi,%rdx,8),%rcx movq %rcx,8(%rsi,%rdx,8) movq (%rdi,%rdx,8),%rcx movq %rcx,(%rsi,%rdx,8) 3: subq $4,%rdx jge 2b addq $4,%rdx jg 1b ret # Support for void Copy::arrayof_conjoint_jlongs(jlong* from, # jlong* to, # size_t count) # Equivalent to # conjoint_jlongs_atomic # arrayof_conjoint_oops # conjoint_oops_atomic # # rdi - from # rsi - to # rdx - count, treated as ssize_t # .p2align 4,,15 ELF_TYPE(_Copy_arrayof_conjoint_jlongs,@function) ELF_TYPE(_Copy_conjoint_jlongs_atomic,@function) SYMBOL(_Copy_arrayof_conjoint_jlongs): SYMBOL(_Copy_conjoint_jlongs_atomic): cmpq %rdi,%rsi leaq -8(%rdi,%rdx,8),%rax # from + count*8 - 8 jbe acl_CopyRight cmpq %rax,%rsi jbe acl_CopyLeft acl_CopyRight: leaq -8(%rsi,%rdx,8),%rcx # to + count*8 - 8 negq %rdx jmp 3f 1: movq 8(%rax,%rdx,8),%rsi movq %rsi,8(%rcx,%rdx,8) addq $1,%rdx jnz 1b ret .p2align 4,,15 2: movq -24(%rax,%rdx,8),%rsi movq %rsi,-24(%rcx,%rdx,8) movq -16(%rax,%rdx,8),%rsi movq %rsi,-16(%rcx,%rdx,8) movq -8(%rax,%rdx,8),%rsi movq %rsi,-8(%rcx,%rdx,8) movq (%rax,%rdx,8),%rsi movq %rsi,(%rcx,%rdx,8) 3: addq $4,%rdx jle 2b subq $4,%rdx jl 1b ret 4: movq -8(%rdi,%rdx,8),%rcx movq %rcx,-8(%rsi,%rdx,8) subq $1,%rdx jnz 4b ret .p2align 4,,15 5: movq 24(%rdi,%rdx,8),%rcx movq %rcx,24(%rsi,%rdx,8) movq 16(%rdi,%rdx,8),%rcx movq %rcx,16(%rsi,%rdx,8) movq 8(%rdi,%rdx,8),%rcx movq %rcx,8(%rsi,%rdx,8) movq (%rdi,%rdx,8),%rcx movq %rcx,(%rsi,%rdx,8) acl_CopyLeft: subq $4,%rdx jge 5b addq $4,%rdx jg 4b ret