3789983e89
Reviewed-by: darcy, ihse
381 lines
13 KiB
ArmAsm
381 lines
13 KiB
ArmAsm
#
|
|
# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
|
|
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
#
|
|
# This code is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License version 2 only, as
|
|
# published by the Free Software Foundation.
|
|
#
|
|
# This code is distributed in the hope that it will be useful, but WITHOUT
|
|
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
# version 2 for more details (a copy is included in the LICENSE file that
|
|
# accompanied this code).
|
|
#
|
|
# You should have received a copy of the GNU General Public License version
|
|
# 2 along with this work; if not, write to the Free Software Foundation,
|
|
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
#
|
|
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
# or visit www.oracle.com if you need additional information or have any
|
|
# questions.
|
|
#
|
|
|
|
|
|
# NOTE WELL! The _Copy functions are called directly
|
|
# from server-compiler-generated code via CallLeafNoFP,
|
|
# which means that they *must* either not use floating
|
|
# point or use it in the same manner as does the server
|
|
# compiler.
|
|
|
|
# Make the _Copy entry points visible to the linker, then switch to the
# code section for everything that follows.
        .globl _Copy_arrayof_conjoint_bytes
        .globl _Copy_arrayof_conjoint_jshorts
        .globl _Copy_conjoint_jshorts_atomic
        .globl _Copy_arrayof_conjoint_jints
        .globl _Copy_conjoint_jints_atomic
        .globl _Copy_arrayof_conjoint_jlongs
        .globl _Copy_conjoint_jlongs_atomic

        .text
|
|
|
|
# SpinPause
#
# Executes one spin-wait hint and returns 1.
#
#   in:       nothing
#   out:      rax = 1
#   clobbers: rax only; no stack use
#
        .globl   SpinPause
        .p2align 4,,15
        .type    SpinPause,@function
SpinPause:
        pause                           # spin-wait hint; encodes as "rep; nop"
        movl     $1,%eax                # 32-bit write zero-extends, so rax = 1
        ret
        .size    SpinPause,.-SpinPause
|
|
|
|
# Support for void Copy::arrayof_conjoint_bytes(void* from,
#                                               void* to,
#                                               size_t count)
#
# Copies count bytes from 'from' to 'to', eight bytes at a time with a
# dword/word/byte tail selected by the low three bits of count.
#
#   rdi - from
#   rsi - to
#   rdx - count, treated as ssize_t
#
# Register use after entry:
#   r8            - byte count
#   rdx           - qword count (count >> 3), then the running qword index
#   rax, rcx, rsi - scratch and base pointers; rsi is overwritten on the
#                   ascending path
#
# Copy direction:
#   to <= from                       -> acb_CopyRight (ascending addresses)
#   from < to <= from + count - 1    -> acb_CopyLeft  (descending addresses)
#   anything else                    -> acb_CopyRight
#
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_bytes,@function
_Copy_arrayof_conjoint_bytes:
        movq     %rdx,%r8               # byte count
        shrq     $3,%rdx                # qword count
        cmpq     %rdi,%rsi              # flags consumed by the first jbe below
        leaq     -1(%rdi,%r8,1),%rax    # from + bcount*1 - 1 (lea keeps the flags)
        jbe      acb_CopyRight          # to <= from
        cmpq     %rax,%rsi
        jbe      acb_CopyLeft           # to lies inside the source range
acb_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax   # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx   # to + qcount*8 - 8
        negq     %rdx                   # rdx = -qcount, counts up to zero
        jmp      .Lacb_right_dispatch
        .p2align 4,,15
.Lacb_right_qword_loop:                 # one qword per iteration
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Lacb_right_qword_loop
.Lacb_right_tail_dword:
        testq    $4,%r8                 # check for trailing dword
        jz       .Lacb_right_tail_word
        movl     8(%rax),%esi           # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rax
        addq     $4,%rcx                # original %rsi is trashed, so we
                                        # can't use it as a base register
.Lacb_right_tail_word:
        testq    $2,%r8                 # check for trailing word
        jz       .Lacb_right_tail_byte
        movw     8(%rax),%si            # copy trailing word
        movw     %si,8(%rcx)
        addq     $2,%rcx
.Lacb_right_tail_byte:
        testq    $1,%r8                 # check for trailing byte
        jz       .Lacb_right_done
        movb     -1(%rdi,%r8,1),%al     # copy trailing byte (rdi is still 'from')
        movb     %al,8(%rcx)
.Lacb_right_done:
        ret
        .p2align 4,,15
.Lacb_right_unrolled:                   # four qwords per iteration
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Lacb_right_dispatch:
        addq     $4,%rdx
        jle      .Lacb_right_unrolled   # at least four qwords remain
        subq     $4,%rdx                # undo the probe
        jl       .Lacb_right_qword_loop # one to three qwords remain
        jmp      .Lacb_right_tail_dword # no qwords remain
acb_CopyLeft:
        testq    $1,%r8                 # check for trailing byte
        jz       .Lacb_left_tail_word
        movb     -1(%rdi,%r8,1),%cl     # copy trailing byte
        movb     %cl,-1(%rsi,%r8,1)
        subq     $1,%r8                 # adjust for possible trailing word
.Lacb_left_tail_word:
        testq    $2,%r8                 # check for trailing word
        jz       .Lacb_left_tail_dword
        movw     -2(%rdi,%r8,1),%cx     # copy trailing word
        movw     %cx,-2(%rsi,%r8,1)
.Lacb_left_tail_dword:
        testq    $4,%r8                 # check for trailing dword
        jz       .Lacb_left_dispatch
        movl     (%rdi,%rdx,8),%ecx     # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      .Lacb_left_dispatch
        .p2align 4,,15
.Lacb_left_qword_loop:                  # one qword per iteration, high to low
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Lacb_left_qword_loop
        ret
        .p2align 4,,15
.Lacb_left_unrolled:                    # four qwords per iteration, high to low
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
.Lacb_left_dispatch:
        subq     $4,%rdx
        jge      .Lacb_left_unrolled    # at least four qwords remain
        addq     $4,%rdx                # undo the probe
        jg       .Lacb_left_qword_loop  # one to three qwords remain
        ret
        .size    _Copy_arrayof_conjoint_bytes,.-_Copy_arrayof_conjoint_bytes
|
|
|
|
# Support for void Copy::arrayof_conjoint_jshorts(void* from,
#                                                 void* to,
#                                                 size_t count)
# Equivalent to
#   conjoint_jshorts_atomic
#
# If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
# let the hardware handle it.  The two or four words within dwords
# or qwords that span cache line boundaries will still be loaded
# and stored atomically.
#
#   rdi - from
#   rsi - to
#   rdx - count, treated as ssize_t
#
# Register use after entry:
#   r8            - word count (low two bits select the dword/word tail)
#   rdx           - qword count (count >> 2), then the running qword index
#   rax, rcx, rsi - scratch and base pointers; rsi is overwritten on the
#                   ascending path
#
# Copy direction:
#   to <= from                         -> acs_CopyRight (ascending addresses)
#   from < to <= from + count*2 - 2    -> acs_CopyLeft  (descending addresses)
#   anything else                      -> acs_CopyRight
#
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jshorts,@function
        .type    _Copy_conjoint_jshorts_atomic,@function
_Copy_arrayof_conjoint_jshorts:
_Copy_conjoint_jshorts_atomic:
        movq     %rdx,%r8               # word count
        shrq     $2,%rdx                # qword count
        cmpq     %rdi,%rsi              # flags consumed by the first jbe below
        leaq     -2(%rdi,%r8,2),%rax    # from + wcount*2 - 2 (lea keeps the flags)
        jbe      acs_CopyRight          # to <= from
        cmpq     %rax,%rsi
        jbe      acs_CopyLeft           # to lies inside the source range
acs_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax   # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx   # to + qcount*8 - 8
        negq     %rdx                   # rdx = -qcount, counts up to zero
        jmp      .Lacs_right_dispatch
.Lacs_right_qword_loop:                 # one qword per iteration
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Lacs_right_qword_loop
.Lacs_right_tail_dword:
        testq    $2,%r8                 # check for trailing dword
        jz       .Lacs_right_tail_word
        movl     8(%rax),%esi           # copy trailing dword
        movl     %esi,8(%rcx)
        addq     $4,%rcx                # original %rsi is trashed, so we
                                        # can't use it as a base register
.Lacs_right_tail_word:
        testq    $1,%r8                 # check for trailing word
        jz       .Lacs_right_done
        movw     -2(%rdi,%r8,2),%si     # copy trailing word (rdi is still 'from')
        movw     %si,8(%rcx)
.Lacs_right_done:
        ret
        .p2align 4,,15
.Lacs_right_unrolled:                   # four qwords per iteration
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Lacs_right_dispatch:
        addq     $4,%rdx
        jle      .Lacs_right_unrolled   # at least four qwords remain
        subq     $4,%rdx                # undo the probe
        jl       .Lacs_right_qword_loop # one to three qwords remain
        jmp      .Lacs_right_tail_dword # no qwords remain
acs_CopyLeft:
        testq    $1,%r8                 # check for trailing word
        jz       .Lacs_left_tail_dword
        movw     -2(%rdi,%r8,2),%cx     # copy trailing word
        movw     %cx,-2(%rsi,%r8,2)
.Lacs_left_tail_dword:
        testq    $2,%r8                 # check for trailing dword
        jz       .Lacs_left_dispatch
        movl     (%rdi,%rdx,8),%ecx     # copy trailing dword
        movl     %ecx,(%rsi,%rdx,8)
        jmp      .Lacs_left_dispatch
.Lacs_left_qword_loop:                  # one qword per iteration, high to low
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Lacs_left_qword_loop
        ret
        .p2align 4,,15
.Lacs_left_unrolled:                    # four qwords per iteration, high to low
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
.Lacs_left_dispatch:
        subq     $4,%rdx
        jge      .Lacs_left_unrolled    # at least four qwords remain
        addq     $4,%rdx                # undo the probe
        jg       .Lacs_left_qword_loop  # one to three qwords remain
        ret
        .size    _Copy_arrayof_conjoint_jshorts,.-_Copy_arrayof_conjoint_jshorts
        .size    _Copy_conjoint_jshorts_atomic,.-_Copy_conjoint_jshorts_atomic
|
|
|
|
# Support for void Copy::arrayof_conjoint_jints(jint* from,
#                                               jint* to,
#                                               size_t count)
# Equivalent to
#   conjoint_jints_atomic
#
# If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
# the hardware handle it.  The two dwords within qwords that span
# cache line boundaries will still be loaded and stored atomically.
#
#   rdi - from
#   rsi - to
#   rdx - count, treated as ssize_t
#
# Register use after entry:
#   r8            - dword count (bit 0 selects the trailing dword)
#   rdx           - qword count (count >> 1), then the running qword index
#   rax, rcx, rsi - scratch and base pointers; rsi is overwritten on the
#                   ascending path
#
# Copy direction:
#   to <= from                         -> aci_CopyRight (ascending addresses)
#   from < to <= from + count*4 - 4    -> aci_CopyLeft  (descending addresses)
#   anything else                      -> aci_CopyRight
#
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jints,@function
        .type    _Copy_conjoint_jints_atomic,@function
_Copy_arrayof_conjoint_jints:
_Copy_conjoint_jints_atomic:
        movq     %rdx,%r8               # dword count
        shrq     $1,%rdx                # qword count
        cmpq     %rdi,%rsi              # flags consumed by the first jbe below
        leaq     -4(%rdi,%r8,4),%rax    # from + dcount*4 - 4 (lea keeps the flags)
        jbe      aci_CopyRight          # to <= from
        cmpq     %rax,%rsi
        jbe      aci_CopyLeft           # to lies inside the source range
aci_CopyRight:
        leaq     -8(%rdi,%rdx,8),%rax   # from + qcount*8 - 8
        leaq     -8(%rsi,%rdx,8),%rcx   # to + qcount*8 - 8
        negq     %rdx                   # rdx = -qcount, counts up to zero
        jmp      .Laci_right_dispatch
        .p2align 4,,15
.Laci_right_qword_loop:                 # one qword per iteration
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Laci_right_qword_loop
.Laci_right_tail_dword:
        testq    $1,%r8                 # check for trailing dword
        jz       .Laci_right_done
        movl     8(%rax),%esi           # copy trailing dword
        movl     %esi,8(%rcx)
.Laci_right_done:
        ret
        .p2align 4,,15
.Laci_right_unrolled:                   # four qwords per iteration
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Laci_right_dispatch:
        addq     $4,%rdx
        jle      .Laci_right_unrolled   # at least four qwords remain
        subq     $4,%rdx                # undo the probe
        jl       .Laci_right_qword_loop # one to three qwords remain
        jmp      .Laci_right_tail_dword # no qwords remain
aci_CopyLeft:
        testq    $1,%r8                 # check for trailing dword
        jz       .Laci_left_dispatch
        movl     -4(%rdi,%r8,4),%ecx    # copy trailing dword
        movl     %ecx,-4(%rsi,%r8,4)
        jmp      .Laci_left_dispatch
.Laci_left_qword_loop:                  # one qword per iteration, high to low
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Laci_left_qword_loop
        ret
        .p2align 4,,15
.Laci_left_unrolled:                    # four qwords per iteration, high to low
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
.Laci_left_dispatch:
        subq     $4,%rdx
        jge      .Laci_left_unrolled    # at least four qwords remain
        addq     $4,%rdx                # undo the probe
        jg       .Laci_left_qword_loop  # one to three qwords remain
        ret
        .size    _Copy_arrayof_conjoint_jints,.-_Copy_arrayof_conjoint_jints
        .size    _Copy_conjoint_jints_atomic,.-_Copy_conjoint_jints_atomic
|
|
|
|
# Support for void Copy::arrayof_conjoint_jlongs(jlong* from,
#                                                jlong* to,
#                                                size_t count)
# Equivalent to
#   conjoint_jlongs_atomic
#   arrayof_conjoint_oops
#   conjoint_oops_atomic
#
#   rdi - from
#   rsi - to
#   rdx - count, treated as ssize_t
#
# Register use after entry:
#   rdx      - qword count, then the running qword index
#   rax      - from + count*8 - 8: overlap bound, then source base when
#              copying ascending
#   rcx, rsi - scratch and base pointers; rsi is overwritten on the
#              ascending path
#
# Copy direction:
#   to <= from                         -> acl_CopyRight (ascending addresses)
#   from < to <= from + count*8 - 8    -> acl_CopyLeft  (descending addresses)
#   anything else                      -> acl_CopyRight
#
        .p2align 4,,15
        .type    _Copy_arrayof_conjoint_jlongs,@function
        .type    _Copy_conjoint_jlongs_atomic,@function
_Copy_arrayof_conjoint_jlongs:
_Copy_conjoint_jlongs_atomic:
        cmpq     %rdi,%rsi              # flags consumed by the first jbe below
        leaq     -8(%rdi,%rdx,8),%rax   # from + count*8 - 8 (lea keeps the flags)
        jbe      acl_CopyRight          # to <= from
        cmpq     %rax,%rsi
        jbe      acl_CopyLeft           # to lies inside the source range
acl_CopyRight:
        leaq     -8(%rsi,%rdx,8),%rcx   # to + count*8 - 8
        negq     %rdx                   # rdx = -count, counts up to zero
        jmp      .Lacl_right_dispatch
.Lacl_right_qword_loop:                 # one qword per iteration
        movq     8(%rax,%rdx,8),%rsi
        movq     %rsi,8(%rcx,%rdx,8)
        addq     $1,%rdx
        jnz      .Lacl_right_qword_loop
        ret
        .p2align 4,,15
.Lacl_right_unrolled:                   # four qwords per iteration
        movq     -24(%rax,%rdx,8),%rsi
        movq     %rsi,-24(%rcx,%rdx,8)
        movq     -16(%rax,%rdx,8),%rsi
        movq     %rsi,-16(%rcx,%rdx,8)
        movq     -8(%rax,%rdx,8),%rsi
        movq     %rsi,-8(%rcx,%rdx,8)
        movq     (%rax,%rdx,8),%rsi
        movq     %rsi,(%rcx,%rdx,8)
.Lacl_right_dispatch:
        addq     $4,%rdx
        jle      .Lacl_right_unrolled   # at least four qwords remain
        subq     $4,%rdx                # undo the probe
        jl       .Lacl_right_qword_loop # one to three qwords remain
        ret
.Lacl_left_qword_loop:                  # one qword per iteration, high to low
        movq     -8(%rdi,%rdx,8),%rcx
        movq     %rcx,-8(%rsi,%rdx,8)
        subq     $1,%rdx
        jnz      .Lacl_left_qword_loop
        ret
        .p2align 4,,15
.Lacl_left_unrolled:                    # four qwords per iteration, high to low
        movq     24(%rdi,%rdx,8),%rcx
        movq     %rcx,24(%rsi,%rdx,8)
        movq     16(%rdi,%rdx,8),%rcx
        movq     %rcx,16(%rsi,%rdx,8)
        movq     8(%rdi,%rdx,8),%rcx
        movq     %rcx,8(%rsi,%rdx,8)
        movq     (%rdi,%rdx,8),%rcx
        movq     %rcx,(%rsi,%rdx,8)
acl_CopyLeft:                           # entry point doubles as the loop dispatcher
        subq     $4,%rdx
        jge      .Lacl_left_unrolled    # at least four qwords remain
        addq     $4,%rdx                # undo the probe
        jg       .Lacl_left_qword_loop  # one to three qwords remain
        ret
        .size    _Copy_arrayof_conjoint_jlongs,.-_Copy_arrayof_conjoint_jlongs
        .size    _Copy_conjoint_jlongs_atomic,.-_Copy_conjoint_jlongs_atomic
|