| / |
| / Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. |
| / DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| / |
| / This code is free software; you can redistribute it and/or modify it |
| / under the terms of the GNU General Public License version 2 only, as |
| / published by the Free Software Foundation. |
| / |
| / This code is distributed in the hope that it will be useful, but WITHOUT |
| / ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| / FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| / version 2 for more details (a copy is included in the LICENSE file that |
| / accompanied this code). |
| / |
| / You should have received a copy of the GNU General Public License version |
| / 2 along with this work; if not, write to the Free Software Foundation, |
| / Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| / |
| / Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| / or visit www.oracle.com if you need additional information or have any |
| / questions. |
| / |
| |
| / Entry points exported for the %fs-relative accessors defined below. |
| .globl fs_load |
| .globl fs_thread |
| |
| // NOTE WELL! The _Copy functions are called directly |
| // from server-compiler-generated code via CallLeafNoFP, |
| // which means that they *must* either not use floating |
| // point or use it in the same manner as does the server |
| // compiler. |
| |
| / Exported copy routines. For jshorts, jints and jlongs the |
| / arrayof_* name and the *_atomic name are two labels placed on the |
| / same code. |
| .globl _Copy_arrayof_conjoint_bytes |
| .globl _Copy_conjoint_jshorts_atomic |
| .globl _Copy_arrayof_conjoint_jshorts |
| .globl _Copy_conjoint_jints_atomic |
| .globl _Copy_arrayof_conjoint_jints |
| .globl _Copy_conjoint_jlongs_atomic |
| .globl _Copy_arrayof_conjoint_jlongs |
| |
| / Everything that follows is emitted into the allocatable ("a"), |
| / executable ("x") .text section. |
| .section .text,"ax" |
| |
| / Fast thread accessors, used by threadLS_solaris_amd64.cpp |
| / |
| / fs_load: return the 64-bit value found at offset %rdi within the |
| / segment addressed by %fs. |
| / |
| / rdi - offset, relative to the %fs base |
| / rax - (result) qword read from %fs:(%rdi) |
| / |
| .align 16 |
| fs_load: |
| movq %fs:(%rdi),%rax / rax = 64-bit load from %fs:(%rdi) |
| ret |
| |
| / fs_thread: return the 64-bit value found at offset 0 of the segment |
| / addressed by %fs. Reads no argument registers. |
| / |
| / NOTE(review): presumably that slot holds the current thread pointer, |
| / as the name suggests - confirm against threadLS_solaris_amd64.cpp. |
| / |
| / rax - (result) qword read from %fs:0x0 |
| / |
| .align 16 |
| fs_thread: |
| movq %fs:0x0,%rax / rax = 64-bit load from %fs:0 |
| ret |
| |
| / SpinPause: execute one spin-wait hint and return 1. |
| / |
| / The "rep" prefix followed by "nop" assembles to the bytes F3 90, |
| / which is the encoding of the PAUSE instruction. |
| / Reads no argument registers. |
| / |
| / rax - (result) always 1 |
| / |
| .globl SpinPause |
| .align 16 |
| SpinPause: |
| rep |
| nop |
| movq $1, %rax / return value is the constant 1 |
| ret |
| |
| |
| / Support for void Copy::arrayof_conjoint_bytes(void* from, |
| / void* to, |
| / size_t count) |
| / |
| / Copies 'count' bytes from 'from' to 'to'. The copy is performed as |
| / count/8 qwords plus an optional dword, word and byte selected by |
| / bits 2, 1 and 0 of count. In memory order the pieces are: |
| / qwords, dword, word, byte. |
| / |
| / Direction is chosen with unsigned compares: |
| / to <= from -> acb_CopyRight (ascending) |
| / from < to <= from + count - 1 -> acb_CopyLeft (descending) |
| / to > from + count - 1 -> acb_CopyRight (ascending) |
| / |
| / rdi - from |
| / rsi - to |
| / rdx - count, treated as ssize_t |
| / |
| / Registers written on at least one path: rax, rcx, rdx, rsi, r8. |
| / |
| .align 16 |
| _Copy_arrayof_conjoint_bytes: |
| movq %rdx,%r8 / byte count |
| shrq $3,%rdx / qword count |
| cmpq %rdi,%rsi / flags from (to - from); leaq below keeps them |
| leaq -1(%rdi,%r8,1),%rax / from + bcount*1 - 1 |
| jbe acb_CopyRight / to <= from: ascending copy |
| cmpq %rax,%rsi |
| jbe acb_CopyLeft / to lies inside the source range: descending copy |
| / Ascending copy. rax/rcx point 8 bytes below the end of the qword |
| / region of source/destination; rdx runs from -qcount up to zero. |
| acb_CopyRight: |
| leaq -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8 |
| leaq -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8 |
| negq %rdx / rdx = -qcount |
| jmp 7f / enter at the loop test |
| / Copy the remaining (fewer than four) qwords one at a time. |
| .align 16 |
| 1: movq 8(%rax,%rdx,8),%rsi |
| movq %rsi,8(%rcx,%rdx,8) |
| addq $1,%rdx |
| jnz 1b |
| / Tail: rdx is zero here, so 8(%rax) / 8(%rcx) address the first byte |
| / after the qword region. |
| 2: testq $4,%r8 / check for trailing dword |
| jz 3f |
| movl 8(%rax),%esi / copy trailing dword |
| movl %esi,8(%rcx) |
| addq $4,%rax |
| addq $4,%rcx / original %rsi is trashed, so we |
| / can't use it as a base register |
| 3: testq $2,%r8 / check for trailing word |
| jz 4f |
| movw 8(%rax),%si / copy trailing word |
| movw %si,8(%rcx) |
| addq $2,%rcx / rax is not advanced: the byte is read via rdi/r8 |
| 4: testq $1,%r8 / check for trailing byte |
| jz 5f |
| movb -1(%rdi,%r8,1),%al / copy trailing byte |
| movb %al,8(%rcx) |
| 5: ret |
| / Unrolled loop: four qwords per iteration, lowest address first. |
| .align 16 |
| 6: movq -24(%rax,%rdx,8),%rsi |
| movq %rsi,-24(%rcx,%rdx,8) |
| movq -16(%rax,%rdx,8),%rsi |
| movq %rsi,-16(%rcx,%rdx,8) |
| movq -8(%rax,%rdx,8),%rsi |
| movq %rsi,-8(%rcx,%rdx,8) |
| movq (%rax,%rdx,8),%rsi |
| movq %rsi,(%rcx,%rdx,8) |
| 7: addq $4,%rdx |
| jle 6b / at least four qwords still to copy |
| subq $4,%rdx / undo the add: rdx = -(qwords left), 0..3 |
| jl 1b / one to three qwords left |
| jmp 2b / no qwords left, handle the tail |
| / Descending copy. The tail (byte, word, dword) is copied first, from |
| / the highest address down, then the qwords from last to first. |
| / rdi/rsi stay intact as bases; rcx carries the data. |
| acb_CopyLeft: |
| testq $1,%r8 / check for trailing byte |
| jz 1f |
| movb -1(%rdi,%r8,1),%cl / copy trailing byte |
| movb %cl,-1(%rsi,%r8,1) |
| subq $1,%r8 / adjust for possible trailing word |
| 1: testq $2,%r8 / check for trailing word |
| jz 2f |
| movw -2(%rdi,%r8,1),%cx / copy trailing word |
| movw %cx,-2(%rsi,%r8,1) |
| 2: testq $4,%r8 / check for trailing dword |
| jz 5f |
| movl (%rdi,%rdx,8),%ecx / copy trailing dword |
| movl %ecx,(%rsi,%rdx,8) |
| jmp 5f / enter at the loop test |
| / Copy the remaining (fewer than four) qwords one at a time, |
| / highest address first; rdx counts down to zero. |
| .align 16 |
| 3: movq -8(%rdi,%rdx,8),%rcx |
| movq %rcx,-8(%rsi,%rdx,8) |
| subq $1,%rdx |
| jnz 3b |
| ret |
| / Unrolled loop: four qwords per iteration, highest address first. |
| .align 16 |
| 4: movq 24(%rdi,%rdx,8),%rcx |
| movq %rcx,24(%rsi,%rdx,8) |
| movq 16(%rdi,%rdx,8),%rcx |
| movq %rcx,16(%rsi,%rdx,8) |
| movq 8(%rdi,%rdx,8),%rcx |
| movq %rcx,8(%rsi,%rdx,8) |
| movq (%rdi,%rdx,8),%rcx |
| movq %rcx,(%rsi,%rdx,8) |
| 5: subq $4,%rdx |
| jge 4b / at least four qwords still to copy |
| addq $4,%rdx / undo the subtract: rdx = qwords left, 0..3 |
| jg 3b / one to three qwords left |
| ret |
| |
| / Support for void Copy::arrayof_conjoint_jshorts(void* from, |
| / void* to, |
| / size_t count) |
| / Equivalent to |
| / conjoint_jshorts_atomic |
| / |
| / If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we |
| / let the hardware handle it. The two or four words within dwords |
| / or qwords that span cache line boundaries will still be loaded |
| / and stored atomically. |
| / |
| / Copies 'count' 16-bit words as count/4 qwords plus an optional |
| / dword and word selected by bits 1 and 0 of count. In memory order |
| / the pieces are: qwords, dword, word. |
| / |
| / Direction is chosen with unsigned compares: |
| / to <= from -> acs_CopyRight (ascending) |
| / from < to <= from + count*2 - 2 -> acs_CopyLeft (descending) |
| / to > from + count*2 - 2 -> acs_CopyRight (ascending) |
| / |
| / rdi - from |
| / rsi - to |
| / rdx - count, treated as ssize_t |
| / |
| / Registers written on at least one path: rax, rcx, rdx, rsi, r8. |
| / |
| .align 16 |
| _Copy_arrayof_conjoint_jshorts: |
| _Copy_conjoint_jshorts_atomic: |
| movq %rdx,%r8 / word count |
| shrq $2,%rdx / qword count |
| cmpq %rdi,%rsi / flags from (to - from); leaq below keeps them |
| leaq -2(%rdi,%r8,2),%rax / from + wcount*2 - 2 |
| jbe acs_CopyRight / to <= from: ascending copy |
| cmpq %rax,%rsi |
| jbe acs_CopyLeft / to lies inside the source range: descending copy |
| / Ascending copy. rax/rcx point 8 bytes below the end of the qword |
| / region of source/destination; rdx runs from -qcount up to zero. |
| acs_CopyRight: |
| leaq -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8 |
| leaq -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8 |
| negq %rdx / rdx = -qcount |
| jmp 6f / enter at the loop test |
| / Copy the remaining (fewer than four) qwords one at a time. |
| 1: movq 8(%rax,%rdx,8),%rsi |
| movq %rsi,8(%rcx,%rdx,8) |
| addq $1,%rdx |
| jnz 1b |
| / Tail: rdx is zero here, so 8(%rax) / 8(%rcx) address the first byte |
| / after the qword region. |
| 2: testq $2,%r8 / check for trailing dword |
| jz 3f |
| movl 8(%rax),%esi / copy trailing dword |
| movl %esi,8(%rcx) |
| addq $4,%rcx / original %rsi is trashed, so we |
| / can't use it as a base register |
| 3: testq $1,%r8 / check for trailing word |
| jz 4f |
| movw -2(%rdi,%r8,2),%si / copy trailing word |
| movw %si,8(%rcx) |
| 4: ret |
| / Unrolled loop: four qwords per iteration, lowest address first. |
| .align 16 |
| 5: movq -24(%rax,%rdx,8),%rsi |
| movq %rsi,-24(%rcx,%rdx,8) |
| movq -16(%rax,%rdx,8),%rsi |
| movq %rsi,-16(%rcx,%rdx,8) |
| movq -8(%rax,%rdx,8),%rsi |
| movq %rsi,-8(%rcx,%rdx,8) |
| movq (%rax,%rdx,8),%rsi |
| movq %rsi,(%rcx,%rdx,8) |
| 6: addq $4,%rdx |
| jle 5b / at least four qwords still to copy |
| subq $4,%rdx / undo the add: rdx = -(qwords left), 0..3 |
| jl 1b / one to three qwords left |
| jmp 2b / no qwords left, handle the tail |
| / Descending copy. The tail (word, dword) is copied first, from the |
| / highest address down, then the qwords from last to first. |
| / rdi/rsi stay intact as bases; rcx carries the data. |
| acs_CopyLeft: |
| testq $1,%r8 / check for trailing word |
| jz 1f |
| movw -2(%rdi,%r8,2),%cx / copy trailing word |
| movw %cx,-2(%rsi,%r8,2) |
| 1: testq $2,%r8 / check for trailing dword |
| jz 4f |
| movl (%rdi,%rdx,8),%ecx / copy trailing dword |
| movl %ecx,(%rsi,%rdx,8) |
| jmp 4f / enter at the loop test |
| / Copy the remaining (fewer than four) qwords one at a time, |
| / highest address first; rdx counts down to zero. |
| 2: movq -8(%rdi,%rdx,8),%rcx |
| movq %rcx,-8(%rsi,%rdx,8) |
| subq $1,%rdx |
| jnz 2b |
| ret |
| / Unrolled loop: four qwords per iteration, highest address first. |
| .align 16 |
| 3: movq 24(%rdi,%rdx,8),%rcx |
| movq %rcx,24(%rsi,%rdx,8) |
| movq 16(%rdi,%rdx,8),%rcx |
| movq %rcx,16(%rsi,%rdx,8) |
| movq 8(%rdi,%rdx,8),%rcx |
| movq %rcx,8(%rsi,%rdx,8) |
| movq (%rdi,%rdx,8),%rcx |
| movq %rcx,(%rsi,%rdx,8) |
| 4: subq $4,%rdx |
| jge 3b / at least four qwords still to copy |
| addq $4,%rdx / undo the subtract: rdx = qwords left, 0..3 |
| jg 2b / one to three qwords left |
| ret |
| |
| / Support for void Copy::arrayof_conjoint_jints(jint* from, |
| / jint* to, |
| / size_t count) |
| / Equivalent to |
| / conjoint_jints_atomic |
| / |
| / If 'from' and/or 'to' are aligned on 4-byte boundaries, we let |
| / the hardware handle it. The two dwords within qwords that span |
| / cache line boundaries will still be loaded and stored atomically. |
| / |
| / Copies 'count' dwords as count/2 qwords plus one optional dword |
| / selected by bit 0 of count. |
| / |
| / Direction is chosen with unsigned compares: |
| / to <= from -> aci_CopyRight (ascending) |
| / from < to <= from + count*4 - 4 -> aci_CopyLeft (descending) |
| / to > from + count*4 - 4 -> aci_CopyRight (ascending) |
| / |
| / rdi - from |
| / rsi - to |
| / rdx - count, treated as ssize_t |
| / |
| / Registers written on at least one path: rax, rcx, rdx, rsi, r8. |
| / |
| .align 16 |
| _Copy_arrayof_conjoint_jints: |
| _Copy_conjoint_jints_atomic: |
| movq %rdx,%r8 / dword count |
| shrq %rdx / qword count |
| cmpq %rdi,%rsi / flags from (to - from); leaq below keeps them |
| leaq -4(%rdi,%r8,4),%rax / from + dcount*4 - 4 |
| jbe aci_CopyRight / to <= from: ascending copy |
| cmpq %rax,%rsi |
| jbe aci_CopyLeft / to lies inside the source range: descending copy |
| / Ascending copy. rax/rcx point 8 bytes below the end of the qword |
| / region of source/destination; rdx runs from -qcount up to zero. |
| aci_CopyRight: |
| leaq -8(%rdi,%rdx,8),%rax / from + qcount*8 - 8 |
| leaq -8(%rsi,%rdx,8),%rcx / to + qcount*8 - 8 |
| negq %rdx / rdx = -qcount |
| jmp 5f / enter at the loop test |
| / Copy the remaining (fewer than four) qwords one at a time. |
| .align 16 |
| 1: movq 8(%rax,%rdx,8),%rsi |
| movq %rsi,8(%rcx,%rdx,8) |
| addq $1,%rdx |
| jnz 1b |
| / Tail: rdx is zero here, so 8(%rax) / 8(%rcx) address the first byte |
| / after the qword region. |
| 2: testq $1,%r8 / check for trailing dword |
| jz 3f |
| movl 8(%rax),%esi / copy trailing dword |
| movl %esi,8(%rcx) |
| 3: ret |
| / Unrolled loop: four qwords per iteration, lowest address first. |
| .align 16 |
| 4: movq -24(%rax,%rdx,8),%rsi |
| movq %rsi,-24(%rcx,%rdx,8) |
| movq -16(%rax,%rdx,8),%rsi |
| movq %rsi,-16(%rcx,%rdx,8) |
| movq -8(%rax,%rdx,8),%rsi |
| movq %rsi,-8(%rcx,%rdx,8) |
| movq (%rax,%rdx,8),%rsi |
| movq %rsi,(%rcx,%rdx,8) |
| 5: addq $4,%rdx |
| jle 4b / at least four qwords still to copy |
| subq $4,%rdx / undo the add: rdx = -(qwords left), 0..3 |
| jl 1b / one to three qwords left |
| jmp 2b / no qwords left, handle the tail |
| / Descending copy. The odd trailing dword (the last element) is copied |
| / first, then the qwords from last to first. |
| / rdi/rsi stay intact as bases; rcx carries the data. |
| aci_CopyLeft: |
| testq $1,%r8 / check for trailing dword |
| jz 3f |
| movl -4(%rdi,%r8,4),%ecx / copy trailing dword |
| movl %ecx,-4(%rsi,%r8,4) |
| jmp 3f / enter at the loop test |
| / Copy the remaining (fewer than four) qwords one at a time, |
| / highest address first; rdx counts down to zero. |
| 1: movq -8(%rdi,%rdx,8),%rcx |
| movq %rcx,-8(%rsi,%rdx,8) |
| subq $1,%rdx |
| jnz 1b |
| ret |
| / Unrolled loop: four qwords per iteration, highest address first. |
| .align 16 |
| 2: movq 24(%rdi,%rdx,8),%rcx |
| movq %rcx,24(%rsi,%rdx,8) |
| movq 16(%rdi,%rdx,8),%rcx |
| movq %rcx,16(%rsi,%rdx,8) |
| movq 8(%rdi,%rdx,8),%rcx |
| movq %rcx,8(%rsi,%rdx,8) |
| movq (%rdi,%rdx,8),%rcx |
| movq %rcx,(%rsi,%rdx,8) |
| 3: subq $4,%rdx |
| jge 2b / at least four qwords still to copy |
| addq $4,%rdx / undo the subtract: rdx = qwords left, 0..3 |
| jg 1b / one to three qwords left |
| ret |
| |
| / Support for void Copy::arrayof_conjoint_jlongs(jlong* from, |
| / jlong* to, |
| / size_t count) |
| / Equivalent to |
| / conjoint_jlongs_atomic |
| / arrayof_conjoint_oops |
| / conjoint_oops_atomic |
| / |
| / Copies 'count' qwords, each with a single 64-bit load and a single |
| / 64-bit store. There is no sub-qword tail. |
| / |
| / Direction is chosen with unsigned compares: |
| / to <= from -> acl_CopyRight (ascending) |
| / from < to <= from + count*8 - 8 -> acl_CopyLeft (descending) |
| / to > from + count*8 - 8 -> acl_CopyRight (ascending) |
| / |
| / rdi - from |
| / rsi - to |
| / rdx - count, treated as ssize_t |
| / |
| / Registers written on at least one path: rax, rcx, rdx, rsi. |
| / |
| .align 16 |
| _Copy_arrayof_conjoint_jlongs: |
| _Copy_conjoint_jlongs_atomic: |
| cmpq %rdi,%rsi / flags from (to - from); leaq below keeps them |
| leaq -8(%rdi,%rdx,8),%rax / from + count*8 - 8 |
| jbe acl_CopyRight / to <= from: ascending copy |
| cmpq %rax,%rsi |
| jbe acl_CopyLeft / to lies inside the source range: descending copy |
| / Ascending copy. rax (computed above) and rcx point at the last qword |
| / of source/destination; rdx runs from -count up to zero. |
| acl_CopyRight: |
| leaq -8(%rsi,%rdx,8),%rcx / to + count*8 - 8 |
| negq %rdx / rdx = -count |
| jmp 3f / enter at the loop test |
| / Copy the remaining (fewer than four) qwords one at a time. |
| 1: movq 8(%rax,%rdx,8),%rsi |
| movq %rsi,8(%rcx,%rdx,8) |
| addq $1,%rdx |
| jnz 1b |
| ret |
| / Unrolled loop: four qwords per iteration, lowest address first. |
| .align 16 |
| 2: movq -24(%rax,%rdx,8),%rsi |
| movq %rsi,-24(%rcx,%rdx,8) |
| movq -16(%rax,%rdx,8),%rsi |
| movq %rsi,-16(%rcx,%rdx,8) |
| movq -8(%rax,%rdx,8),%rsi |
| movq %rsi,-8(%rcx,%rdx,8) |
| movq (%rax,%rdx,8),%rsi |
| movq %rsi,(%rcx,%rdx,8) |
| 3: addq $4,%rdx |
| jle 2b / at least four qwords still to copy |
| subq $4,%rdx / undo the add: rdx = -(qwords left), 0..3 |
| jl 1b / one to three qwords left |
| ret |
| / Descending copy, single-qword loop: highest address first; rdx |
| / counts down to zero. rdi/rsi are the bases; rcx carries the data. |
| 4: movq -8(%rdi,%rdx,8),%rcx |
| movq %rcx,-8(%rsi,%rdx,8) |
| subq $1,%rdx |
| jnz 4b |
| ret |
| / Descending copy, unrolled loop: four qwords per iteration, highest |
| / address first. |
| .align 16 |
| 5: movq 24(%rdi,%rdx,8),%rcx |
| movq %rcx,24(%rsi,%rdx,8) |
| movq 16(%rdi,%rdx,8),%rcx |
| movq %rcx,16(%rsi,%rdx,8) |
| movq 8(%rdi,%rdx,8),%rcx |
| movq %rcx,8(%rsi,%rdx,8) |
| movq (%rdi,%rdx,8),%rcx |
| movq %rcx,(%rsi,%rdx,8) |
| / Entry point of the descending copy; it is also the loop test that |
| / the unrolled body above falls into. rdx = qwords still to copy. |
| acl_CopyLeft: |
| subq $4,%rdx |
| jge 5b / at least four qwords still to copy |
| addq $4,%rdx / undo the subtract: rdx = qwords left, 0..3 |
| jg 4b / one to three qwords left |
| ret |