| |
| ##--------------------------------------------------------------------## |
| ##--- Support for doing system calls. syscall-amd64-linux.S ---## |
| ##--------------------------------------------------------------------## |
| |
| /* |
| This file is part of Valgrind, a dynamic binary instrumentation |
| framework. |
| |
| Copyright (C) 2000-2005 Julian Seward |
| jseward@acm.org |
| |
| This program is free software; you can redistribute it and/or |
| modify it under the terms of the GNU General Public License as |
| published by the Free Software Foundation; either version 2 of the |
| License, or (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 02111-1307, USA. |
| |
| The GNU General Public License is contained in the file COPYING. |
| */ |
| |
| #include "core_asm.h" |
| #include "vki_unistd.h" |
| #include "libvex_guest_offsets.h" |
| |
| /* |
| Perform a Linux syscall with the "syscall" instruction. |
| |
| Incoming args (syscall number + up to 6 args) come in |
| %rdi, %rsi, %rdx, %rcx, %r8, %r9, and the last one on the stack |
| (ie. the C calling convention). |
| |
| They are passed to the syscall in the regs |
| %rdi, %rsi, %rdx, %r10, %r8, %r9 (yes, really %r10, not %rcx), ie. the |
| kernel's syscall calling convention. |
| |
| %rax holds the syscall number and gets the return value. |
| %rcx and %r11 are clobbered by the syscall; no matter, they |
| are caller-save (the syscall clobbers no callee-save regs, so |
| we don't have to do any register saving/restoring). |
| |
| This has no effect on the virtual machine; the expectation is |
| that the syscall mechanism makes no useful changes to any |
| register except %rax, which is returned. |
| */ |
| .globl VG_(do_syscall) |
| VG_(do_syscall): |
| /* Shuffle the C-ABI argument registers into the kernel's syscall |
| registers. Each destination is overwritten only after its old value |
| has been copied onward, so no argument is lost. The two chains below |
| (%r8/%r9/stack and %rdi/%rsi/%rdx/%rcx) do not overlap. */ |
| movq %r8, %r10 /* C arg 5 -> syscall arg 4 */ |
| movq %r9, %r8 /* C arg 6 -> syscall arg 5 */ |
| movq 8(%rsp), %r9 /* C arg 7 (on the stack) -> syscall arg 6 */ |
| movq %rdi, %rax /* C arg 1 -> syscall number */ |
| movq %rsi, %rdi /* C arg 2 -> syscall arg 1 */ |
| movq %rdx, %rsi /* C arg 3 -> syscall arg 2 */ |
| movq %rcx, %rdx /* C arg 4 -> syscall arg 3 */ |
| syscall |
| ret /* kernel result is handed back untouched in %rax */ |
| |
| |
| /* |
| Perform a clone system call. clone is strange because it has |
| fork()-like return-twice semantics, so it needs special |
| handling here. |
| |
| Upon entry, we have: |
| |
| int (*fn)(void*) in %rdi |
| void* child_stack in %rsi |
| int flags in %rdx |
| void* arg in %rcx |
| pid_t* child_tid in %r8 |
| pid_t* parent_tid in %r9 |
| void* tls_ptr at 8(%rsp) |
| |
| System call requires: |
| |
| int flags in %rdi |
| void* child_stack in %rsi |
| pid_t* parent_tid in %rdx |
| pid_t* child_tid in %r10 |
| void* tls_ptr in %r8 |
| |
| */ |
| .globl VG_(clone) |
| VG_(clone): |
| /* Reserve two 8-byte slots at the top of the child's stack and park fn |
| and arg there. The child picks them up after the syscall, when it is |
| already running on that stack. */ |
| subq $16, %rsi /* child_stack -= 16 */ |
| movq %rdi, 0(%rsi) /* slot 0: fn */ |
| movq %rcx, 8(%rsi) /* slot 1: arg */ |
| |
| /* Marshal the clone() arguments. Every source register is read |
| before it is overwritten. */ |
| movq %rdx, %rdi /* syscall arg1: flags */ |
| /* syscall arg2: child_stack, already in %rsi */ |
| movq %r9, %rdx /* syscall arg3: parent_tid */ |
| movq %r8, %r10 /* syscall arg4: child_tid */ |
| movq 8(%rsp), %r8 /* syscall arg5: tls_ptr */ |
| movq $__NR_clone, %rax /* syscall number */ |
| |
| syscall /* clone() */ |
| |
| testq %rax, %rax /* retval == 0 only in the child */ |
| jz .Lvg_clone_child |
| ret /* PARENT or ERROR: %rax goes back to the caller */ |
| |
| .Lvg_clone_child: |
| /* CHILD - call thread function */ |
| popq %rax /* fn */ |
| popq %rdi /* fn arg1: arg */ |
| call *%rax /* fn(arg) */ |
| |
| /* exit with the value fn returned */ |
| movq %rax, %rdi /* arg1: return value from fn */ |
| movq $__NR_exit, %rax |
| syscall |
| |
| ud2 /* Exit returned?! Trap rather than run on. */ |
| |
| .globl VG_(sigreturn) |
| VG_(sigreturn): |
| /* Invoke the rt_sigreturn syscall. Only the syscall number is loaded; |
| no argument registers are set up here. */ |
| movq $__NR_rt_sigreturn, %rax |
| syscall |
| /* rt_sigreturn is not expected to come back here. If it ever does, |
| trap (as VG_(clone) does after exit) instead of falling through |
| into whatever code follows this routine. */ |
| ud2 |
| |
| /*----------------------------------------------------------------*/ |
| /* |
| Perform a syscall for the client. This will run a syscall |
| with the client's specific per-thread signal mask. |
| |
| The structure of this function is such that, if the syscall is |
| interrupted by a signal, we can determine exactly what |
| execution state we were in with respect to the execution of |
| the syscall by examining the value of %rip in the signal |
| handler. This means that we can always do the appropriate |
| thing to precisely emulate the kernel's signal/syscall |
| interactions. |
| |
| The syscall number is taken from the argument, even though it |
| should also be in guest_state->guest_RAX. The syscall result |
| is written back to guest_state->guest_RAX on completion. |
| |
| Returns 0 if the syscall was successfully called (even if the |
| syscall itself failed), or a -ve error code if one of the |
| sigprocmasks failed (there's no way to determine which one |
| failed). |
| |
| VGA_(interrupted_syscall)() does the thread state fixup in the |
| case where we were interrupted by a signal. |
| |
| Prototype: |
| |
| Int VGA_(_client_syscall)(Int syscallno, // rdi |
| void* guest_state, // rsi |
| const vki_sigset_t *sysmask, // rdx |
| const vki_sigset_t *postmask, // rcx |
| Int nsigwords) // r8 |
| |
| */ |
| |
| /* from vki_arch.h */ |
| #define VKI_SIG_SETMASK 2 |
| |
| .globl VGA_(_client_syscall) |
| VGA_(_client_syscall): |
| /* Run one syscall on behalf of the client: install sysmask, issue the |
| syscall with its arguments loaded from the guest state, store the |
| result in the guest's RAX slot, then set the signal mask to *postmask. |
| On entry: %rdi = syscallno, %rsi = guest_state, %rdx = sysmask, |
| %rcx = postmask, %r8 = nsigwords. |
| On exit: %eax = result of the last rt_sigprocmask that was issued |
| (negative means it failed). |
| Numeric labels 1-5 mark the phases whose addresses are exported as |
| VGA_(blksys_*) at the end of this file, so instructions must not be |
| moved across them. |
| |
| First, save callee-saved regs */ |
| pushq %rbx |
| pushq %rbp |
| pushq %r12 |
| pushq %r13 |
| pushq %r14 |
| pushq %r15 |
| /* NOTE(review): nothing below writes %rbx, %rbp or %r12-%r15, so these saves look purely defensive -- confirm */ |
| #define FSZ ((4+1)*4) /* 4 args + ret addr; NOTE(review): FSZ is never referenced before its #undef below and its 4-byte slots do not match the 8-byte pushes used here -- looks like a leftover, confirm */ |
| /* The two macros below save / restore the five incoming argument registers; POP is the exact reverse of PUSH. They are not #undef'd in this block. */ |
| #define PUSH_di_si_dx_cx_8 \ |
| pushq %rdi ; \ |
| pushq %rsi ; \ |
| pushq %rdx ; \ |
| pushq %rcx ; \ |
| pushq %r8 |
| |
| #define POP_di_si_dx_cx_8 \ |
| popq %r8 ; \ |
| popq %rcx ; \ |
| popq %rdx ; \ |
| popq %rsi ; \ |
| popq %rdi |
| |
| 1: /* Even though we can't take a signal until the sigprocmask completes, |
| start the range early. |
| If rip is in the range [1,2), the syscall hasn't been started yet */ |
| |
| /* Set the signal mask which should be current during the syscall. */ |
| /* Save and restore all 5 arg regs round the call. This is easier |
| than figuring out the minimal set to save/restore. (The syscall |
| instruction itself clobbers %rcx, which holds postmask.) */ |
| |
| PUSH_di_si_dx_cx_8 |
| |
| movq $__NR_rt_sigprocmask, %rax // syscall # |
| movq $VKI_SIG_SETMASK, %rdi // how |
| movq %rdx, %rsi // new mask = sysmask (read %rdx before the next line overwrites it) |
| movq %rcx, %rdx // old-mask output = postmask (kernel stores the previous mask there) |
| movq %r8, %r10 // 4th syscall arg = nsigwords |
| syscall |
| |
| POP_di_si_dx_cx_8 |
| |
| testl %eax, %eax /* sign bit set => rt_sigprocmask returned an error */ |
| js 5f /* sigprocmask failed: its error code stays in %eax and becomes our return value */ |
| |
| /* OK, that worked. Now do the syscall proper. */ |
| |
| PUSH_di_si_dx_cx_8 |
| |
| movq %rsi, %rax /* rax --> VexGuestAMD64State * */ |
| pushq %rdi /* syscallno -> stack (%rax is busy as the base pointer) */ |
| movq OFFSET_amd64_RDI(%rax), %rdi /* guest RDI -> syscall arg1 */ |
| movq OFFSET_amd64_RSI(%rax), %rsi /* guest RSI -> syscall arg2 */ |
| movq OFFSET_amd64_RDX(%rax), %rdx /* guest RDX -> syscall arg3 */ |
| movq OFFSET_amd64_R10(%rax), %r10 /* guest R10 -> syscall arg4 */ |
| movq OFFSET_amd64_R8(%rax), %r8 /* guest R8 -> syscall arg5 */ |
| movq OFFSET_amd64_R9(%rax), %r9 /* guest R9 -> syscall arg6 */ |
| popq %rax /* syscallno -> %rax */ |
| |
| /* If rip==2, then the syscall was either just about |
| to start, or was interrupted and the kernel was |
| restarting it. */ |
| 2: syscall |
| 3: /* In the range [3, 4), the syscall result is in %rax, |
| but hasn't been committed to RAX. */ |
| |
| POP_di_si_dx_cx_8 /* brings back our own args (%rsi = guest_state); leaves %rax alone */ |
| |
| movq %rax, OFFSET_amd64_RAX(%rsi) /* save back to RAX */ |
| |
| 4: /* Re-block signals. If rip is in [4,5), then the syscall is complete and |
| we needn't worry about it. */ |
| PUSH_di_si_dx_cx_8 |
| |
| movq $__NR_rt_sigprocmask, %rax // syscall # |
| movq $VKI_SIG_SETMASK, %rdi // how |
| movq %rcx, %rsi // new mask = postmask |
| xorq %rdx, %rdx // old-mask output = NULL (previous mask not wanted) |
| movq %r8, %r10 // 4th syscall arg = nsigwords |
| syscall |
| |
| POP_di_si_dx_cx_8 |
| |
| 5: /* now safe from signals; %eax already holds the value to return |
| (nothing below modifies it) */ |
| |
| popq %r15 |
| popq %r14 |
| popq %r13 |
| popq %r12 |
| popq %rbp |
| popq %rbx |
| #undef FSZ |
| ret |
| |
| .section .rodata |
| /* Export the phase boundaries of VGA_(_client_syscall) so that |
| VGA_(interrupted_syscall) can do the right thing. Each symbol is one |
| 8-byte word holding the address of the numeric label of the same |
| number. A reference of the form Nb binds to the nearest preceding |
| definition of label N, which must remain the one inside |
| VGA_(_client_syscall). */ |
| |
| .p2align 3 /* keep the 8-byte entries naturally aligned */ |
| |
| .globl VGA_(blksys_setup) |
| VGA_(blksys_setup): .quad 1b /* start of mask setup; syscall not yet started */ |
| |
| .globl VGA_(blksys_restart) |
| VGA_(blksys_restart): .quad 2b /* address of the syscall instruction itself */ |
| |
| .globl VGA_(blksys_complete) |
| VGA_(blksys_complete): .quad 3b /* result in %rax, not yet stored to guest RAX */ |
| |
| .globl VGA_(blksys_committed) |
| VGA_(blksys_committed): .quad 4b /* result stored; re-blocking signals */ |
| |
| .globl VGA_(blksys_finished) |
| VGA_(blksys_finished): .quad 5b /* past the last sigprocmask */ |
| .previous |
| |
| /* Let the linker know we don't need an executable stack */ |
| .section .note.GNU-stack,"",@progbits |
| |
| ##--------------------------------------------------------------------## |
| ##--- end ---## |
| ##--------------------------------------------------------------------## |