
/*--------------------------------------------------------------------*/
/*--- Doing syscalls.                                  m_syscall.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2007 Julian Seward 
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_vkiscnums.h"
#include "pub_core_syscall.h"

/* ---------------------------------------------------------------------
   Building syscall return values.
   ------------------------------------------------------------------ */

/* Make a SysRes value from a syscall return value.  This is
   Linux-specific.

   From:
   http://sources.redhat.com/cgi-bin/cvsweb.cgi/libc/sysdeps/unix/sysv/
   linux/i386/sysdep.h?
   rev=1.28&content-type=text/x-cvsweb-markup&cvsroot=glibc

   Linux uses a negative return value to indicate syscall errors,
   unlike most Unices, which use the condition codes' carry flag.

   Since version 2.1 the return value of a system call might be
   negative even if the call succeeded.  E.g., the 'lseek' system call
   might return a large offset.  Therefore we must not anymore test
   for < 0, but test for a real error by making sure the value in %eax
   is a real error number.  Linus said he will make sure that no
   syscall returns a value in -1 .. -4095 as a valid result so we can
   safely test with -4095.
*/
SysRes VG_(mk_SysRes_x86_linux) ( UInt val ) {
   SysRes sr;
   /* A raw value in -4095 .. -1 denotes failure; its negation is the
      error number.  Anything else is a successful result. */
   sr.isError = val >= -4095 && val <= -1;
   sr.err     = sr.isError ? -val : 0;
   sr.res     = sr.isError ? 0 : val;
   return sr;
}

/* Make a SysRes from a raw amd64-linux syscall return value.  A value
   in -4095 .. -1 denotes failure, in which case 'err' is the negated
   value and 'res' is zero; otherwise 'res' is the value and 'err' is
   zero. */
SysRes VG_(mk_SysRes_amd64_linux) ( ULong val ) {
   SysRes sr;
   sr.isError = val >= -4095 && val <= -1;
   sr.err     = sr.isError ? -val : 0;
   sr.res     = sr.isError ? 0 : val;
   return sr;
}

/* PPC uses the CR7.SO bit to flag an error (CR0 in IBM-speak).
   The flag must be supplied in the bottom bit of the second arg; all
   other bits of 'cr0so' are ignored.  When the flag is set 'val' is
   stored as the error number, otherwise as the result. */
SysRes VG_(mk_SysRes_ppc32_linux) ( UInt val, UInt cr0so ) {
   SysRes sr;
   sr.isError = (cr0so & 1) != 0;
   sr.err     = sr.isError ? val : 0;
   sr.res     = sr.isError ? 0 : val;
   return sr;
}

/* Make a SysRes from a ppc64-linux syscall outcome.  cr0.so must be in
   the l.s.b. of the 2nd arg; the remaining bits of 'cr0so' are
   ignored.  When the flag is set 'val' is stored as the error number,
   otherwise as the result. */
SysRes VG_(mk_SysRes_ppc64_linux) ( ULong val, ULong cr0so ) {
   SysRes sr;
   sr.isError = (cr0so & 1) != 0;
   sr.err     = sr.isError ? val : 0;
   sr.res     = sr.isError ? 0 : val;
   return sr;
}

/* AIX scheme: both 'res' (r3) and 'err' (r4) are recorded.  A nonzero
   'err' means the call has failed, but AIX userspace may still ignore
   'err' and consult 'res' instead to decide whether the call failed,
   so neither value is discarded. */
SysRes VG_(mk_SysRes_ppc32_aix5) ( UInt res, UInt err ) {
   SysRes sr;
   sr.err     = err;
   sr.res     = res;
   /* Failure is judged from the error number as stored. */
   sr.isError = sr.err != 0;
   return sr;
}

/* 64-bit AIX constructor: keeps both the result and the error number,
   and flags failure when the stored error number is nonzero. */
SysRes VG_(mk_SysRes_ppc64_aix5) ( ULong res, ULong err ) {
   SysRes sr;
   sr.err     = err;
   sr.res     = res;
   /* Test the stored field, not the parameter. */
   sr.isError = sr.err != 0;
   return sr;
}

/* Generic constructors. */

/* Build a failed SysRes carrying error number 'err'; the result field
   is zero. */
SysRes VG_(mk_SysRes_Error) ( UWord err ) {
   SysRes failure;
   failure.isError = True;
   failure.err     = err;
   failure.res     = 0;
   return failure;
}

/* Build a successful SysRes carrying result 'res'; the error field is
   zero. */
SysRes VG_(mk_SysRes_Success) ( UWord res ) {
   SysRes success;
   success.isError = False;
   success.err     = 0;
   success.res     = res;
   return success;
}


/* ---------------------------------------------------------------------
   A function for doing syscalls.
   ------------------------------------------------------------------ */

#if defined(VGP_x86_linux)
/* Raw syscall worker for x86-linux.

   Incoming args (syscall number + up to 6 args) come on the stack.
   (ie. the C calling convention).

   The syscall number goes in %eax.  The args are passed to the syscall in
   the regs %ebx, %ecx, %edx, %esi, %edi, %ebp, ie. the kernel's syscall
   calling convention.

   %eax gets the return value.  Not sure which registers the kernel
   clobbers, so we preserve all the callee-save regs (%esi, %edi, %ebx,
   %ebp).

   The stack offsets below are written as 16+N: the 16 skips the four
   registers pushed on entry, and N is the offset the argument had from
   %esp before those pushes.
*/
extern UWord do_syscall_WRK (
          UWord syscall_no, 
          UWord a1, UWord a2, UWord a3,
          UWord a4, UWord a5, UWord a6
       );
asm(
".text\n"
"do_syscall_WRK:\n"
        /* save the callee-save regs */
"	push	%esi\n"
"	push	%edi\n"
"	push	%ebx\n"
"	push	%ebp\n"
        /* C stack args --> kernel syscall registers */
"	movl	16+ 4(%esp),%eax\n"   /* syscall_no */
"	movl	16+ 8(%esp),%ebx\n"   /* a1 */
"	movl	16+12(%esp),%ecx\n"   /* a2 */
"	movl	16+16(%esp),%edx\n"   /* a3 */
"	movl	16+20(%esp),%esi\n"   /* a4 */
"	movl	16+24(%esp),%edi\n"   /* a5 */
"	movl	16+28(%esp),%ebp\n"   /* a6 */
"	int	$0x80\n"              /* enter the kernel; result in %eax */
        /* restore in the reverse order of the pushes */
"	popl	%ebp\n"
"	popl	%ebx\n"
"	popl	%edi\n"
"	popl	%esi\n"
"	ret\n"
".previous\n"
);

#elif defined(VGP_amd64_linux)
/* Raw syscall worker for amd64-linux.

   Incoming args (syscall number + up to 6 args) come in %rdi, %rsi,
   %rdx, %rcx, %r8, %r9, and the last one on the stack (ie. the C
   calling convention).

   The syscall number goes in %rax.  The args are passed to the syscall in
   the regs %rdi, %rsi, %rdx, %r10, %r8, %r9 (yes, really %r10, not %rcx),
   ie. the kernel's syscall calling convention.

   %rax gets the return value.  %rcx and %r11 are clobbered by the syscall;
   no matter, they are caller-save (the syscall clobbers no callee-save
   regs, so we don't have to do any register saving/restoring).

   The moves below are ordered so that each source register is read
   before it is overwritten as a destination.
*/
extern UWord do_syscall_WRK (
          UWord syscall_no, 
          UWord a1, UWord a2, UWord a3,
          UWord a4, UWord a5, UWord a6
       );
asm(
".text\n"
"do_syscall_WRK:\n"
        /* Convert function calling convention --> syscall calling
           convention */
"	movq	%rdi, %rax\n"    /* syscall_no */
"	movq	%rsi, %rdi\n"    /* a1 */
"	movq	%rdx, %rsi\n"    /* a2 */
"	movq	%rcx, %rdx\n"    /* a3 */
"	movq	%r8,  %r10\n"    /* a4 */
"	movq	%r9,  %r8\n"     /* a5 */
"	movq    8(%rsp), %r9\n"	 /* last arg from stack */
"	syscall\n"               /* result in %rax */
"	ret\n"
".previous\n"
);

#elif defined(VGP_ppc32_linux)
/* Raw syscall worker for ppc32-linux.

   Incoming args (syscall number + up to 6 args) come in %r3:%r9.

   The syscall number goes in %r0.  The args are passed to the syscall in
   the regs %r3:%r8, i.e. the kernel's syscall calling convention.

   The %cr0.so bit flags an error.
   We return the syscall return value in %r3, and the %cr0.so in 
   the lowest bit of %r4.
   We return a ULong, of which %r3 is the high word, and %r4 the low.
   No callee-save regs are clobbered, so no saving/restoring is needed.
*/
extern ULong do_syscall_WRK (
          UWord syscall_no, 
          UWord a1, UWord a2, UWord a3,
          UWord a4, UWord a5, UWord a6
       );
asm(
".text\n"
"do_syscall_WRK:\n"
        /* Shift everything down by one register: syscall number to r0,
           the six args to r3:r8. */
"        mr      0,3\n"
"        mr      3,4\n"
"        mr      4,5\n"
"        mr      5,6\n"
"        mr      6,7\n"
"        mr      7,8\n"
"        mr      8,9\n"
"        sc\n"                  /* syscall: sets %cr0.so on error         */
"        mfcr    4\n"           /* %cr -> low word of return var          */
"        rlwinm  4,4,4,31,31\n" /* rotate the SO flag bit to the lsb, and mask it */
"        blr\n"                 /* and return                             */
".previous\n"
);

#elif defined(VGP_ppc64_linux)
/* Raw syscall worker for ppc64-linux.

   Due to the need to return 65 bits of result, this is completely
   different from the ppc32 case.  The single arg register points to a
   7-word block containing the syscall # and the 6 args.  The syscall
   result proper is put in [0] of the block, and %cr0.so is in the
   bottom bit of [1].  The block is therefore both input and output:
   entries [0] and [1] are overwritten on return. */
extern void do_syscall_WRK ( ULong* argblock );
asm(
".align   2\n"
".globl   do_syscall_WRK\n"
/* Entry in the .opd section for do_syscall_WRK: the address of the
   code (.do_syscall_WRK), the TOC base, and a zero word. */
".section \".opd\",\"aw\"\n"
".align   3\n"
"do_syscall_WRK:\n"
".quad    .do_syscall_WRK,.TOC.@tocbase,0\n"
".previous\n"
".type    .do_syscall_WRK,@function\n"
".globl   .do_syscall_WRK\n"
".do_syscall_WRK:\n"
"        std  3,-16(1)\n"  /* stash arg (argblock ptr) at -16(r1) */
"        ld   8, 48(3)\n"  /* sc arg 6 */
"        ld   7, 40(3)\n"  /* sc arg 5 */
"        ld   6, 32(3)\n"  /* sc arg 4 */
"        ld   5, 24(3)\n"  /* sc arg 3 */
"        ld   4, 16(3)\n"  /* sc arg 2 */
"        ld   0,  0(3)\n"  /* sc number */
"        ld   3,  8(3)\n"  /* sc arg 1 -- loaded last, since r3 is the base
                              register for all the loads above */
"        sc\n"             /* result in r3 and cr0.so */
"        ld   5,-16(1)\n"  /* reacquire argblock ptr (r5 is caller-save) */
"        std  3,0(5)\n"    /* argblock[0] = r3 */
"        mfcr 3\n"         /* whole condition register -> r3 */
"        srwi 3,3,28\n"    /* move the cr0 field down to the low 4 bits */
"        andi. 3,3,1\n"    /* keep only the lowest bit, which is cr0.so */
"        std  3,8(5)\n"    /* argblock[1] = cr0.so & 1 */
"        blr\n"
);

#elif defined(VGP_ppc32_aix5)
/* Raw syscall worker for ppc32-aix5.  Performs syscall 'sysno' with
   the eight arguments a1..a8, then stores the value the kernel left in
   r3 into *res_r3 and the value it left in r4 into *res_r4.  The args
   are marshalled through the local array 'args', which the asm also
   reuses to hand the two results back to C. */
static void do_syscall_WRK ( UWord* res_r3, UWord* res_r4,
                             UWord sysno, 
                             UWord a1, UWord a2, UWord a3,
                             UWord a4, UWord a5, UWord a6,
                             UWord a7, UWord a8 )
{
   /* Syscalls on AIX are very similar to function calls:
      - up to 8 args in r3-r10
      - syscall number in r2
      - kernel resumes at 'lr', so must set it appropriately beforehand
      - r3 holds the result and r4 any applicable error code
      See http://www.cs.utexas.edu/users/cart/publications/tr00-04.ps
      and also 'man truss'.
   */
   /* For some reason gcc-3.3.2 doesn't preserve r31 across the asm
      even though we state it to be trashed.  So use r27 instead. */
   UWord args[9];
   args[0] = sysno;
   args[1] = a1; args[2] = a2;
   args[3] = a3; args[4] = a4;
   args[5] = a5; args[6] = a6;
   args[7] = a7; args[8] = a8;

   __asm__ __volatile__(

      // establish base ptr (r28 := &args[0])
      "mr   28,%0\n\t"

      // save r2, lr -- both are overwritten below and restored by
      // hand after the syscall
      "mr   27,2\n\t" // save r2 in r27
      "mflr 30\n\t"   // save lr in r30

      // set syscall number and args (4-byte slots of args[])
      "lwz   2,  0(28)\n\t"
      "lwz   3,  4(28)\n\t"
      "lwz   4,  8(28)\n\t"
      "lwz   5, 12(28)\n\t"
      "lwz   6, 16(28)\n\t"
      "lwz   7, 20(28)\n\t"
      "lwz   8, 24(28)\n\t"
      "lwz   9, 28(28)\n\t"
      "lwz  10, 32(28)\n\t"

      // set bit 3 of CR1 otherwise AIX 5.1 returns to the
      // wrong address after the sc instruction
      "crorc 6,6,6\n\t"

      // set up LR to point just after the sc insn.  The bl leaves the
      // address of the mflr in lr; adding 16 skips the four 4-byte
      // insns mflr, addi, mtlr and sc.
      ".long 0x48000005\n\t" // "bl here+4" -- lr := & next insn
      "mflr 29\n\t"
      "addi 29,29,16\n\t"
      "mtlr 29\n\t"

      // do it!
      "sc\n\t"

      // result is now in r3; save it in args[0]
      "stw  3,0(28)\n\t"
      // error code in r4; save it in args[1]
      "stw  4,4(28)\n\t"

      // restore
      "mr   2,27\n\t"
      "mtlr 30\n\t"

      : /*out*/
      : /*in*/  "b" (&args[0])
      : /*trash*/
           /*temps*/    "r31","r30","r29","r28","r27",
           /*args*/     "r3","r4","r5","r6","r7","r8","r9","r10",
           /*paranoia*/ "memory","cc","r0","r1","r11","r12","r13",
                        "xer","ctr","cr0","cr1","cr2","cr3",
                        "cr4","cr5","cr6","cr7"
   );

   /* The asm overwrote args[0] and args[1] with r3 and r4. */
   *res_r3 = args[0];
   *res_r4 = args[1];
}

#elif defined(VGP_ppc64_aix5)
/* Raw syscall worker for ppc64-aix5.  Performs syscall 'sysno' with
   the eight arguments a1..a8, then stores the value the kernel left in
   r3 into *res_r3 and the value it left in r4 into *res_r4.  The args
   are marshalled through the local array 'args', which the asm also
   reuses to hand the two results back to C. */
static void do_syscall_WRK ( UWord* res_r3, UWord* res_r4,
                             UWord sysno, 
                             UWord a1, UWord a2, UWord a3,
                             UWord a4, UWord a5, UWord a6,
                             UWord a7, UWord a8 )
{
   /* Same scheme as ppc32-aix5: syscall number in r2, args in r3-r10,
      return address for the kernel in lr, result in r3 and error code
      in r4.  The only difference visible here is that args[] is
      accessed with ld/std at 8-byte offsets. */
   UWord args[9];
   args[0] = sysno;
   args[1] = a1; args[2] = a2;
   args[3] = a3; args[4] = a4;
   args[5] = a5; args[6] = a6;
   args[7] = a7; args[8] = a8;

   __asm__ __volatile__(

      // establish base ptr (r28 := &args[0])
      "mr   28,%0\n\t"

      // save r2, lr -- both are overwritten below and restored by
      // hand after the syscall
      "mr   27,2\n\t" // save r2 in r27
      "mflr 30\n\t"   // save lr in r30

      // set syscall number and args (8-byte slots of args[])
      "ld    2,  0(28)\n\t"
      "ld    3,  8(28)\n\t"
      "ld    4, 16(28)\n\t"
      "ld    5, 24(28)\n\t"
      "ld    6, 32(28)\n\t"
      "ld    7, 40(28)\n\t"
      "ld    8, 48(28)\n\t"
      "ld    9, 56(28)\n\t"
      "ld   10, 64(28)\n\t"

      // set bit 3 of CR1 otherwise AIX 5.1 returns to the
      // wrong address after the sc instruction
      "crorc 6,6,6\n\t"

      // set up LR to point just after the sc insn.  The bl leaves the
      // address of the mflr in lr; adding 16 skips the four 4-byte
      // insns mflr, addi, mtlr and sc.
      ".long 0x48000005\n\t" // "bl here+4" -- lr := & next insn
      "mflr 29\n\t"
      "addi 29,29,16\n\t"
      "mtlr 29\n\t"

      // do it!
      "sc\n\t"

      // result is now in r3; save it in args[0]
      "std  3,0(28)\n\t"
      // error code in r4; save it in args[1]
      "std  4,8(28)\n\t"

      // restore
      "mr   2,27\n\t"
      "mtlr 30\n\t"

      : /*out*/
      : /*in*/  "b" (&args[0])
      : /*trash*/
           /*temps*/    "r31","r30","r29","r28","r27",
           /*args*/     "r3","r4","r5","r6","r7","r8","r9","r10",
           /*paranoia*/ "memory","cc","r0","r1","r11","r12","r13",
                        "xer","ctr","cr0","cr1","cr2","cr3",
                        "cr4","cr5","cr6","cr7"
   );

   /* The asm overwrote args[0] and args[1] with r3 and r4. */
   *res_r3 = args[0];
   *res_r4 = args[1];
}

#else
#  error Unknown platform
#endif


/* Perform syscall 'sysno' via the platform's do_syscall_WRK and wrap
   the raw outcome in a SysRes using the matching mk_SysRes_*
   constructor.  On the Linux platforms only a1..a6 are passed on; a7
   and a8 are used on AIX only. */
SysRes VG_(do_syscall) ( UWord sysno, UWord a1, UWord a2, UWord a3,
                                      UWord a4, UWord a5, UWord a6,
                                      UWord a7, UWord a8 )
{
#if defined(VGP_x86_linux)
   return VG_(mk_SysRes_x86_linux)(
             do_syscall_WRK(sysno, a1, a2, a3, a4, a5, a6) );

#elif defined(VGP_amd64_linux)
   return VG_(mk_SysRes_amd64_linux)(
             do_syscall_WRK(sysno, a1, a2, a3, a4, a5, a6) );

#elif defined(VGP_ppc32_linux)
   /* The worker packs the result into the high word and the cr0.so
      flag into the low word of a ULong. */
   ULong packed = do_syscall_WRK(sysno, a1, a2, a3, a4, a5, a6);
   return VG_(mk_SysRes_ppc32_linux)( (UInt)(packed >> 32),
                                      (UInt)packed );

#elif defined(VGP_ppc64_linux)
   /* The block is in/out: the worker overwrites [0] with the result
      and [1] with the cr0.so flag. */
   ULong blk[7];
   blk[0] = sysno;
   blk[1] = a1;
   blk[2] = a2;
   blk[3] = a3;
   blk[4] = a4;
   blk[5] = a5;
   blk[6] = a6;
   do_syscall_WRK( blk );
   return VG_(mk_SysRes_ppc64_linux)( blk[0], blk[1] );

#elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
   UWord r3_val;
   UWord r4_val;
   do_syscall_WRK( &r3_val, &r4_val,
                   sysno, a1, a2, a3, a4, a5, a6, a7, a8 );

   /* Try to set the error number to zero if the syscall hasn't
      really failed. */
   if (sysno == __NR_AIX5_kread
       || sysno == __NR_AIX5_kwrite) {
      /* for these two, only a result of -1 is taken as failure */
      if (r3_val != (UWord)-1L)
         r4_val = 0;
   }
   else if (sysno == __NR_AIX5_sigprocmask
            || sysno == __NR_AIX5__sigpending) {
      /* for these two, a result of 0 is taken as success */
      if (r3_val == 0)
         r4_val = 0;
   }

#  if defined(VGP_ppc32_aix5)
   return VG_(mk_SysRes_ppc32_aix5)( r3_val, r4_val );
#  else
   return VG_(mk_SysRes_ppc64_aix5)( r3_val, r4_val );
#  endif

#else
#  error Unknown platform
#endif
}

/* ---------------------------------------------------------------------
   Names of errors.
   ------------------------------------------------------------------ */

/* Return a string which gives the name of an error value.  Note,
   unlike the standard C strerror fn, the returned string is not
   malloc-allocated or writable -- treat it as a constant.  Only the
   error numbers listed in the table below are recognised; any other
   value yields a fixed "unknown error" string.
   TODO: implement this properly. */

const HChar* VG_(strerror) ( UWord errnum )
{
   /* Known error numbers and their descriptions. */
   static const struct {
      UWord        code;
      const HChar* text;
   } known[] = {
      { VKI_EPERM,       "Operation not permitted" },
      { VKI_ENOENT,      "No such file or directory" },
      { VKI_ESRCH,       "No such process" },
      { VKI_EINTR,       "Interrupted system call" },
      { VKI_EBADF,       "Bad file number" },
      { VKI_EAGAIN,      "Try again" },
      { VKI_ENOMEM,      "Out of memory" },
      { VKI_EACCES,      "Permission denied" },
      { VKI_EFAULT,      "Bad address" },
      { VKI_EEXIST,      "File exists" },
      { VKI_EINVAL,      "Invalid argument" },
      { VKI_EMFILE,      "Too many open files" },
      { VKI_ENOSYS,      "Function not implemented" },
      { VKI_EOVERFLOW,   "Value too large for defined data type" },
      { VKI_ERESTARTSYS, "ERESTARTSYS" }
   };
   UWord i;

   for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
      if (known[i].code == errnum)
         return known[i].text;
   }
   return "VG_(strerror): unknown error";
}


/*--------------------------------------------------------------------*/
/*--- end                                                        ---*/
/*--------------------------------------------------------------------*/
