Add a port to IBM z/Architecture (s390x) running Linux -- Valgrind
side components. (Florian Krohm <britzel@acm.org> and Christian
Borntraeger <borntraeger@de.ibm.com>).  Fixes #243404.



git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11604 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index d9d1bca..5b50c4e 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -289,7 +289,8 @@
 	m_dispatch/dispatch-amd64-linux.S \
 	m_dispatch/dispatch-ppc32-linux.S \
 	m_dispatch/dispatch-ppc64-linux.S \
-	m_dispatch/dispatch-arm-linux.S \
+ 	m_dispatch/dispatch-arm-linux.S \
+	m_dispatch/dispatch-s390x-linux.S \
 	m_dispatch/dispatch-ppc32-aix5.S \
 	m_dispatch/dispatch-ppc64-aix5.S \
 	m_dispatch/dispatch-x86-darwin.S \
@@ -309,7 +310,8 @@
 	m_sigframe/sigframe-amd64-linux.c \
 	m_sigframe/sigframe-ppc32-linux.c \
 	m_sigframe/sigframe-ppc64-linux.c \
-	m_sigframe/sigframe-arm-linux.c \
+ 	m_sigframe/sigframe-arm-linux.c \
+	m_sigframe/sigframe-s390x-linux.c \
 	m_sigframe/sigframe-ppc32-aix5.c \
 	m_sigframe/sigframe-ppc64-aix5.c \
 	m_sigframe/sigframe-x86-darwin.c \
@@ -318,7 +320,8 @@
 	m_syswrap/syscall-amd64-linux.S \
 	m_syswrap/syscall-ppc32-linux.S \
 	m_syswrap/syscall-ppc64-linux.S \
-	m_syswrap/syscall-arm-linux.S \
+ 	m_syswrap/syscall-arm-linux.S \
+	m_syswrap/syscall-s390x-linux.S \
 	m_syswrap/syscall-ppc32-aix5.S \
 	m_syswrap/syscall-ppc64-aix5.S \
 	m_syswrap/syscall-x86-darwin.S \
@@ -333,7 +336,8 @@
 	m_syswrap/syswrap-amd64-linux.c \
 	m_syswrap/syswrap-ppc32-linux.c \
 	m_syswrap/syswrap-ppc64-linux.c \
-	m_syswrap/syswrap-arm-linux.c \
+ 	m_syswrap/syswrap-arm-linux.c \
+	m_syswrap/syswrap-s390x-linux.c \
 	m_syswrap/syswrap-ppc32-aix5.c \
 	m_syswrap/syswrap-ppc64-aix5.c \
 	m_syswrap/syswrap-x86-darwin.c \
diff --git a/coregrind/launcher-linux.c b/coregrind/launcher-linux.c
index e7c9c7f..e70152d 100644
--- a/coregrind/launcher-linux.c
+++ b/coregrind/launcher-linux.c
@@ -205,6 +205,10 @@
                 (ehdr->e_ident[EI_OSABI] == ELFOSABI_SYSV ||
                  ehdr->e_ident[EI_OSABI] == ELFOSABI_LINUX)) {
                platform = "ppc64-linux";
+            } else if (ehdr->e_machine == EM_S390 &&
+                       (ehdr->e_ident[EI_OSABI] == ELFOSABI_SYSV ||
+                        ehdr->e_ident[EI_OSABI] == ELFOSABI_LINUX)) {
+               platform = "s390x-linux";
             }
          }
       }
@@ -278,7 +282,8 @@
        (0==strcmp(VG_PLATFORM,"amd64-linux")) ||
        (0==strcmp(VG_PLATFORM,"ppc32-linux")) ||
        (0==strcmp(VG_PLATFORM,"ppc64-linux")) ||
-       (0==strcmp(VG_PLATFORM,"arm-linux")))
+       (0==strcmp(VG_PLATFORM,"arm-linux"))   ||
+       (0==strcmp(VG_PLATFORM,"s390x-linux")))
       default_platform = VG_PLATFORM;
    else
       barf("Unknown VG_PLATFORM '%s'", VG_PLATFORM);
diff --git a/coregrind/m_aspacemgr/aspacemgr-common.c b/coregrind/m_aspacemgr/aspacemgr-common.c
index b7a51d6..1d917df 100644
--- a/coregrind/m_aspacemgr/aspacemgr-common.c
+++ b/coregrind/m_aspacemgr/aspacemgr-common.c
@@ -159,7 +159,8 @@
    res = VG_(do_syscall6)(__NR_mmap2, (UWord)start, length,
                           prot, flags, fd, offset / 4096);
 #  elif defined(VGP_amd64_linux) || defined(VGP_ppc64_linux) \
-        || defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
+        || defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5) \
+        || defined(VGP_s390x_linux)
    res = VG_(do_syscall6)(__NR_mmap, (UWord)start, length, 
                          prot, flags, fd, offset);
 #  elif defined(VGP_x86_darwin)
diff --git a/coregrind/m_aspacemgr/aspacemgr-linux.c b/coregrind/m_aspacemgr/aspacemgr-linux.c
index 8ed8b84..cc092ce 100644
--- a/coregrind/m_aspacemgr/aspacemgr-linux.c
+++ b/coregrind/m_aspacemgr/aspacemgr-linux.c
@@ -900,10 +900,10 @@
       These kernels report which mappings are really executable in
       the /proc/self/maps output rather than mirroring what was asked
       for when each mapping was created. In order to cope with this we
-      have a sloppyXcheck mode which we enable on x86 - in this mode we
-      allow the kernel to report execute permission when we weren't
+      have a sloppyXcheck mode which we enable on x86 and s390 - in this
+      mode we allow the kernel to report execute permission when we weren't
       expecting it but not vice versa. */
-#  if defined(VGA_x86)
+#  if defined(VGA_x86) || defined (VGA_s390x)
    sloppyXcheck = True;
 #  else
    sloppyXcheck = False;
diff --git a/coregrind/m_coredump/coredump-elf.c b/coregrind/m_coredump/coredump-elf.c
index c47562e..ef27bb5 100644
--- a/coregrind/m_coredump/coredump-elf.c
+++ b/coregrind/m_coredump/coredump-elf.c
@@ -233,9 +233,14 @@
    prs->pr_pgrp = VG_(getpgrp)();
    prs->pr_sid = VG_(getpgrp)();
    
+#ifdef VGP_s390x_linux
+   /* prs->pr_reg has struct type. Need to take address. */
+   regs = (struct vki_user_regs_struct *)&(prs->pr_reg);
+#else
    regs = (struct vki_user_regs_struct *)prs->pr_reg;
 
    vg_assert(sizeof(*regs) == sizeof(prs->pr_reg));
+#endif
 
 #if defined(VGP_x86_linux)
    regs->eflags = LibVEX_GuestX86_get_eflags( &arch->vex );
@@ -343,6 +348,16 @@
    regs->ARM_pc   = arch->vex.guest_R15T;
    regs->ARM_cpsr = LibVEX_GuestARM_get_cpsr( &((ThreadArchState*)arch)->vex );
 
+#elif defined(VGP_s390x_linux)
+#  define DO(n)  regs->gprs[n] = arch->vex.guest_r##n
+   DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
+   DO(8);  DO(9);  DO(10); DO(11); DO(12); DO(13); DO(14); DO(15);
+#  undef DO
+#  define DO(n)  regs->acrs[n] = arch->vex.guest_a##n
+   DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
+   DO(8);  DO(9);  DO(10); DO(11); DO(12); DO(13); DO(14); DO(15);
+#  undef DO
+   regs->orig_gpr2 = arch->vex.guest_r2;
 #else
 #  error Unknown ELF platform
 #endif
@@ -415,6 +430,11 @@
 #elif defined(VGP_arm_linux)
    // umm ...
 
+#elif defined(VGP_s390x_linux)
+#  define DO(n)  fpu->fprs[n].ui = arch->vex.guest_f##n
+   DO(0);  DO(1);  DO(2);  DO(3);  DO(4);  DO(5);  DO(6);  DO(7);
+   DO(8);  DO(9);  DO(10); DO(11); DO(12); DO(13); DO(14); DO(15);
+# undef DO
 #else
 #  error Unknown ELF platform
 #endif
diff --git a/coregrind/m_debugger.c b/coregrind/m_debugger.c
index eaf80d1..0638755 100644
--- a/coregrind/m_debugger.c
+++ b/coregrind/m_debugger.c
@@ -242,6 +242,76 @@
 #elif defined(VGP_amd64_darwin)
    I_die_here;
 
+#elif defined(VGP_s390x_linux)
+   struct vki_user_regs_struct regs;
+   vki_ptrace_area pa;
+
+   /* We don't set the psw mask and start at offset 8 */
+   pa.vki_len = (unsigned long) &regs.per_info - (unsigned long) &regs.psw.addr;
+   pa.vki_process_addr = (unsigned long) &regs.psw.addr;
+   pa.vki_kernel_addr = 8;
+
+   VG_(memset)(&regs, 0, sizeof(regs));
+   regs.psw.addr = vex->guest_IA;
+
+   /* We don't set the mask */
+   regs.gprs[0] = vex->guest_r0;
+   regs.gprs[1] = vex->guest_r1;
+   regs.gprs[2] = vex->guest_r2;
+   regs.gprs[3] = vex->guest_r3;
+   regs.gprs[4] = vex->guest_r4;
+   regs.gprs[5] = vex->guest_r5;
+   regs.gprs[6] = vex->guest_r6;
+   regs.gprs[7] = vex->guest_r7;
+   regs.gprs[8] = vex->guest_r8;
+   regs.gprs[9] = vex->guest_r9;
+   regs.gprs[10] = vex->guest_r10;
+   regs.gprs[11] = vex->guest_r11;
+   regs.gprs[12] = vex->guest_r12;
+   regs.gprs[13] = vex->guest_r13;
+   regs.gprs[14] = vex->guest_r14;
+   regs.gprs[15] = vex->guest_r15;
+
+   regs.acrs[0] = vex->guest_a0;
+   regs.acrs[1] = vex->guest_a1;
+   regs.acrs[2] = vex->guest_a2;
+   regs.acrs[3] = vex->guest_a3;
+   regs.acrs[4] = vex->guest_a4;
+   regs.acrs[5] = vex->guest_a5;
+   regs.acrs[6] = vex->guest_a6;
+   regs.acrs[7] = vex->guest_a7;
+   regs.acrs[8] = vex->guest_a8;
+   regs.acrs[9] = vex->guest_a9;
+   regs.acrs[10] = vex->guest_a10;
+   regs.acrs[11] = vex->guest_a11;
+   regs.acrs[12] = vex->guest_a12;
+   regs.acrs[13] = vex->guest_a13;
+   regs.acrs[14] = vex->guest_a14;
+   regs.acrs[15] = vex->guest_a15;
+
+   /* only used for system call restart and friends, just use r2 */
+   regs.orig_gpr2 = vex->guest_r2;
+
+   regs.fp_regs.fprs[0].ui = vex->guest_f0;
+   regs.fp_regs.fprs[1].ui = vex->guest_f1;
+   regs.fp_regs.fprs[2].ui = vex->guest_f2;
+   regs.fp_regs.fprs[3].ui = vex->guest_f3;
+   regs.fp_regs.fprs[4].ui = vex->guest_f4;
+   regs.fp_regs.fprs[5].ui = vex->guest_f5;
+   regs.fp_regs.fprs[6].ui = vex->guest_f6;
+   regs.fp_regs.fprs[7].ui = vex->guest_f7;
+   regs.fp_regs.fprs[8].ui = vex->guest_f8;
+   regs.fp_regs.fprs[9].ui = vex->guest_f9;
+   regs.fp_regs.fprs[10].ui = vex->guest_f10;
+   regs.fp_regs.fprs[11].ui = vex->guest_f11;
+   regs.fp_regs.fprs[12].ui = vex->guest_f12;
+   regs.fp_regs.fprs[13].ui = vex->guest_f13;
+   regs.fp_regs.fprs[14].ui = vex->guest_f14;
+   regs.fp_regs.fprs[15].ui = vex->guest_f15;
+   regs.fp_regs.fpc = vex->guest_fpc;
+
+   return VG_(ptrace)(VKI_PTRACE_POKEUSR_AREA, pid,  &pa, NULL);
+
 #else
 #  error Unknown arch
 #endif
diff --git a/coregrind/m_debuginfo/d3basics.c b/coregrind/m_debuginfo/d3basics.c
index e6de809..956fc2f 100644
--- a/coregrind/m_debuginfo/d3basics.c
+++ b/coregrind/m_debuginfo/d3basics.c
@@ -409,6 +409,9 @@
    if (regno == 11) { *a = regs->fp; return True; } 
 #  elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
    vg_assert(0); /* this function should never be called */
+#  elif defined(VGP_s390x_linux)
+   if (regno == 15) { *a = regs->sp; return True; }
+   if (regno == 11) { *a = regs->fp; return True; }
 #  else
 #    error "Unknown platform"
 #  endif
diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c
index 90f1bdf..e59b228 100644
--- a/coregrind/m_debuginfo/debuginfo.c
+++ b/coregrind/m_debuginfo/debuginfo.c
@@ -703,6 +703,15 @@
 
       2009 Aug 16: apply similar kludge to ppc32-linux.
       See http://bugs.kde.org/show_bug.cgi?id=190820
+
+      There are two modes on s390x: with and without the noexec kernel
+      parameter. Together with some older kernels, this leads to several
+      variants:
+      executable: r and x
+      data:       r and w and x
+      or
+      executable: r and x
+      data:       r and w
    */
    is_rx_map = False;
    is_rw_map = False;
@@ -712,6 +721,9 @@
 #  elif defined(VGA_amd64) || defined(VGA_ppc64) || defined(VGA_arm)
    is_rx_map = seg->hasR && seg->hasX && !seg->hasW;
    is_rw_map = seg->hasR && seg->hasW && !seg->hasX;
+#  elif defined(VGP_s390x_linux)
+   is_rx_map = seg->hasR && seg->hasX && !seg->hasW;
+   is_rw_map = seg->hasR && seg->hasW;
 #  else
 #    error "Unknown platform"
 #  endif
@@ -2000,6 +2012,11 @@
             case Creg_ARM_R14: return eec->uregs->r14;
             case Creg_ARM_R13: return eec->uregs->r13;
             case Creg_ARM_R12: return eec->uregs->r12;
+#           elif defined(VGA_s390x)
+            case Creg_IA_IP: return eec->uregs->ia;
+            case Creg_IA_SP: return eec->uregs->sp;
+            case Creg_IA_BP: return eec->uregs->fp;
+            case Creg_S390_R14: return eec->uregs->lr;
 #           elif defined(VGA_ppc32) || defined(VGA_ppc64)
 #           else
 #             error "Unsupported arch"
@@ -2210,6 +2227,24 @@
       case CFIC_ARM_R7REL: 
          cfa = cfsi->cfa_off + uregs->r7;
          break;
+#     elif defined(VGA_s390x)
+      case CFIC_IA_SPREL:
+         cfa = cfsi->cfa_off + uregs->sp;
+         break;
+      case CFIR_MEMCFAREL:
+      {
+         Addr a = uregs->sp + cfsi->cfa_off;
+         if (a < min_accessible || a > max_accessible-sizeof(Addr))
+            break;
+         cfa = *(Addr*)a;
+         break;
+      }
+      case CFIR_SAME:
+         cfa = uregs->fp;
+         break;
+      case CFIC_IA_BPREL:
+         cfa = cfsi->cfa_off + uregs->fp;
+         break;
 #     elif defined(VGA_ppc32) || defined(VGA_ppc64)
 #     else
 #       error "Unsupported arch"
@@ -2262,6 +2297,15 @@
      return compute_cfa(&uregs,
                         min_accessible,  max_accessible, di, cfsi);
    }
+#elif defined(VGA_s390x)
+   { D3UnwindRegs uregs;
+     uregs.ia = ip;
+     uregs.sp = sp;
+     uregs.fp = fp;
+     return compute_cfa(&uregs,
+                        min_accessible,  max_accessible, di, cfsi);
+   }
+
 #  else
    return 0; /* indicates failure */
 #  endif
@@ -2294,6 +2338,8 @@
    ipHere = uregsHere->xip;
 #  elif defined(VGA_arm)
    ipHere = uregsHere->r15;
+#  elif defined(VGA_s390x)
+   ipHere = uregsHere->ia;
 #  elif defined(VGA_ppc32) || defined(VGA_ppc64)
 #  else
 #    error "Unknown arch"
@@ -2366,6 +2412,10 @@
    COMPUTE(uregsPrev.r12, uregsHere->r12, cfsi->r12_how, cfsi->r12_off);
    COMPUTE(uregsPrev.r11, uregsHere->r11, cfsi->r11_how, cfsi->r11_off);
    COMPUTE(uregsPrev.r7,  uregsHere->r7,  cfsi->r7_how,  cfsi->r7_off);
+#  elif defined(VGA_s390x)
+   COMPUTE(uregsPrev.ia, uregsHere->ia, cfsi->ra_how, cfsi->ra_off);
+   COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi->sp_how, cfsi->sp_off);
+   COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi->fp_how, cfsi->fp_off);
 #  elif defined(VGA_ppc32) || defined(VGA_ppc64)
 #  else
 #    error "Unknown arch"
diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h
index e272529..9abff6d 100644
--- a/coregrind/m_debuginfo/priv_storage.h
+++ b/coregrind/m_debuginfo/priv_storage.h
@@ -140,6 +140,22 @@
               CFIR_CFAREL    -> cfa + r14/r13/r12/r11/r7/ra_off
               CFIR_MEMCFAREL -> *( cfa + r14/r13/r12/r11/r7/ra_off )
               CFIR_EXPR      -> expr whose index is in r14/r13/r12/r11/r7/ra_off
+
+   On s390x we have a similar logic as x86 or amd64. We need the stack pointer
+   (r15), the frame pointer r11 (like BP) and together with the instruction
+   address in the PSW we can calculate the previous values:
+     cfa = case cfa_how of
+              CFIC_IA_SPREL -> r15 + cfa_off
+              CFIC_IA_BPREL -> r11 + cfa_off
+              CFIR_IA_EXPR  -> expr whose index is in cfa_off
+
+     old_sp/fp/ra
+         = case sp/fp/ra_how of
+              CFIR_UNKNOWN   -> we don't know, sorry
+              CFIR_SAME      -> same as it was before (sp/fp only)
+              CFIR_CFAREL    -> cfa + sp/fp/ra_off
+              CFIR_MEMCFAREL -> *( cfa + sp/fp/ra_off )
+              CFIR_EXPR      -> expr whose index is in sp/fp/ra_off
 */
 
 #define CFIC_IA_SPREL     ((UChar)1)
@@ -208,6 +224,21 @@
       Int   ra_off;
    }
    DiCfSI;
+#elif defined(VGA_s390x)
+typedef
+   struct {
+      Addr  base;
+      UInt  len;
+      UChar cfa_how; /* a CFIC_ value */
+      UChar sp_how;  /* a CFIR_ value */
+      UChar ra_how;  /* a CFIR_ value */
+      UChar fp_how;  /* a CFIR_ value */
+      Int   cfa_off;
+      Int   sp_off;
+      Int   ra_off;
+      Int   fp_off;
+   }
+   DiCfSI;
 #else
 #  error "Unknown arch"
 #endif
@@ -230,7 +261,8 @@
       Creg_ARM_R13,
       Creg_ARM_R12,
       Creg_ARM_R15,
-      Creg_ARM_R14
+      Creg_ARM_R14,
+      Creg_S390_R14
    }
    CfiReg;
 
diff --git a/coregrind/m_debuginfo/readdwarf.c b/coregrind/m_debuginfo/readdwarf.c
index 24829b6..5f1d2ce 100644
--- a/coregrind/m_debuginfo/readdwarf.c
+++ b/coregrind/m_debuginfo/readdwarf.c
@@ -1832,6 +1832,10 @@
 #  define FP_REG         6
 #  define SP_REG         7
 #  define RA_REG_DEFAULT 16
+#elif defined(VGP_s390x_linux)
+#  define FP_REG         11    // sometimes s390 has a frame pointer in r11
+#  define SP_REG         15    // stack is always r15
+#  define RA_REG_DEFAULT 14    // the return address is in r14
 #else
 #  error "Unknown platform"
 #endif
@@ -2139,7 +2143,7 @@
    else
    if (ctxs->cfa_is_regoff && ctxs->cfa_reg == SP_REG) {
       si->cfa_off = ctxs->cfa_off;
-#     if defined(VGA_x86) || defined(VGA_amd64)
+#     if defined(VGA_x86) || defined(VGA_amd64) || defined(VGA_s390x)
       si->cfa_how = CFIC_IA_SPREL;
 #     elif defined(VGA_arm)
       si->cfa_how = CFIC_ARM_R13REL;
@@ -2150,7 +2154,7 @@
    else
    if (ctxs->cfa_is_regoff && ctxs->cfa_reg == FP_REG) {
       si->cfa_off = ctxs->cfa_off;
-#     if defined(VGA_x86) || defined(VGA_amd64)
+#     if defined(VGA_x86) || defined(VGA_amd64) || defined(VGA_s390x)
       si->cfa_how = CFIC_IA_BPREL;
 #     elif defined(VGA_arm)
       si->cfa_how = CFIC_ARM_R12REL;
@@ -2303,6 +2307,55 @@
    return True;
 
 
+#  elif defined(VGA_s390x)
+
+   SUMMARISE_HOW(si->ra_how, si->ra_off,
+                             ctxs->reg[ctx->ra_reg] );
+   SUMMARISE_HOW(si->fp_how, si->fp_off,
+                             ctxs->reg[FP_REG] );
+   SUMMARISE_HOW(si->sp_how, si->sp_off,
+                             ctxs->reg[SP_REG] );
+
+   /* change some defaults to consumable values */
+   if (si->sp_how == CFIR_UNKNOWN)
+      si->sp_how = CFIR_SAME;
+
+   if (si->fp_how == CFIR_UNKNOWN)
+      si->fp_how = CFIR_SAME;
+
+   if (si->cfa_how == CFIR_UNKNOWN) {
+      si->cfa_how = CFIC_IA_SPREL;
+      si->cfa_off = 160;
+   }
+   if (si->ra_how == CFIR_UNKNOWN) {
+      if (!debuginfo->cfsi_exprs)
+         debuginfo->cfsi_exprs = VG_(newXA)( ML_(dinfo_zalloc),
+                                             "di.ccCt.2a",
+                                             ML_(dinfo_free),
+                                             sizeof(CfiExpr) );
+      si->ra_how = CFIR_EXPR;
+      si->ra_off = ML_(CfiExpr_CfiReg)( debuginfo->cfsi_exprs,
+                                        Creg_S390_R14);
+   }
+
+   /* knock out some obviously stupid cases */
+   if (si->ra_how == CFIR_SAME)
+      { why = 3; goto failed; }
+
+   /* bogus looking range?  Note, we require that the difference is
+      representable in 32 bits. */
+   if (loc_start >= ctx->loc)
+      { why = 4; goto failed; }
+   if (ctx->loc - loc_start > 10000000 /* let's say */)
+      { why = 5; goto failed; }
+
+   si->base = loc_start + ctx->initloc;
+   si->len  = (UInt)(ctx->loc - loc_start);
+
+   return True;
+
+
+
 #  elif defined(VGA_ppc32) || defined(VGA_ppc64)
 #  else
 #    error "Unknown arch"
@@ -2376,6 +2429,13 @@
             return ML_(CfiExpr_CfiReg)( dstxa, Creg_ARM_R12 );
          if (dwreg == srcuc->ra_reg)
            return ML_(CfiExpr_CfiReg)( dstxa, Creg_ARM_R15 ); /* correct? */
+#        elif defined(VGA_s390x)
+         if (dwreg == SP_REG)
+            return ML_(CfiExpr_CfiReg)( dstxa, Creg_IA_SP );
+         if (dwreg == FP_REG)
+            return ML_(CfiExpr_CfiReg)( dstxa, Creg_IA_BP );
+         if (dwreg == srcuc->ra_reg)
+            return ML_(CfiExpr_CfiReg)( dstxa, Creg_IA_IP ); /* correct? */
 #        elif defined(VGA_ppc32) || defined(VGA_ppc64)
 #        else
 #           error "Unknown arch"
diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c
index 7cf8ceb..ea42882 100644
--- a/coregrind/m_debuginfo/readelf.c
+++ b/coregrind/m_debuginfo/readelf.c
@@ -1739,7 +1739,7 @@
 
       /* PLT is different on different platforms, it seems. */
 #     if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
-         || defined(VGP_arm_linux)
+         || defined(VGP_arm_linux) || defined (VGP_s390x_linux)
       /* Accept .plt where mapped as rx (code) */
       if (0 == VG_(strcmp)(name, ".plt")) {
          if (inrx && size > 0 && !di->plt_present) {
@@ -2187,8 +2187,9 @@
 
       /* Read the stabs and/or dwarf2 debug information, if any.  It
          appears reading stabs stuff on amd64-linux doesn't work, so
-         we ignore it. */
-#     if !defined(VGP_amd64_linux)
+         we ignore it. On s390x stabs also doesn't work and we always
+         have the dwarf info in the eh_frame. */
+#     if !defined(VGP_amd64_linux)  && !defined(VGP_s390x_linux)
       if (stab_img && stabstr_img) {
          ML_(read_debuginfo_stabs) ( di, stab_img, stab_sz, 
                                          stabstr_img, stabstr_sz );
diff --git a/coregrind/m_debuginfo/storage.c b/coregrind/m_debuginfo/storage.c
index 52aec79..eda421b 100644
--- a/coregrind/m_debuginfo/storage.c
+++ b/coregrind/m_debuginfo/storage.c
@@ -141,6 +141,9 @@
       case CFIC_ARM_R11REL: 
          VG_(printf)("let cfa=oldR11+%d", si->cfa_off); 
          break;
+      case CFIR_SAME:
+         VG_(printf)("let cfa=Same");
+         break;
       case CFIC_ARM_R7REL: 
          VG_(printf)("let cfa=oldR7+%d", si->cfa_off); 
          break;
@@ -172,6 +175,11 @@
    VG_(printf)(" R7=");
    SHOW_HOW(si->r7_how, si->r7_off);
 #  elif defined(VGA_ppc32) || defined(VGA_ppc64)
+#  elif defined(VGA_s390x)
+   VG_(printf)(" SP=");
+   SHOW_HOW(si->sp_how, si->sp_off);
+   VG_(printf)(" FP=");
+   SHOW_HOW(si->fp_how, si->fp_off);
 #  else
 #    error "Unknown arch"
 #  endif
diff --git a/coregrind/m_debuglog.c b/coregrind/m_debuglog.c
index 473771b..5ec7a90 100644
--- a/coregrind/m_debuglog.c
+++ b/coregrind/m_debuglog.c
@@ -516,6 +516,48 @@
    return __res;
 }
 
+#elif defined(VGP_s390x_linux)
+static UInt local_sys_write_stderr ( HChar* buf, Int n )
+{
+   register Int    r2     asm("2") = 2;      /* file descriptor STDERR */
+   register HChar* r3     asm("3") = buf;    /* caller's buffer, in register 3 */
+   register ULong  r4     asm("4") = n;      /* caller's length, in register 4 */
+   register ULong  r2_res asm("2");          /* result is read from register 2 */
+   ULong __res;
+
+   __asm__ __volatile__ (
+      "svc %b1\n"                  /* operand 1 = __NR_write as an immediate */
+      : "=d" (r2_res)
+      : "i" (__NR_write),
+        "0" (r2),
+        "d" (r3),
+        "d" (r4)
+      : "cc", "memory");
+   __res = r2_res;
+
+   if (__res >= (ULong)(-125))     /* top 125 values (presumably -errno) */
+      __res = -1;                  /* are all folded into a single -1 */
+   return (UInt)(__res);
+}
+
+static UInt local_sys_getpid ( void )
+{
+   register ULong r2 asm("2");    /* result is read from register 2 */
+   ULong __res;
+
+   __asm__ __volatile__ (
+      "svc %b1\n"                 /* operand 1 = __NR_getpid as an immediate */
+      : "=d" (r2)
+      : "i" (__NR_getpid)
+      : "cc", "memory");
+   __res = r2;
+
+   if (__res >= (ULong)(-125))    /* top 125 values (presumably -errno) */
+      __res = -1;                 /* are all folded into a single -1 */
+   return (UInt)(__res);
+}
+
+
 #else
 # error Unknown platform
 #endif
diff --git a/coregrind/m_dispatch/dispatch-s390x-linux.S b/coregrind/m_dispatch/dispatch-s390x-linux.S
new file mode 100644
index 0000000..4b9a800
--- /dev/null
+++ b/coregrind/m_dispatch/dispatch-s390x-linux.S
@@ -0,0 +1,397 @@
+
+/*--------------------------------------------------------------------*/
+/*--- The core dispatch loop, for jumping to a code address.       ---*/
+/*---                                       dispatch-s390x-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+  This file is part of Valgrind, a dynamic binary instrumentation
+  framework.
+
+  Copyright IBM Corp. 2010-2011
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+  02111-1307, USA.
+
+  The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Florian Krohm and Christian Borntraeger */
+
+#include "pub_core_basics_asm.h"
+#include "pub_core_dispatch_asm.h"
+#include "pub_core_transtab_asm.h"
+#include "libvex_guest_offsets.h"
+#include "libvex_s390x_common.h"
+
+#if defined(VGA_s390x)
+
+/*------------------------------------------------------------*/
+/*---                                                      ---*/
+/*--- The dispatch loop.  VG_(run_innerloop) is used to    ---*/
+/*--- run all translations except no-redir ones.           ---*/
+/*---                                                      ---*/
+/*------------------------------------------------------------*/
+
+/* Convenience definitions for readability */
+#undef  SP
+#define SP S390_REGNO_STACK_POINTER
+
+#undef  LR
+#define LR S390_REGNO_LINK_REGISTER
+
+/* Location of valgrind's saved FPC register */
+#define S390_LOC_SAVED_FPC_V S390_OFFSET_SAVED_FPC_V(SP)
+
+/* Location of saved guest state pointer */
+#define S390_LOC_SAVED_GSP S390_OFFSET_SAVED_GSP(SP)
+
+/*----------------------------------------------------*/
+/*--- Preamble (set everything up)                 ---*/
+/*----------------------------------------------------*/
+
+/* signature:
+UWord VG_(run_innerloop) ( void* guest_state, UWord do_profiling );
+*/
+
+.text
+.align   4
+.globl VG_(run_innerloop)
+VG_(run_innerloop):
+        /* r2 holds address of guest_state */
+        /* r3 holds do_profiling (a flag) */
+
+        /* Save gprs r6...r15 (ABI callee-saved: r6...r13 and r15) */
+        stmg %r6,%r15,48(SP)
+
+        /* New stack frame */
+        aghi SP,-S390_INNERLOOP_FRAME_SIZE
+
+        /* Save fprs:   ABI: f8...f15 */
+        std  %f8,160+0(SP)
+        std  %f9,160+8(SP)
+        std  %f10,160+16(SP)
+        std  %f11,160+24(SP)
+        std  %f12,160+32(SP)
+        std  %f13,160+40(SP)
+        std  %f14,160+48(SP)
+        std  %f15,160+56(SP)
+
+        /* Load address of guest state into guest state register (r13) */
+        lgr  %r13,%r2
+
+        /* Store address of guest state pointer on stack.
+           It will be needed later because upon return from a VEX translation
+           r13 may contain a special value. So the old value will be used to
+           determine whether r13 contains a special value. */
+        stg  %r13,S390_LOC_SAVED_GSP
+
+        /* Save valgrind's FPC on stack so run_innerloop_exit can restore
+           it later. */
+        stfpc S390_LOC_SAVED_FPC_V
+
+        /* Load the FPC the way the client code wants it. I.e. pull the
+           value from the guest state. */
+        lfpc OFFSET_s390x_fpc(%r13)
+
+        /* Get the IA from the guest state */
+        lg   %r2,OFFSET_s390x_IA(%r13)
+
+        /* Get VG_(dispatch_ctr) -- a 32-bit value -- and store it in a reg */
+        larl %r6,VG_(dispatch_ctr)
+        l    S390_REGNO_DISPATCH_CTR,0(%r6)
+
+        /* Fall into main loop (the right one) */
+
+        /* r3 = 1 --> do_profiling. We may trash r3 later on. That's OK,
+           because it's a volatile register (does not need to be preserved). */
+        ltgr %r3,%r3
+        je   run_innerloop__dispatch_unprofiled
+        j    run_innerloop__dispatch_profiled
+
+/*----------------------------------------------------*/
+/*--- NO-PROFILING (standard) dispatcher           ---*/
+/*----------------------------------------------------*/
+
+run_innerloop__dispatch_unprofiled:
+        /* This is the story:
+
+           r2  = IA = next guest address
+           r12 = VG_(dispatch_ctr)
+           r13 = guest state pointer or (upon return from guest code) some
+                 special value
+           r15 = stack pointer (as usual)
+        */
+
+	/* Has the guest state pointer been messed with?  If yes, exit. */
+        cg   %r13,S390_LOC_SAVED_GSP    /* r13 = actual guest state pointer */
+        larl %r8, VG_(tt_fast)
+        jne  gsp_changed
+
+        /* Save the jump address in the guest state */
+        stg  %r2,OFFSET_s390x_IA(%r13)
+
+
+	/* Try a fast lookup in the translation cache:
+           Compute offset (not index) into VG_(tt_fast):
+
+           offset = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
+
+           with VG_TT_FAST_HASH(addr) == (addr >> 1) & VG_TT_FAST_MASK
+           and  sizeof(FastCacheEntry) == 16
+
+           offset = ((addr >> 1) & VG_TT_FAST_MASK) << 4
+        */
+        lghi %r5,VG_TT_FAST_MASK
+        srlg %r7, %r2,1              /* next guest addr >> 1*/
+        ngr  %r7,%r5
+        sllg %r7,%r7,4
+
+        /* Set the return address to the beginning of the loop here to
+           have some instruction between setting r7 and using it as an
+           address */
+	larl LR,run_innerloop__dispatch_unprofiled
+
+	/* Are we out of timeslice?  If yes, defer to scheduler. */
+        ahi  S390_REGNO_DISPATCH_CTR,-1
+        jz   counter_is_zero
+
+
+        lg   %r10, 0(%r8,%r7)      /* .guest */
+        lg   %r11, 8(%r8,%r7)      /* .host */
+        cgr  %r2, %r10
+        jne  fast_lookup_failed
+
+        /* Found a match.  Call .host.
+           r11 is an address. There we will find the instrumented client code.
+           That code may modify the guest state register r13. The client code
+           will return to the start of this loop by issuing br LR, which was
+           set above, so we can simply branch to the host code */
+        br %r11
+
+
+/*----------------------------------------------------*/
+/*--- PROFILING dispatcher (can be much slower)    ---*/
+/*----------------------------------------------------*/
+
+run_innerloop__dispatch_profiled:
+
+	/* Has the guest state pointer been messed with?  If yes, exit. */
+        cg   %r13,S390_LOC_SAVED_GSP    /* r13 = actual guest state pointer */
+        larl %r8, VG_(tt_fast)
+        jne  gsp_changed
+
+        /* Save the jump address in the guest state */
+        stg  %r2,OFFSET_s390x_IA(%r13)
+
+	/* Try a fast lookup in the translation cache:
+           Compute offset (not index) into VG_(tt_fast):
+
+           offset = VG_TT_FAST_HASH(addr) * sizeof(FastCacheEntry)
+
+           with VG_TT_FAST_HASH(addr) == (addr >> 1) & VG_TT_FAST_MASK
+           and  sizeof(FastCacheEntry) == 16
+
+           offset = ((addr >> 1) & VG_TT_FAST_MASK) << 4
+        */
+        lghi %r5,VG_TT_FAST_MASK
+        srlg %r7,%r2,1              /* next guest addr >> 1*/
+        ngr  %r7,%r5
+        sllg %r7,%r7,4
+
+        /* Set the return address to the beginning of the loop here to
+           have some instruction between setting r7 and using it as an
+           address */
+	larl LR,run_innerloop__dispatch_profiled
+
+	/* Are we out of timeslice?  If yes, defer to scheduler. */
+        ahi  S390_REGNO_DISPATCH_CTR,-1
+        jz   counter_is_zero
+
+        lg   %r10, 0(%r8,%r7)      /* .guest */
+        lg   %r11, 8(%r8,%r7)      /* .host */
+        cgr  %r2, %r10
+        jne  fast_lookup_failed
+
+        /* sizeof(FastCacheEntry) == 16, sizeof(UInt*) == 8: halve the offset */
+        srlg %r7,%r7,1
+
+        /* we got a hit: VG_(tt_fastN) is guaranteed to point to count */
+        larl %r8, VG_(tt_fastN)
+
+        /* increment bb profile counter (a 32-bit load/add/store) */
+        lg   %r9,0(%r8,%r7)
+        l    %r10,0(%r9)
+        ahi  %r10,1
+        st   %r10,0(%r9)
+
+        /* Found a match.  Call .host.
+           r11 is an address. There we will find the instrumented client code.
+           That code may modify the guest state register r13. The client code
+           will return to the start of this loop by issuing br LR, which was
+           set above, so we can simply branch to the host code */
+        br %r11
+
+/*----------------------------------------------------*/
+/*--- exit points                                  ---*/
+/*----------------------------------------------------*/
+
+gsp_changed:
+	/* Someone messed with the gsp (in r13).  Have to
+           defer to scheduler to resolve this.  The register
+           holding VG_(dispatch_ctr) is not yet decremented,
+           so no need to increment. */
+
+        /* Update the IA in the guest state */
+        lg  %r6,S390_LOC_SAVED_GSP       /* r6 = original guest state pointer */
+        stg %r2,OFFSET_s390x_IA(%r6)
+
+        /* Return the special guest state pointer value */
+        lgr %r2, %r13
+	j   run_innerloop_exit
+
+
+counter_is_zero:
+	/* IA is up to date */
+
+	/* Back out decrement of the dispatch counter */
+        ahi S390_REGNO_DISPATCH_CTR,1
+
+        /* Set return value for the scheduler */
+        lghi %r2,VG_TRC_INNER_COUNTERZERO
+        j    run_innerloop_exit
+
+
+fast_lookup_failed:
+	/* IA is up to date */
+
+	/* Back out decrement of the dispatch counter */
+        ahi S390_REGNO_DISPATCH_CTR,1
+
+        /* Set return value for the scheduler */
+        lghi %r2,VG_TRC_INNER_FASTMISS
+        j    run_innerloop_exit
+
+
+        /* All exits from the dispatcher go through here.
+           When we come here r2 holds the return value. */
+run_innerloop_exit:
+
+	/* Restore valgrind's FPC, as client code may have changed it. */
+        lfpc S390_LOC_SAVED_FPC_V
+
+        /* Write ctr to VG_(dispatch_ctr) (=32bit value) */
+        larl %r6,VG_(dispatch_ctr)
+        st   S390_REGNO_DISPATCH_CTR,0(%r6)
+
+        /* Restore callee-saved registers... */
+
+        /* Floating-point regs */
+        ld  %f8,160+0(SP)
+        ld  %f9,160+8(SP)
+        ld  %f10,160+16(SP)
+        ld  %f11,160+24(SP)
+        ld  %f12,160+32(SP)
+        ld  %f13,160+40(SP)
+        ld  %f14,160+48(SP)
+        ld  %f15,160+56(SP)
+
+        /* Remove stack frame */
+        aghi SP,S390_INNERLOOP_FRAME_SIZE
+
+        /* General-purpose regs. This also restores the original link
+           register (r14) and stack pointer (r15). */
+        lmg %r6,%r15,48(SP)
+
+        /* Return */
+        br  LR
+
+/*------------------------------------------------------------*/
+/*---                                                      ---*/
+/*--- A special dispatcher, for running no-redir           ---*/
+/*--- translations.  Just runs the given translation once. ---*/
+/*---                                                      ---*/
+/*------------------------------------------------------------*/
+
+/* signature:
+void VG_(run_a_noredir_translation) ( UWord* argblock );
+*/
+
+/* Run a no-redir translation.  argblock points to 4 UWords, 2 to carry args
+   and 2 to carry results:
+      0: input:  ptr to translation
+      1: input:  ptr to guest state
+      2: output: next guest PC
+      3: output: guest state pointer afterwards (== thread return code)
+*/
+.text
+.align   4
+.globl VG_(run_a_noredir_translation)
+VG_(run_a_noredir_translation):
+        stmg %r6,%r15,48(SP)
+        aghi SP,-S390_INNERLOOP_FRAME_SIZE
+        std  %f8,160+0(SP)
+        std  %f9,160+8(SP)
+        std  %f10,160+16(SP)
+        std  %f11,160+24(SP)
+        std  %f12,160+32(SP)
+        std  %f13,160+40(SP)
+        std  %f14,160+48(SP)
+        std  %f15,160+56(SP)
+
+        /* Load address of guest state into guest state register (r13) */
+        lg   %r13,8(%r2)
+
+        /* Get the IA */
+        lg   %r11,0(%r2)
+
+        /* save r2 (argblock) as it is clobbered */
+	stg  %r2,160+64(SP)
+
+        /* the call itself */
+        basr LR,%r11
+
+        /* restore argblock */
+	lg   %r1,160+64(SP)
+	/* save the next guest PC */
+	stg  %r2,16(%r1)
+
+	/* save the guest state */
+	stg  %r13,24(%r1)
+
+        /* Restore Floating-point regs */
+        ld  %f8,160+0(SP)
+        ld  %f9,160+8(SP)
+        ld  %f10,160+16(SP)
+        ld  %f11,160+24(SP)
+        ld  %f12,160+32(SP)
+        ld  %f13,160+40(SP)
+        ld  %f14,160+48(SP)
+        ld  %f15,160+56(SP)
+
+        aghi SP,S390_INNERLOOP_FRAME_SIZE
+
+        lmg %r6,%r15,48(SP)
+	br  %r14
+
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+
+#endif /* VGA_s390x */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                   dispatch-s390x-linux.S ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
index ad7dedd..14532fb 100644
--- a/coregrind/m_initimg/initimg-linux.c
+++ b/coregrind/m_initimg/initimg-linux.c
@@ -1040,6 +1040,21 @@
    // FIXME jrs: what's this for?
    arch->vex.guest_R1 =  iifii.initial_client_SP;
 
+#  elif defined(VGP_s390x_linux)
+   vg_assert(0 == sizeof(VexGuestS390XState) % 16);
+
+   /* Zero out the initial state. This also sets the guest_fpc to 0, which
+      is also done by the kernel for the fpc during execve. */
+   LibVEX_GuestS390X_initialise(&arch->vex);
+
+   /* Zero out the shadow area. */
+   VG_(memset)(&arch->vex_shadow1, 0, sizeof(VexGuestS390XState));
+   VG_(memset)(&arch->vex_shadow2, 0, sizeof(VexGuestS390XState));
+
+   /* Put essential stuff into the new state. */
+   arch->vex.guest_SP = iifii.initial_client_SP;
+   arch->vex.guest_IA = iifii.initial_client_IP;
+
 #  else
 #    error Unknown platform
 #  endif
diff --git a/coregrind/m_libcassert.c b/coregrind/m_libcassert.c
index 731864f..63e3f1c 100644
--- a/coregrind/m_libcassert.c
+++ b/coregrind/m_libcassert.c
@@ -135,6 +135,22 @@
         (srP)->misc.ARM.r11 = block[4];                   \
         (srP)->misc.ARM.r7  = block[5];                   \
       }
+#elif defined(VGP_s390x_linux)
+#  define GET_STARTREGS(srP)                              \
+      { ULong ia, sp, fp, lr;                             \
+        __asm__ __volatile__(                             \
+           "bras %0,0f;"                                  \
+           "0: lgr %1,15;"                                \
+           "lgr %2,11;"                                   \
+           "lgr %3,14;"                                   \
+           : "=r" (ia), "=r" (sp),"=r" (fp),"=r" (lr)     \
+           /* no read & clobber */                        \
+        );                                                \
+        (srP)->r_pc = ia;                                 \
+        (srP)->r_sp = sp;                                 \
+        (srP)->misc.S390X.r_fp = fp;                      \
+        (srP)->misc.S390X.r_lr = lr;                      \
+      }
 #else
 #  error Unknown platform
 #endif
diff --git a/coregrind/m_libcfile.c b/coregrind/m_libcfile.c
index 9c1847b..8c1dbc9 100644
--- a/coregrind/m_libcfile.c
+++ b/coregrind/m_libcfile.c
@@ -795,7 +795,7 @@
 Int VG_(socket) ( Int domain, Int type, Int protocol )
 {
 #  if defined(VGP_x86_linux) || defined(VGP_ppc32_linux) \
-      || defined(VGP_ppc64_linux)
+      || defined(VGP_ppc64_linux) || defined(VGP_s390x_linux)
    SysRes res;
    UWord  args[3];
    args[0] = domain;
@@ -836,7 +836,7 @@
 Int my_connect ( Int sockfd, struct vki_sockaddr_in* serv_addr, Int addrlen )
 {
 #  if defined(VGP_x86_linux) || defined(VGP_ppc32_linux) \
-      || defined(VGP_ppc64_linux)
+      || defined(VGP_ppc64_linux) || defined(VGP_s390x_linux)
    SysRes res;
    UWord  args[3];
    args[0] = sockfd;
@@ -876,7 +876,7 @@
       SIGPIPE */
 
 #  if defined(VGP_x86_linux) || defined(VGP_ppc32_linux) \
-      || defined(VGP_ppc64_linux)
+      || defined(VGP_ppc64_linux) || defined(VGP_s390x_linux)
    SysRes res;
    UWord  args[4];
    args[0] = sd;
@@ -908,7 +908,7 @@
 Int VG_(getsockname) ( Int sd, struct vki_sockaddr *name, Int *namelen)
 {
 #  if defined(VGP_x86_linux) || defined(VGP_ppc32_linux) \
-      || defined(VGP_ppc64_linux)
+      || defined(VGP_ppc64_linux) || defined(VGP_s390x_linux)
    SysRes res;
    UWord  args[3];
    args[0] = sd;
@@ -940,7 +940,7 @@
 Int VG_(getpeername) ( Int sd, struct vki_sockaddr *name, Int *namelen)
 {
 #  if defined(VGP_x86_linux) || defined(VGP_ppc32_linux) \
-      || defined(VGP_ppc64_linux)
+      || defined(VGP_ppc64_linux) || defined(VGP_s390x_linux)
    SysRes res;
    UWord  args[3];
    args[0] = sd;
@@ -973,7 +973,7 @@
                       Int *optlen)
 {
 #  if defined(VGP_x86_linux) || defined(VGP_ppc32_linux) \
-      || defined(VGP_ppc64_linux)
+      || defined(VGP_ppc64_linux) || defined(VGP_s390x_linux)
    SysRes res;
    UWord  args[5];
    args[0] = sd;
diff --git a/coregrind/m_libcproc.c b/coregrind/m_libcproc.c
index 61441d7..e01f612 100644
--- a/coregrind/m_libcproc.c
+++ b/coregrind/m_libcproc.c
@@ -545,7 +545,7 @@
 #  elif defined(VGP_amd64_linux) || defined(VGP_ppc64_linux)  \
         || defined(VGP_arm_linux)                             \
         || defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5) \
-        || defined(VGO_darwin)
+        || defined(VGO_darwin) || defined(VGP_s390x_linux)
    SysRes sres;
    sres = VG_(do_syscall2)(__NR_getgroups, size, (Addr)list);
    if (sr_isError(sres))
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index 8dc3e17..d832d30 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -94,6 +94,13 @@
       = VG_(threads)[tid].arch.vex.guest_R11;
    regs->misc.ARM.r7
       = VG_(threads)[tid].arch.vex.guest_R7;
+#  elif defined(VGA_s390x)
+   regs->r_pc = (ULong)VG_(threads)[tid].arch.vex.guest_IA;
+   regs->r_sp = (ULong)VG_(threads)[tid].arch.vex.guest_SP;
+   regs->misc.S390X.r_fp
+      = VG_(threads)[tid].arch.vex.guest_r11;
+   regs->misc.S390X.r_lr
+      = VG_(threads)[tid].arch.vex.guest_r14;
 #  else
 #    error "Unknown arch"
 #  endif
@@ -125,6 +132,9 @@
    VG_(threads)[tid].arch.vex_shadow2.guest_GPR4 = s2err;
 #  elif defined(VGO_darwin)
    // GrP fixme darwin syscalls may return more values (2 registers plus error)
+#  elif defined(VGP_s390x_linux)
+   VG_(threads)[tid].arch.vex_shadow1.guest_r2 = s1res;
+   VG_(threads)[tid].arch.vex_shadow2.guest_r2 = s2res;
 #  else
 #    error "Unknown plat"
 #  endif
@@ -257,6 +267,23 @@
    (*f)(vex->guest_R12);
    (*f)(vex->guest_R13);
    (*f)(vex->guest_R14);
+#elif defined(VGA_s390x)
+   (*f)(vex->guest_r0);
+   (*f)(vex->guest_r1);
+   (*f)(vex->guest_r2);
+   (*f)(vex->guest_r3);
+   (*f)(vex->guest_r4);
+   (*f)(vex->guest_r5);
+   (*f)(vex->guest_r6);
+   (*f)(vex->guest_r7);
+   (*f)(vex->guest_r8);
+   (*f)(vex->guest_r9);
+   (*f)(vex->guest_r10);
+   (*f)(vex->guest_r11);
+   (*f)(vex->guest_r12);
+   (*f)(vex->guest_r13);
+   (*f)(vex->guest_r14);
+   (*f)(vex->guest_r15);
 #else
 #  error Unknown arch
 #endif
@@ -357,6 +384,11 @@
           then safe to use VG_(machine_get_VexArchInfo) 
                        and VG_(machine_ppc64_has_VMX)
 
+   -------------
+   s390x: initially:  call VG_(machine_get_hwcaps)
+
+          then safe to use VG_(machine_get_VexArchInfo)
+
    VG_(machine_get_hwcaps) may use signals (although it attempts to
    leave signal state unchanged) and therefore should only be
    called before m_main sets up the client's signal state.
@@ -383,10 +415,11 @@
 Int VG_(machine_arm_archlevel) = 4;
 #endif
 
+/* fixs390: anything for s390x here ? */
 
 /* For hwcaps detection on ppc32/64 and arm we'll need to do SIGILL
    testing, so we need a jmp_buf. */
-#if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm)
+#if defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm) || defined(VGA_s390x)
 #include <setjmp.h> // For jmp_buf
 static jmp_buf env_unsup_insn;
 static void handler_unsup_insn ( Int x ) { __builtin_longjmp(env_unsup_insn,1); }
@@ -835,6 +868,96 @@
      return True;
    }
 
+#elif defined(VGA_s390x)
+   {
+     /* Instruction set detection code borrowed from ppc above. */
+     vki_sigset_t          saved_set, tmp_set;
+     vki_sigaction_fromK_t saved_sigill_act;
+     vki_sigaction_toK_t     tmp_sigill_act;
+
+     volatile Bool have_LDISP, have_EIMM, have_GIE, have_DFP;
+     Int r;
+
+     /* Unblock SIGILL and stash away the old action for that signal */
+     VG_(sigemptyset)(&tmp_set);
+     VG_(sigaddset)(&tmp_set, VKI_SIGILL);
+
+     r = VG_(sigprocmask)(VKI_SIG_UNBLOCK, &tmp_set, &saved_set);
+     vg_assert(r == 0);
+
+     r = VG_(sigaction)(VKI_SIGILL, NULL, &saved_sigill_act);
+     vg_assert(r == 0);
+     tmp_sigill_act = saved_sigill_act;
+
+     /* NODEFER: signal handler does not return (from the kernel's point of
+        view), hence if it is to successfully catch a signal more than once,
+        we need the NODEFER flag. */
+     tmp_sigill_act.sa_flags &= ~VKI_SA_RESETHAND;
+     tmp_sigill_act.sa_flags &= ~VKI_SA_SIGINFO;
+     tmp_sigill_act.sa_flags |=  VKI_SA_NODEFER;
+     tmp_sigill_act.ksa_handler = handler_unsup_insn;
+     VG_(sigaction)(VKI_SIGILL, &tmp_sigill_act, NULL);
+
+     /* Determine hwcaps. Note, we cannot use the stfle insn because it
+        is not supported on z900. */
+
+     have_LDISP = True;
+     if (__builtin_setjmp(env_unsup_insn)) {
+        have_LDISP = False;
+     } else {
+       /* BASR loads the address of the next insn into r1. Needed to avoid
+          a segfault in XY. */
+        __asm__ __volatile__("basr %%r1,%%r0\n\t"
+                             ".long  0xe3001000\n\t"  /* XY  0,0(%r1) */
+                             ".short 0x0057" : : : "r0", "r1", "cc", "memory");
+     }
+
+     have_EIMM = True;
+     if (__builtin_setjmp(env_unsup_insn)) {
+        have_EIMM = False;
+     } else {
+        __asm__ __volatile__(".long  0xc0090000\n\t"  /* iilf r0,0 */
+                             ".short 0x0000" : : : "r0", "memory");
+     }
+
+     have_GIE = True;
+     if (__builtin_setjmp(env_unsup_insn)) {
+        have_GIE = False;
+     } else {
+        __asm__ __volatile__(".long  0xc2010000\n\t"  /* msfi r0,0 */
+                             ".short 0x0000" : : : "r0", "memory");
+     }
+
+     have_DFP = True;
+     if (__builtin_setjmp(env_unsup_insn)) {
+        have_DFP = False;
+     } else {
+        __asm__ __volatile__(".long 0xb3d20000"
+                               : : : "r0", "cc", "memory");  /* adtr r0,r0,r0 */
+     }
+
+     /* Restore signals */
+     r = VG_(sigaction)(VKI_SIGILL, &saved_sigill_act, NULL);
+     vg_assert(r == 0);
+     r = VG_(sigprocmask)(VKI_SIG_SETMASK, &saved_set, NULL);
+     vg_assert(r == 0);
+     VG_(debugLog)(1, "machine", "LDISP %d EIMM %d GIE %d DFP %d\n",
+                   have_LDISP, have_EIMM, have_GIE, have_DFP);
+
+     /* Check for long displacement facility which is required */
+     if (! have_LDISP) return False;
+
+     va = VexArchS390X;
+
+     vai.hwcaps = 0;
+     if (have_LDISP) vai.hwcaps |= VEX_HWCAPS_S390X_LDISP;
+     if (have_EIMM)  vai.hwcaps |= VEX_HWCAPS_S390X_EIMM;
+     if (have_GIE)   vai.hwcaps |= VEX_HWCAPS_S390X_GIE;
+     if (have_DFP)   vai.hwcaps |= VEX_HWCAPS_S390X_DFP;
+
+     return True;
+   }
+
 #elif defined(VGA_arm)
    {
      /* Same instruction set detection algorithm as for ppc32. */
@@ -1017,7 +1140,8 @@
 {
 #if defined(VGP_x86_linux) || defined(VGP_amd64_linux)  \
     || defined(VGP_arm_linux)                           \
-    || defined(VGP_ppc32_linux) || defined(VGO_darwin)
+    || defined(VGP_ppc32_linux) || defined(VGO_darwin)  \
+    || defined(VGP_s390x_linux)
    return f;
 #elif defined(VGP_ppc64_linux) || defined(VGP_ppc32_aix5) \
                                || defined(VGP_ppc64_aix5)
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index d5e762d..acfcdda 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -1626,6 +1626,7 @@
                     "AMD Athlon or above)\n");
         VG_(printf)("   * AMD Athlon64/Opteron\n");
         VG_(printf)("   * PowerPC (most; ppc405 and above)\n");
+        VG_(printf)("   * System z (64bit only - s390x; z900 and above)\n");
         VG_(printf)("\n");
         VG_(exit)(1);
      }
@@ -1937,6 +1938,8 @@
       iters = 5;
 #     elif defined(VGP_arm_linux)
       iters = 1;
+#     elif defined(VGP_s390x_linux)
+      iters = 10;
 #     elif defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
       iters = 4;
 #     elif defined(VGO_darwin)
@@ -2777,6 +2780,47 @@
     "\tnop\n"
     "\ttrap\n"
 );
+#elif defined(VGP_s390x_linux)
+/*
+    This is the canonical entry point, usually the first thing in the text
+    segment. Most registers' values are unspecified, except for:
+
+    %r14         Contains a function pointer to be registered with `atexit'.
+                 This is how the dynamic linker arranges to have DT_FINI
+                 functions called for shared libraries that have been loaded
+                 before this code runs.
+
+    %r15         The stack contains the arguments and environment:
+                 0(%r15)              argc
+                 8(%r15)              argv[0]
+                 ...
+                 (8*(argc+1))(%r15)   NULL
+                 (8*(argc+2))(%r15)   envp[0]
+                 ...
+                                      NULL
+*/
+asm("\n\t"
+    ".text\n\t"
+    ".globl _start\n\t"
+    ".type  _start,@function\n\t"
+    "_start:\n\t"
+    /* set up the new stack in %r1: the address of vgPlain_interim_stack
+       plus the two constants stored at labels 1 and 2 below (guard size
+       and active size), with the low four bits then cleared */
+    "larl   %r1,  vgPlain_interim_stack\n\t"
+    "larl   %r5,  1f\n\t"
+    "ag     %r1,  0(%r5)\n\t"
+    "ag     %r1,  2f-1f(%r5)\n\t"
+    "nill   %r1,  0xFFF0\n\t"
+    /* install it, and collect the original one in %r2 */
+    "lgr    %r2,  %r15\n\t"
+    "lgr    %r15, %r1\n\t"
+    /* call _start_in_C_linux, passing it the startup %r15 (copied to
+       %r2 above) */
+    "brasl  %r14, _start_in_C_linux\n\t"
+    /* only reached if the call returns: trigger execution of an invalid
+       opcode -> halt machine */
+    "j      .+2\n\t"
+    "1:   .quad "VG_STRINGIFY(VG_STACK_GUARD_SZB)"\n\t"
+    "2:   .quad "VG_STRINGIFY(VG_STACK_ACTIVE_SZB)"\n\t"
+    ".previous\n"
+);
 #elif defined(VGP_arm_linux)
 asm("\n"
     "\t.align 2\n"
diff --git a/coregrind/m_redir.c b/coregrind/m_redir.c
index db7131f..065b61a 100644
--- a/coregrind/m_redir.c
+++ b/coregrind/m_redir.c
@@ -1078,6 +1078,9 @@
                          (Addr)&VG_(amd64_darwin_REDIR_FOR_arc4random), NULL);
    }
 
+#  elif defined(VGP_s390x_linux)
+   /* nothing so far */
+
 #  else
 #    error Unknown platform
 #  endif
diff --git a/coregrind/m_scheduler/scheduler.c b/coregrind/m_scheduler/scheduler.c
index c95c6a3..fb25c9b 100644
--- a/coregrind/m_scheduler/scheduler.c
+++ b/coregrind/m_scheduler/scheduler.c
@@ -677,6 +677,10 @@
    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
 #  endif
+
+#  if defined(VGA_s390x)
+   /* no special requirements */
+#  endif
 }
 
 
@@ -1311,6 +1315,9 @@
 #elif defined(VGA_arm)
 #  define VG_CLREQ_ARGS       guest_R4
 #  define VG_CLREQ_RET        guest_R3
+#elif defined (VGA_s390x)
+#  define VG_CLREQ_ARGS       guest_r2
+#  define VG_CLREQ_RET        guest_r3
 #else
 #  error Unknown arch
 #endif
diff --git a/coregrind/m_sigframe/sigframe-s390x-linux.c b/coregrind/m_sigframe/sigframe-s390x-linux.c
new file mode 100644
index 0000000..e5e3801
--- /dev/null
+++ b/coregrind/m_sigframe/sigframe-s390x-linux.c
@@ -0,0 +1,570 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Create/destroy signal delivery frames.                       ---*/
+/*---                                       sigframe-s390x-linux.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Christian Borntraeger */
+
+#include "pub_core_basics.h"
+#include "pub_core_vki.h"
+#include "pub_core_vkiscnums.h"
+#include "pub_core_threadstate.h"
+#include "pub_core_aspacemgr.h"
+#include "pub_core_libcbase.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcprint.h"
+#include "pub_core_machine.h"
+#include "pub_core_options.h"
+#include "pub_core_sigframe.h"
+#include "pub_core_signals.h"
+#include "pub_core_tooliface.h"
+#include "pub_core_trampoline.h"
+
+#if defined(VGA_s390x)
+
+/* This module creates and removes signal frames for signal deliveries
+   on s390x-linux.
+
+   Note, this file contains kernel-specific knowledge in the form of
+   'struct sigframe' and 'struct rt_sigframe'.
+
+   Either a 'struct sigframe' or a 'struct rt_sigframe' is pushed
+   onto the client's stack.  This contains a subsidiary
+   vki_sigcontext or vki_ucontext, respectively.  That holds the
+   vcpu's state across the signal, so that the sighandler can mess
+   with the vcpu state if it really wants.
+*/
+
+/* Write zzval into guest general purpose register number zzn of the
+   thread state zztst, then announce the write to the tool through the
+   post_reg_write event (tagged Vg_CoreSignal).  zztst is parenthesized
+   so that any pointer-valued expression can be passed safely. */
+#define SET_SIGNAL_GPR(zztst, zzn, zzval)                            \
+   do { (zztst)->arch.vex.guest_r##zzn = (unsigned long)(zzval);     \
+      VG_TRACK( post_reg_write, Vg_CoreSignal, (zztst)->tid,         \
+                offsetof(VexGuestS390XState,guest_r##zzn),           \
+                sizeof(UWord) );                                     \
+   } while (0)
+
+/*------------------------------------------------------------*/
+/*--- Signal frame layouts                                 ---*/
+/*------------------------------------------------------------*/
+
+// A structure in which to save the application's registers
+// during the execution of signal handlers.
+
+// Linux has 2 signal frame structures: one for normal signal
+// deliveries, and one for SA_SIGINFO deliveries (also known as RT
+// signals).
+//
+// In theory, so long as we get the arguments to the handler function
+// right, it doesn't matter what the exact layout of the rest of the
+// frame is.  Unfortunately, things like gcc's exception unwinding
+// make assumptions about the locations of various parts of the frame,
+// so we need to duplicate it exactly.
+
+/* Valgrind-specific parts of the signal frame.  Filled in by
+   build_vg_sigframe and read back by restore_vg_sigframe. */
+struct vg_sigframe
+{
+   /* Sanity check word; set to 0x31415927 when the frame is built. */
+   UInt magicPI;
+
+   UInt handlerflags;	/* flags for signal handler */
+
+
+   /* Private copy of the signal number; this is the value handed back
+      when the frame is destroyed. */
+   Int  sigNo_private;
+
+   /* XXX This is wrong.  Surely we should store the shadow values
+      into the shadow memory behind the actual values? */
+   VexGuestS390XState vex_shadow1;
+   VexGuestS390XState vex_shadow2;
+
+   /* HACK ALERT: complete copy of the guest state at delivery time */
+   VexGuestS390XState vex;
+   /* end HACK ALERT */
+
+   /* saved signal mask to be restored when handler returns */
+   vki_sigset_t	mask;
+
+   /* Sanity check word; set to 0x27182818 when the frame is built.  Is
+      the highest-addressed word; do not move!*/
+   UInt magicE;
+};
+
+#define S390_SYSCALL_SIZE 2   /* size in bytes of the retcode[] areas below */
+
+struct sigframe   /* frame built for handlers installed without SA_SIGINFO */
+{
+   UChar callee_used_stack[__VKI_SIGNAL_FRAMESIZE];  /* first word receives the backchain */
+   struct vki_sigcontext sc;          /* its address is handed to the handler in r3 */
+   _vki_sigregs sregs;                /* register snapshot written by save_sigregs */
+   Int sigNo;                         /* copy of siginfo->si_signo */
+   UChar retcode[S390_SYSCALL_SIZE];  /* 0x0a, __NR_sigreturn when no restorer is given */
+
+   struct vg_sigframe vg;             /* Valgrind-private part; must stay last */
+};
+
+struct rt_sigframe   /* frame built for handlers installed with SA_SIGINFO */
+{
+   UChar callee_used_stack[__VKI_SIGNAL_FRAMESIZE];  /* first word receives the backchain */
+   UChar retcode[S390_SYSCALL_SIZE];  /* 0x0a, __NR_rt_sigreturn; zeroed when a restorer is given */
+   struct vki_siginfo info;           /* copy of the siginfo; address handed to the handler in r3 */
+   struct vki_ucontext uc;            /* uc_mcontext holds the register snapshot; address handed over in r4 */
+
+   struct vg_sigframe vg;             /* Valgrind-private part; must stay last */
+};
+
+/*------------------------------------------------------------*/
+/*--- Creating signal frames                               ---*/
+/*------------------------------------------------------------*/
+
+/* Saves all user-controlled registers into a _vki_sigregs structure:
+   the 16 general purpose registers, the 16 access registers, the 16
+   floating point registers plus the fpc, and the guest instruction
+   address as psw.addr.  The guest state of 'tst' is only read. */
+static void save_sigregs(ThreadState *tst, _vki_sigregs *sigregs)
+{
+   sigregs->regs.gprs[0]  = tst->arch.vex.guest_r0;
+   sigregs->regs.gprs[1]  = tst->arch.vex.guest_r1;
+   sigregs->regs.gprs[2]  = tst->arch.vex.guest_r2;
+   sigregs->regs.gprs[3]  = tst->arch.vex.guest_r3;
+   sigregs->regs.gprs[4]  = tst->arch.vex.guest_r4;
+   sigregs->regs.gprs[5]  = tst->arch.vex.guest_r5;
+   sigregs->regs.gprs[6]  = tst->arch.vex.guest_r6;
+   sigregs->regs.gprs[7]  = tst->arch.vex.guest_r7;
+   sigregs->regs.gprs[8]  = tst->arch.vex.guest_r8;
+   sigregs->regs.gprs[9]  = tst->arch.vex.guest_r9;
+   sigregs->regs.gprs[10] = tst->arch.vex.guest_r10;
+   sigregs->regs.gprs[11] = tst->arch.vex.guest_r11;
+   sigregs->regs.gprs[12] = tst->arch.vex.guest_r12;
+   sigregs->regs.gprs[13] = tst->arch.vex.guest_r13;
+   sigregs->regs.gprs[14] = tst->arch.vex.guest_r14;
+   sigregs->regs.gprs[15] = tst->arch.vex.guest_r15;
+
+   sigregs->regs.acrs[0]  = tst->arch.vex.guest_a0;
+   sigregs->regs.acrs[1]  = tst->arch.vex.guest_a1;
+   sigregs->regs.acrs[2]  = tst->arch.vex.guest_a2;
+   sigregs->regs.acrs[3]  = tst->arch.vex.guest_a3;
+   sigregs->regs.acrs[4]  = tst->arch.vex.guest_a4;
+   sigregs->regs.acrs[5]  = tst->arch.vex.guest_a5;
+   sigregs->regs.acrs[6]  = tst->arch.vex.guest_a6;
+   sigregs->regs.acrs[7]  = tst->arch.vex.guest_a7;
+   sigregs->regs.acrs[8]  = tst->arch.vex.guest_a8;
+   sigregs->regs.acrs[9]  = tst->arch.vex.guest_a9;
+   sigregs->regs.acrs[10] = tst->arch.vex.guest_a10;
+   sigregs->regs.acrs[11] = tst->arch.vex.guest_a11;
+   sigregs->regs.acrs[12] = tst->arch.vex.guest_a12;
+   sigregs->regs.acrs[13] = tst->arch.vex.guest_a13;
+   sigregs->regs.acrs[14] = tst->arch.vex.guest_a14;
+   sigregs->regs.acrs[15] = tst->arch.vex.guest_a15;
+
+   sigregs->fpregs.fprs[0] = tst->arch.vex.guest_f0;
+   sigregs->fpregs.fprs[1] = tst->arch.vex.guest_f1;
+   sigregs->fpregs.fprs[2] = tst->arch.vex.guest_f2;
+   sigregs->fpregs.fprs[3] = tst->arch.vex.guest_f3;
+   sigregs->fpregs.fprs[4] = tst->arch.vex.guest_f4;
+   sigregs->fpregs.fprs[5] = tst->arch.vex.guest_f5;
+   sigregs->fpregs.fprs[6] = tst->arch.vex.guest_f6;
+   sigregs->fpregs.fprs[7] = tst->arch.vex.guest_f7;
+   sigregs->fpregs.fprs[8] = tst->arch.vex.guest_f8;
+   sigregs->fpregs.fprs[9] = tst->arch.vex.guest_f9;
+   sigregs->fpregs.fprs[10] = tst->arch.vex.guest_f10;
+   sigregs->fpregs.fprs[11] = tst->arch.vex.guest_f11;
+   sigregs->fpregs.fprs[12] = tst->arch.vex.guest_f12;
+   sigregs->fpregs.fprs[13] = tst->arch.vex.guest_f13;
+   sigregs->fpregs.fprs[14] = tst->arch.vex.guest_f14;
+   sigregs->fpregs.fprs[15] = tst->arch.vex.guest_f15;
+   sigregs->fpregs.fpc      = tst->arch.vex.guest_fpc;
+
+   sigregs->regs.psw.addr = tst->arch.vex.guest_IA;
+   /* save a sane dummy mask: a fixed constant, not a value taken from
+      the guest state */
+   sigregs->regs.psw.mask = 0x0705000180000000UL;
+}
+
+static void restore_sigregs(ThreadState *tst, _vki_sigregs *sigregs)
+{  /* Counterpart of save_sigregs: load the guest general purpose
+      registers, access registers, floating point registers, fpc and IA
+      of 'tst' from 'sigregs'.  psw.mask is not read back. */
+   tst->arch.vex.guest_r0  = sigregs->regs.gprs[0];
+   tst->arch.vex.guest_r1  = sigregs->regs.gprs[1];
+   tst->arch.vex.guest_r2  = sigregs->regs.gprs[2];
+   tst->arch.vex.guest_r3  = sigregs->regs.gprs[3];
+   tst->arch.vex.guest_r4  = sigregs->regs.gprs[4];
+   tst->arch.vex.guest_r5  = sigregs->regs.gprs[5];
+   tst->arch.vex.guest_r6  = sigregs->regs.gprs[6];
+   tst->arch.vex.guest_r7  = sigregs->regs.gprs[7];
+   tst->arch.vex.guest_r8  = sigregs->regs.gprs[8];
+   tst->arch.vex.guest_r9  = sigregs->regs.gprs[9];
+   tst->arch.vex.guest_r10 = sigregs->regs.gprs[10];
+   tst->arch.vex.guest_r11 = sigregs->regs.gprs[11];
+   tst->arch.vex.guest_r12 = sigregs->regs.gprs[12];
+   tst->arch.vex.guest_r13 = sigregs->regs.gprs[13];
+   tst->arch.vex.guest_r14 = sigregs->regs.gprs[14];
+   tst->arch.vex.guest_r15 = sigregs->regs.gprs[15];
+
+   tst->arch.vex.guest_a0  = sigregs->regs.acrs[0];
+   tst->arch.vex.guest_a1  = sigregs->regs.acrs[1];
+   tst->arch.vex.guest_a2  = sigregs->regs.acrs[2];
+   tst->arch.vex.guest_a3  = sigregs->regs.acrs[3];
+   tst->arch.vex.guest_a4  = sigregs->regs.acrs[4];
+   tst->arch.vex.guest_a5  = sigregs->regs.acrs[5];
+   tst->arch.vex.guest_a6  = sigregs->regs.acrs[6];
+   tst->arch.vex.guest_a7  = sigregs->regs.acrs[7];
+   tst->arch.vex.guest_a8  = sigregs->regs.acrs[8];
+   tst->arch.vex.guest_a9  = sigregs->regs.acrs[9];
+   tst->arch.vex.guest_a10 = sigregs->regs.acrs[10];
+   tst->arch.vex.guest_a11 = sigregs->regs.acrs[11];
+   tst->arch.vex.guest_a12 = sigregs->regs.acrs[12];
+   tst->arch.vex.guest_a13 = sigregs->regs.acrs[13];
+   tst->arch.vex.guest_a14 = sigregs->regs.acrs[14];
+   tst->arch.vex.guest_a15 = sigregs->regs.acrs[15];
+
+   tst->arch.vex.guest_f0  = sigregs->fpregs.fprs[0];
+   tst->arch.vex.guest_f1  = sigregs->fpregs.fprs[1];
+   tst->arch.vex.guest_f2  = sigregs->fpregs.fprs[2];
+   tst->arch.vex.guest_f3  = sigregs->fpregs.fprs[3];
+   tst->arch.vex.guest_f4  = sigregs->fpregs.fprs[4];
+   tst->arch.vex.guest_f5  = sigregs->fpregs.fprs[5];
+   tst->arch.vex.guest_f6  = sigregs->fpregs.fprs[6];
+   tst->arch.vex.guest_f7  = sigregs->fpregs.fprs[7];
+   tst->arch.vex.guest_f8  = sigregs->fpregs.fprs[8];
+   tst->arch.vex.guest_f9  = sigregs->fpregs.fprs[9];
+   tst->arch.vex.guest_f10 = sigregs->fpregs.fprs[10];
+   tst->arch.vex.guest_f11 = sigregs->fpregs.fprs[11];
+   tst->arch.vex.guest_f12 = sigregs->fpregs.fprs[12];
+   tst->arch.vex.guest_f13 = sigregs->fpregs.fprs[13];
+   tst->arch.vex.guest_f14 = sigregs->fpregs.fprs[14];
+   tst->arch.vex.guest_f15 = sigregs->fpregs.fprs[15];
+   tst->arch.vex.guest_fpc = sigregs->fpregs.fpc;
+
+   tst->arch.vex.guest_IA = sigregs->regs.psw.addr;
+}
+
+/* Extend the stack segment downwards if needed so as to ensure the
+   new signal frame, which starts at 'addr' and is 'size' bytes long,
+   is mapped to something.  On success the tool is told, through the
+   new_mem_stack_signal event, that the frame plus the stack redzone
+   below it has been allocated, and True is returned.  On failure a
+   message is printed, SIGSEGV is reset to its default handler,
+   VG_(synth_fault_mapping) is called for the thread, and False is
+   returned.
+*/
+static Bool extend ( ThreadState *tst, Addr addr, SizeT size )
+{
+   ThreadId        tid = tst->tid;
+   NSegment const* stackseg = NULL;
+
+   if (VG_(extend_stack)(addr, tst->client_stack_szB)) {
+      stackseg = VG_(am_find_nsegment)(addr);
+      if (0 && stackseg)
+	 VG_(printf)("frame=%#lx seg=%#lx-%#lx\n",
+		     addr, stackseg->start, stackseg->end);
+   }
+
+   if (stackseg == NULL || !stackseg->hasR || !stackseg->hasW) {
+      VG_(message)(
+         Vg_UserMsg,
+         "Can't extend stack to %#lx during signal delivery for thread %d:\n",
+         addr, tid);
+      if (stackseg == NULL)
+         VG_(message)(Vg_UserMsg, "  no stack segment\n");
+      else
+         VG_(message)(Vg_UserMsg, "  too small or bad protection modes\n");
+
+      /* set SIGSEGV to default handler */
+      VG_(set_default_handler)(VKI_SIGSEGV);
+      VG_(synth_fault_mapping)(tid, addr);
+
+      /* The whole process should be about to die, since the default
+         action of SIGSEGV is to kill the whole process. */
+      return False;
+   }
+
+   /* For tracking memory events, indicate the entire frame has been
+      allocated.  The announced range starts VG_STACK_REDZONE_SZB bytes
+      below the frame. */
+   VG_TRACK( new_mem_stack_signal, addr - VG_STACK_REDZONE_SZB,
+             size + VG_STACK_REDZONE_SZB, tid );
+
+   return True;
+}
+
+
+/* Fill in the Valgrind-private tail of a signal frame: the two magic
+   words used as a sanity check on return, the handler flags and signal
+   number, the signal mask in force, and copies of the real and shadow
+   guest state of 'tst'. */
+
+static void build_vg_sigframe(struct vg_sigframe *frame,
+                              ThreadState *tst,
+                              UInt flags,
+                              Int sigNo)
+{
+   /* Bracketing sanity words. */
+   frame->magicPI = 0x31415927;
+   frame->magicE  = 0x27182818;
+
+   /* Delivery parameters. */
+   frame->handlerflags  = flags;
+   frame->sigNo_private = sigNo;
+   frame->mask          = tst->sig_mask;
+
+   /* Guest state snapshots; the copy of the real state is the
+      HACK ALERT part. */
+   frame->vex         = tst->arch.vex;
+   frame->vex_shadow1 = tst->arch.vex_shadow1;
+   frame->vex_shadow2 = tst->arch.vex_shadow2;
+}
+
+
+static Addr build_sigframe(ThreadState *tst,
+			   Addr sp_top_of_frame,
+			   const vki_siginfo_t *siginfo,
+			   const struct vki_ucontext *siguc,
+			   UInt flags,
+			   const vki_sigset_t *mask,
+			   void *restorer)
+{  /* Build a 'struct sigframe' (non-SA_SIGINFO delivery) immediately
+      below sp_top_of_frame.  Returns the address of the new frame, or
+      sp_top_of_frame unchanged if the stack could not be extended.
+      Besides filling the frame this sets guest r14 (return address:
+      'restorer' or the sigreturn trampoline), r2 (signal number) and
+      r3 (address of the frame's sigcontext).  'siguc' is not used. */
+   struct sigframe *frame;
+   Addr sp = sp_top_of_frame;
+
+   vg_assert((flags & VKI_SA_SIGINFO) == 0);
+   vg_assert((sizeof(*frame) & 7) == 0);
+   vg_assert((sp & 7) == 0);
+
+   sp -= sizeof(*frame);
+   frame = (struct sigframe *)sp;
+
+   if (!extend(tst, sp, sizeof(*frame)))
+      return sp_top_of_frame;
+
+   /* retcode, sigNo, sc, sregs fields are to be written; the announced
+      range is everything in the frame that precedes the 'vg' part */
+   VG_TRACK( pre_mem_write, Vg_CoreSignal, tst->tid, "signal handler frame",
+	     sp, offsetof(struct sigframe, vg) );
+
+   save_sigregs(tst, &frame->sregs);
+
+   frame->sigNo = siginfo->si_signo;
+   frame->sc.sregs = &frame->sregs;
+   VG_(memcpy)(frame->sc.oldmask, mask->sig, sizeof(frame->sc.oldmask));
+
+   if (flags & VKI_SA_RESTORER) {
+      SET_SIGNAL_GPR(tst, 14, restorer);
+   } else {
+      frame->retcode[0] = 0x0a;
+      frame->retcode[1] = __NR_sigreturn;
+      /* This normally should be &frame->retcode, but since there
+         might be problems with non-exec stack and we must discard
+         the translation for the on-stack sigreturn we just use the
+         trampoline like x86,ppc. We still fill in the retcode, let's
+         just hope that nobody actually jumps here */
+      SET_SIGNAL_GPR(tst, 14, (Addr)&VG_(s390x_linux_SUBST_FOR_sigreturn));
+   }
+
+   SET_SIGNAL_GPR(tst, 2, siginfo->si_signo);
+   SET_SIGNAL_GPR(tst, 3, &frame->sc);
+   /* fixs390: we don't fill in trapno and prot_addr in r4 and r5 */
+
+   /* Set up backchain: the first word of the frame points at the old
+      top of stack. */
+   *((Addr *) sp) = sp_top_of_frame;
+
+   VG_TRACK( post_mem_write, Vg_CoreSignal, tst->tid,
+             sp, offsetof(struct sigframe, vg) );
+
+   build_vg_sigframe(&frame->vg, tst, flags, siginfo->si_signo);
+
+   return sp;
+}
+
+static Addr build_rt_sigframe(ThreadState *tst,
+			      Addr sp_top_of_frame,
+			      const vki_siginfo_t *siginfo,
+			      const struct vki_ucontext *siguc,
+			      UInt flags,
+			      const vki_sigset_t *mask,
+			      void *restorer)
+{  /* Build a 'struct rt_sigframe' (SA_SIGINFO delivery) immediately
+      below sp_top_of_frame.  Returns the address of the new frame, or
+      sp_top_of_frame unchanged if the stack could not be extended.
+      Besides filling the frame this sets guest r14 (return address:
+      'restorer' or the rt_sigreturn trampoline), r2 (signal number),
+      r3 (address of the siginfo copy) and r4 (address of the ucontext).
+      'siguc' is not used. */
+   struct rt_sigframe *frame;
+   Addr sp = sp_top_of_frame;
+   Int sigNo = siginfo->si_signo;
+
+   vg_assert((flags & VKI_SA_SIGINFO) != 0);
+   vg_assert((sizeof(*frame) & 7) == 0);
+   vg_assert((sp & 7) == 0);
+
+   sp -= sizeof(*frame);
+   frame = (struct rt_sigframe *)sp;
+
+   if (!extend(tst, sp, sizeof(*frame)))
+      return sp_top_of_frame;
+
+   /* retcode, info, uc fields are to be written; the announced range
+      is everything in the frame that precedes the 'vg' part */
+   VG_TRACK( pre_mem_write, Vg_CoreSignal, tst->tid, "signal handler frame",
+	     sp, offsetof(struct rt_sigframe, vg) );
+
+   save_sigregs(tst, &frame->uc.uc_mcontext);
+
+   if (flags & VKI_SA_RESTORER) {
+      frame->retcode[0] = 0;
+      frame->retcode[1] = 0;
+      SET_SIGNAL_GPR(tst, 14, restorer);
+   } else {
+      frame->retcode[0] = 0x0a;
+      frame->retcode[1] = __NR_rt_sigreturn;
+      /* This normally should be &frame->retcode, but since there
+         might be problems with non-exec stack and we must discard
+         the translation for the on-stack sigreturn we just use the
+         trampoline like x86,ppc. We still fill in the retcode, let's
+         just hope that nobody actually jumps here */
+      SET_SIGNAL_GPR(tst, 14, (Addr)&VG_(s390x_linux_SUBST_FOR_rt_sigreturn));
+   }
+
+   VG_(memcpy)(&frame->info, siginfo, sizeof(vki_siginfo_t));
+   frame->uc.uc_flags = 0;
+   frame->uc.uc_link = 0;
+   frame->uc.uc_sigmask = *mask;
+   frame->uc.uc_stack = tst->altstack;
+
+   SET_SIGNAL_GPR(tst, 2, siginfo->si_signo);
+   SET_SIGNAL_GPR(tst, 3, &frame->info);
+   SET_SIGNAL_GPR(tst, 4, &frame->uc);
+
+   /* Set up backchain: the first word of the frame points at the old
+      top of stack. */
+   *((Addr *) sp) = sp_top_of_frame;
+
+   VG_TRACK( post_mem_write, Vg_CoreSignal, tst->tid,
+             sp, offsetof(struct rt_sigframe, vg) );
+
+   build_vg_sigframe(&frame->vg, tst, flags, sigNo);
+   return sp;
+}
+
+/* EXPORTED
+   Push a signal frame for thread 'tid' below sp_top_of_frame -- an RT
+   frame if 'flags' has VKI_SA_SIGINFO set, a plain one otherwise --
+   then point the guest stack pointer at the value returned by the
+   frame builder and the guest IA at 'handler'. */
+void VG_(sigframe_create)( ThreadId tid,
+			   Addr sp_top_of_frame,
+			   const vki_siginfo_t *siginfo,
+			   const struct vki_ucontext *siguc,
+			   void *handler,
+			   UInt flags,
+			   const vki_sigset_t *mask,
+			   void *restorer )
+{
+   Addr sp;
+   ThreadState* tst = VG_(get_ThreadState)(tid);
+
+   if (flags & VKI_SA_SIGINFO)
+      sp = build_rt_sigframe(tst, sp_top_of_frame, siginfo, siguc,
+			     flags, mask, restorer);
+   else
+      sp = build_sigframe(tst, sp_top_of_frame, siginfo, siguc,
+			  flags, mask, restorer);
+
+   /* Set the thread so it will next run the handler. */
+   VG_(set_SP)(tid, sp);
+   VG_TRACK( post_reg_write, Vg_CoreSignal, tid, VG_O_STACK_PTR, sizeof(Addr));
+
+   tst->arch.vex.guest_IA = (Addr) handler;
+   /* We might have interrupted a repeating instruction that uses the guest
+      counter. Since our VEX requires that a new instruction will see a
+      guest counter == 0, we have to set it here. The old value will be
+      restored by restore_vg_sigframe. */
+   tst->arch.vex.guest_counter = 0;
+   /* This thread needs to be marked runnable, but we leave that to the
+      caller. */
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Destroying signal frames                             ---*/
+/*------------------------------------------------------------*/
+
+/* Restore the thread's signal masks and its real and shadow guest
+   state from the Valgrind-private part of a signal frame, and store
+   the saved signal number in *sigNo; return True.
+   If either magic word does not match, the frame is taken to be
+   corrupted: return False and don't restore anything, just set the
+   client to take a segfault and store VKI_SIGSEGV in *sigNo. */
+static
+Bool restore_vg_sigframe ( ThreadState *tst,
+                           struct vg_sigframe *frame, Int *sigNo )
+{
+   if (frame->magicPI != 0x31415927 ||
+       frame->magicE  != 0x27182818) {
+      VG_(message)(Vg_UserMsg, "Thread %d return signal frame "
+			       "corrupted.  Killing process.\n",
+		   tst->tid);
+      VG_(set_default_handler)(VKI_SIGSEGV);
+      VG_(synth_fault)(tst->tid);
+      *sigNo = VKI_SIGSEGV;
+      return False;
+   }
+   tst->sig_mask         = frame->mask;
+   tst->tmp_sig_mask     = frame->mask;
+   tst->arch.vex_shadow1 = frame->vex_shadow1;
+   tst->arch.vex_shadow2 = frame->vex_shadow2;
+   /* HACK ALERT: the whole guest state is replaced by the saved copy */
+   tst->arch.vex         = frame->vex;
+   /* end HACK ALERT */
+   *sigNo                = frame->sigNo_private;
+   return True;
+}
+
+/* Undo a non-RT signal frame: restore the Valgrind-private state and,
+   if the frame passed its sanity check, reload the guest registers
+   from the register save area embedded in the frame.  Returns the size
+   of the frame so the caller can release it.
+
+   The registers are read from frame->sregs directly rather than through
+   the frame->sc.sregs pointer.  That pointer lives in memory the signal
+   handler may overwrite, and the kernel's sigreturn also uses the
+   embedded area, so following the pointer could make the core
+   dereference an arbitrary client-supplied address. */
+static
+SizeT restore_sigframe ( ThreadState *tst,
+                         struct sigframe *frame, Int *sigNo )
+{
+   if (restore_vg_sigframe(tst, &frame->vg, sigNo))
+      restore_sigregs(tst, &frame->sregs);
+
+   return sizeof(*frame);
+}
+
+/* Undo an RT signal frame: restore the Valgrind-private state and, if
+   the frame passed its sanity check, reload the guest registers from
+   the ucontext's machine context.  Returns the size of the frame. */
+static
+SizeT restore_rt_sigframe ( ThreadState *tst,
+                            struct rt_sigframe *frame, Int *sigNo )
+{
+   Bool frame_ok = restore_vg_sigframe(tst, &frame->vg, sigNo);
+
+   if (frame_ok)
+      restore_sigregs(tst, &frame->uc.uc_mcontext);
+
+   return sizeof(*frame);
+}
+
+
+/* EXPORTED
+   Tear down the signal frame that the guest stack pointer of thread
+   'tid' currently points at.  'isRT' selects between the rt_sigframe
+   and the plain sigframe layout.  The thread state is restored from
+   the frame, the frame's memory (plus redzone) is reported dead to the
+   tool, and the tool is told that delivery of the signal is over. */
+void VG_(sigframe_destroy)( ThreadId tid, Bool isRT )
+{
+   Addr          sp;
+   ThreadState*  tst;
+   SizeT         size;
+   Int            sigNo;
+
+   tst = VG_(get_ThreadState)(tid);
+
+   /* The frame starts where the guest stack pointer currently points;
+      fetch it before the restore below replaces the guest state. */
+   sp   = tst->arch.vex.guest_SP;
+
+   if (!isRT)
+      size = restore_sigframe(tst, (struct sigframe *)sp, &sigNo);
+   else
+      size = restore_rt_sigframe(tst, (struct rt_sigframe *)sp, &sigNo);
+
+   /* same as for creation: we must announce the full memory (including
+      alignment), otherwise massif might fail on longjmp */
+   VG_TRACK( die_mem_stack_signal, sp - VG_STACK_REDZONE_SZB,
+             size + VG_STACK_REDZONE_SZB );
+
+   if (VG_(clo_trace_signals))   /* NOTE(review): the text below says "valid magic" whatever the restore reported */
+      VG_(message)(
+         Vg_DebugMsg,
+         "VG_(sigframe_destroy) (thread %d): isRT=%d valid magic; IP=%#llx\n",
+         tid, isRT, tst->arch.vex.guest_IA);
+
+   /* tell the tools */
+   VG_TRACK( post_deliver_signal, tid, sigNo );
+}
+
+#endif /* VGA_s390x */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                   sigframe-s390x-linux.c ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_signals.c b/coregrind/m_signals.c
index a3b7690..4171762 100644
--- a/coregrind/m_signals.c
+++ b/coregrind/m_signals.c
@@ -523,6 +523,23 @@
       I_die_here;
    }
 
+#elif defined(VGP_s390x_linux)
+
+#  define VG_UCONTEXT_INSTR_PTR(uc)       ((uc)->uc_mcontext.regs.psw.addr)
+#  define VG_UCONTEXT_STACK_PTR(uc)       ((uc)->uc_mcontext.regs.gprs[15])
+#  define VG_UCONTEXT_FRAME_PTR(uc)       ((uc)->uc_mcontext.regs.gprs[11])
+#  define VG_UCONTEXT_SYSCALL_SYSRES(uc)                        \
+      VG_(mk_SysRes_s390x_linux)((uc)->uc_mcontext.regs.gprs[2])
+#  define VG_UCONTEXT_LINK_REG(uc) ((uc)->uc_mcontext.regs.gprs[14])
+
+#  define VG_UCONTEXT_TO_UnwindStartRegs(srP, uc)        \
+      { (srP)->r_pc = (ULong)((uc)->uc_mcontext.regs.psw.addr);    \
+        (srP)->r_sp = (ULong)((uc)->uc_mcontext.regs.gprs[15]);    \
+        (srP)->misc.S390X.r_fp = (uc)->uc_mcontext.regs.gprs[11];  \
+        (srP)->misc.S390X.r_lr = (uc)->uc_mcontext.regs.gprs[14];  \
+      }
+
+
 #else 
 #  error Unknown platform
 #endif
@@ -852,6 +869,13 @@
    "my_sigreturn:\n" \
    "ud2\n"
 
+#elif defined(VGP_s390x_linux)
+#  define _MY_SIGRETURN(name) \
+   ".text\n" \
+   "my_sigreturn:\n" \
+   " svc " #name "\n" \
+   ".previous\n"
+
 #else
 #  error Unknown platform
 #endif
@@ -1862,6 +1886,7 @@
    uc.uc_mcontext->__es.__err = 0;
 #  endif
 
+   /* fixs390: do we need to do anything here for s390 ? */
    resume_scheduler(tid);
    deliver_signal(tid, &info, &uc);
 }
@@ -2210,6 +2235,19 @@
    }
 }
 
+/* Maps exact address 'in' to the address a fault on it would report */
+static Addr fault_mask(Addr in)
+{
+   /*  We have to use VG_PGROUNDDN because faults on s390x only deliver
+       the page address but not the address within a page.  On every
+       other architecture 'in' is returned unchanged. */
+#  if defined(VGA_s390x)
+   return VG_PGROUNDDN(in);
+#  else
+   return in;
+#endif
+}
+
 /* Returns True if the sync signal was due to the stack requiring extension
    and the extension was successful.
 */
@@ -2247,7 +2285,7 @@
        && seg_next
        && seg_next->kind == SkAnonC
        && seg->end+1 == seg_next->start
-       && fault >= (esp - VG_STACK_REDZONE_SZB)) {
+       && fault >= fault_mask(esp - VG_STACK_REDZONE_SZB)) {
       /* If the fault address is above esp but below the current known
          stack segment base, and it was a fault because there was
          nothing mapped there (as opposed to a permissions fault),
diff --git a/coregrind/m_stacktrace.c b/coregrind/m_stacktrace.c
index 489f6d7..707b470 100644
--- a/coregrind/m_stacktrace.c
+++ b/coregrind/m_stacktrace.c
@@ -670,6 +670,85 @@
 
 #endif
 
+/* ------------------------ s390x ------------------------- */
+#if defined(VGP_s390x_linux)
+UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
+                               /*OUT*/Addr* ips, UInt max_n_ips,
+                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
+                               UnwindStartRegs* startRegs,
+                               Addr fp_max_orig )
+{
+   Bool  debug = False;
+   Int   i;
+   Addr  fp_max;
+   UInt  n_found = 0;
+
+   vg_assert(sizeof(Addr) == sizeof(UWord));
+   vg_assert(sizeof(Addr) == sizeof(void*));
+
+   D3UnwindRegs uregs;
+   uregs.ia = startRegs->r_pc;
+   uregs.sp = startRegs->r_sp;
+   Addr fp_min = uregs.sp;
+   uregs.fp = startRegs->misc.S390X.r_fp;
+   uregs.lr = startRegs->misc.S390X.r_lr;
+
+   fp_max = VG_PGROUNDUP(fp_max_orig);
+   if (fp_max >= sizeof(Addr))
+      fp_max -= sizeof(Addr);
+
+   if (debug)
+      VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
+                  "fp_max=0x%lx IA=0x%lx SP=0x%lx FP=0x%lx\n",
+                  max_n_ips, fp_min, fp_max_orig, fp_max,
+                  uregs.ia, uregs.sp,uregs.fp);
+
+   /* The first frame is pretty obvious */
+   ips[0] = uregs.ia;
+   if (sps) sps[0] = uregs.sp;
+   if (fps) fps[0] = uregs.fp;
+   i = 1;
+
+   /* For everything else we have to rely on the eh_frame.  gcc defaults to
+      not creating a backchain, and all the other tools (like gdb) also have
+      to use the CFI. */
+   while (True) {
+      if (i >= max_n_ips)
+         break;
+
+      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
+         if (sps) sps[i] = uregs.sp;
+         if (fps) fps[i] = uregs.fp;
+         ips[i++] = uregs.ia - 1;
+         uregs.ia = uregs.ia - 1;
+         continue;
+      }
+      /* A problem on the first frame? Let's assume it was a bad jump.
+         We will use the link register and the current stack and frame
+         pointers and see if we can use the CFI in the next round. */
+      if (i == 1) {
+         if (sps) {
+            sps[i] = sps[0];
+            uregs.sp = sps[0];
+         }
+         if (fps) {
+            fps[i] = fps[0];
+            uregs.fp = fps[0];
+         }
+         uregs.ia = uregs.lr - 1;
+         ips[i++] = uregs.lr - 1;
+         continue;
+      }
+
+      /* No luck.  We have to give up. */
+      break;
+   }
+
+   n_found = i;
+   return n_found;
+}
+#endif
+
 /*------------------------------------------------------------*/
 /*---                                                      ---*/
 /*--- END platform-dependent unwinder worker functions     ---*/
diff --git a/coregrind/m_syscall.c b/coregrind/m_syscall.c
index 531d3e0..296a74d 100644
--- a/coregrind/m_syscall.c
+++ b/coregrind/m_syscall.c
@@ -100,6 +100,17 @@
    return res;
 }
 
+SysRes VG_(mk_SysRes_s390x_linux) ( Long val ) {
+   SysRes res;
+   res._isError = val >= -4095 && val <= -1;
+   if (res._isError) {
+      res._val = -val;
+   } else {
+      res._val = val;
+   }
+   return res;
+}
+
 SysRes VG_(mk_SysRes_arm_linux) ( Int val ) {
    SysRes res;
    res._isError = val >= -4095 && val <= -1;
@@ -719,6 +730,38 @@
     "        retq                     \n"
     );
 
+#elif defined(VGP_s390x_linux)
+
+static UWord do_syscall_WRK (
+   UWord syscall_no,
+   UWord arg1, UWord arg2, UWord arg3,
+   UWord arg4, UWord arg5, UWord arg6
+   )
+{
+   register UWord __arg1 asm("2") = arg1;
+   register UWord __arg2 asm("3") = arg2;
+   register UWord __arg3 asm("4") = arg3;
+   register UWord __arg4 asm("5") = arg4;
+   register UWord __arg5 asm("6") = arg5;
+   register UWord __arg6 asm("7") = arg6;
+   register ULong __svcres asm("2");
+
+   __asm__ __volatile__ (
+                 "lgr %%r1,%1\n\t"
+                 "svc 0\n\t"
+		: "=d" (__svcres)
+		: "a" (syscall_no),
+		  "0" (__arg1),
+		  "d" (__arg2),
+		  "d" (__arg3),
+		  "d" (__arg4),
+		  "d" (__arg5),
+		  "d" (__arg6)
+		: "1", "cc", "memory");
+
+   return (UWord) (__svcres);
+}
+
 #else
 #  error Unknown platform
 #endif
@@ -846,6 +889,24 @@
    }
    return VG_(mk_SysRes_amd64_darwin)( scclass, err ? True : False, wHI, wLO );
   
+#elif defined(VGP_s390x_linux)
+   UWord val;
+
+   if (sysno == __NR_mmap) {
+     ULong argbuf[6];
+
+     argbuf[0] = a1;
+     argbuf[1] = a2;
+     argbuf[2] = a3;
+     argbuf[3] = a4;
+     argbuf[4] = a5;
+     argbuf[5] = a6;
+     val = do_syscall_WRK(sysno,(UWord)&argbuf[0],0,0,0,0,0);
+   } else {
+     val = do_syscall_WRK(sysno,a1,a2,a3,a4,a5,a6);
+   }
+
+   return VG_(mk_SysRes_s390x_linux)( val );
 #else
 #  error Unknown platform
 #endif
diff --git a/coregrind/m_syswrap/priv_types_n_macros.h b/coregrind/m_syswrap/priv_types_n_macros.h
index a145594..0887b9c 100644
--- a/coregrind/m_syswrap/priv_types_n_macros.h
+++ b/coregrind/m_syswrap/priv_types_n_macros.h
@@ -89,7 +89,7 @@
       Int o_sysno;
 #     if defined(VGP_x86_linux) || defined(VGP_amd64_linux) \
          || defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) \
-         || defined(VGP_arm_linux)
+         || defined(VGP_arm_linux) || defined(VGP_s390x_linux)
       Int o_arg1;
       Int o_arg2;
       Int o_arg3;
diff --git a/coregrind/m_syswrap/syscall-s390x-linux.S b/coregrind/m_syswrap/syscall-s390x-linux.S
new file mode 100644
index 0000000..72cf740
--- /dev/null
+++ b/coregrind/m_syswrap/syscall-s390x-linux.S
@@ -0,0 +1,172 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Support for doing system calls.        syscall-s390x-linux.S ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Christian Borntraeger */
+
+#include "pub_core_basics_asm.h"
+#include "pub_core_vkiscnums_asm.h"
+#include "libvex_guest_offsets.h"
+
+#if defined(VGA_s390x)
+
+/*----------------------------------------------------------------*/
+/*
+        Perform a syscall for the client.  This will run a syscall
+        with the client's specific per-thread signal mask.
+
+        The structure of this function is such that, if the syscall is
+        interrupted by a signal, we can determine exactly what
+        execution state we were in with respect to the execution of
+        the syscall by examining the value of IA in the signal
+        handler.  This means that we can always do the appropriate
+        thing to precisely emulate the kernel's signal/syscall
+        interactions.
+
+        The syscall number is taken from the argument, since the syscall
+        number can be encoded in the svc instruction itself.
+        The syscall result is written back to guest register r2.
+
+        Returns 0 if the syscall was successfully called (even if the
+        syscall itself failed), or 0x8000 | error (see label 7 below)
+	if one of the sigprocmasks failed (there's no way to
+	determine which one failed).  And there's no obvious way to
+	recover from that either, but nevertheless we want to know.
+
+        VG_(fixup_guest_state_after_syscall_interrupted) does the
+	thread state fixup in the case where we were interrupted by a
+	signal.
+
+        Prototype:
+
+	UWord ML_(do_syscall_for_client_WRK)(
+				  Int syscallno,		// r2
+				  void* guest_state,		// r3
+				  const vki_sigset_t *sysmask,	// r4
+				  const vki_sigset_t *postmask,	// r5
+				  Int nsigwords)		// r6
+*/
+/* from vki_arch.h */
+#define VKI_SIG_SETMASK 2
+
+#define SP_SAVE 16
+#define SP_R2	SP_SAVE + 0*8
+#define SP_R3	SP_SAVE + 1*8
+#define SP_R4	SP_SAVE + 2*8
+#define SP_R5	SP_SAVE + 3*8
+#define SP_R6	SP_SAVE + 4*8
+#define SP_R7	SP_SAVE + 5*8
+#define SP_R8	SP_SAVE + 6*8
+#define SP_R9	SP_SAVE + 7*8
+
+.align 4
+.globl ML_(do_syscall_for_client_WRK)
+ML_(do_syscall_for_client_WRK):
+1:	/* Even though we can't take a signal until the sigprocmask completes,
+	start the range early.
+	If IA is in the range [1,2), the syscall hasn't been started yet */
+
+	/* Set the signal mask which should be current during the syscall. */
+	/* Save and restore all the parameters and all the registers that
+	   we clobber (r6-r9) */
+	stmg	%r2,%r9, SP_R2(%r15)
+
+	lghi	%r2, VKI_SIG_SETMASK		/* how */
+	lgr	%r3, %r4			/* sysmask */
+	lgr	%r4, %r5			/* postmask */
+	lgr	%r5, %r6			/* nsigwords */
+	svc	__NR_rt_sigprocmask
+	cghi	%r2, 0x0
+	jne	7f				/* sigprocmask failed */
+
+	/* OK, that worked.  Now do the syscall proper. */
+	lg	%r9, SP_R3(%r15)		/* guest state --> r9 */
+	lg	%r2, OFFSET_s390x_r2(%r9)	/* guest r2 --> real r2 */
+	lg	%r3, OFFSET_s390x_r3(%r9)	/* guest r3 --> real r3 */
+	lg	%r4, OFFSET_s390x_r4(%r9)	/* guest r4 --> real r4 */
+	lg	%r5, OFFSET_s390x_r5(%r9)	/* guest r5 --> real r5 */
+	lg	%r6, OFFSET_s390x_r6(%r9)	/* guest r6 --> real r6 */
+	lg	%r7, OFFSET_s390x_r7(%r9)	/* guest r7 --> real r7 */
+	lg	%r1, SP_R2(%r15)		/* syscallno -> r1 */
+
+2:	svc	0
+
+3:
+	stg	%r2, OFFSET_s390x_r2(%r9)
+
+4:	/* Re-block signals.  If IA is in [4,5), then the syscall
+	   is complete and we needn't worry about it. */
+	lghi	%r2, VKI_SIG_SETMASK		/* how */
+	lg	%r3, SP_R5(%r15)		/* postmask */
+	lghi	%r4, 0x0			/* NULL */
+	lg	%r5, SP_R6(%r15)		/* nsigwords */
+	svc	__NR_rt_sigprocmask
+	cghi	%r2, 0x0
+	jne	7f				/* sigprocmask failed */
+
+5:	/* Everything ok. Return 0 and restore the call-saved
+	   registers, that we have clobbered */
+	lghi	%r2, 0x0
+	lmg	%r6,%r9, SP_R6(%r15)
+	br	%r14
+
+7:	/* Some problem. Return 0x8000 | error and restore the call-saved
+	   registers we have clobbered. */
+	nill	%r2, 0x7fff
+	oill	%r2, 0x8000
+	lmg	%r6,%r9, SP_R6(%r15)
+	br	%r14
+
+.section .rodata
+/* Export the ranges so that
+   VG_(fixup_guest_state_after_syscall_interrupted) can do the
+   right thing */
+
+.globl ML_(blksys_setup)
+.globl ML_(blksys_restart)
+.globl ML_(blksys_complete)
+.globl ML_(blksys_committed)
+.globl ML_(blksys_finished)
+
+/* The compiler can assume that 8 byte data elements are aligned on 8 byte */
+.align 8
+ML_(blksys_setup):     .quad 1b
+ML_(blksys_restart):   .quad 2b
+ML_(blksys_complete):  .quad 3b
+ML_(blksys_committed): .quad 4b
+ML_(blksys_finished):  .quad 5b
+.previous
+
+/* Let the linker know we don't need an executable stack */
+.section .note.GNU-stack,"",@progbits
+
+#endif /* VGA_s390x */
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_syswrap/syswrap-generic.c b/coregrind/m_syswrap/syswrap-generic.c
index 1fbd1fe..9f4025c 100644
--- a/coregrind/m_syswrap/syswrap-generic.c
+++ b/coregrind/m_syswrap/syswrap-generic.c
@@ -1958,6 +1958,11 @@
  *   call, mmap (aka sys_mmap)  which takes the arguments in the
  *   normal way and the offset in bytes.
  *
+ * - On s390x-linux there is mmap (aka old_mmap) which takes the
+ *   arguments in a memory block and the offset in bytes. mmap2
+ *   is also available (but not exported via unistd.h) with
+ *   arguments in a memory block and the offset in pages.
+ *
  * To cope with all this we provide a generic handler function here
  * and then each platform implements one or more system call handlers
  * which call this generic routine after extracting and normalising
diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c
index b0e5ca3..35f8ee6 100644
--- a/coregrind/m_syswrap/syswrap-linux.c
+++ b/coregrind/m_syswrap/syswrap-linux.c
@@ -206,6 +206,14 @@
          "svc  0x00000000\n"  /* exit(tst->os_state.exitcode) */
          : "=m" (tst->status)
          : "r" (VgTs_Empty), "n" (__NR_exit), "m" (tst->os_state.exitcode));
+#elif defined(VGP_s390x_linux)
+      asm volatile (
+         "st   %1, %0\n"        /* set tst->status = VgTs_Empty */
+         "lg   2, %3\n"         /* set r2 = tst->os_state.exitcode */
+         "svc %2\n"             /* exit(tst->os_state.exitcode) */
+         : "=m" (tst->status)
+         : "d" (VgTs_Empty), "n" (__NR_exit), "m" (tst->os_state.exitcode)
+         : "2");
 #else
 # error Unknown platform
 #endif
@@ -288,6 +296,11 @@
    sp -= 112;
    sp &= ~((Addr)0xF);
    *(UWord *)sp = 0;
+#elif defined(VGP_s390x_linux)
+   /* make a stack frame */
+   sp -= 160;
+   sp &= ~((Addr)0xF);
+   *(UWord *)sp = 0;
 #endif
 
    /* If we can't even allocate the first thread's stack, we're hosed.
@@ -342,6 +355,10 @@
    res = VG_(do_syscall5)( __NR_clone, flags, 
                            (UWord)NULL, (UWord)parent_tidptr, 
                            (UWord)child_tidptr, (UWord)NULL );
+#elif defined(VGP_s390x_linux)
+   /* Note that s390 has the stack first and then the flags */
+   res = VG_(do_syscall4)( __NR_clone, (UWord) NULL, flags,
+                          (UWord)parent_tidptr, (UWord)child_tidptr);
 #else
 # error Unknown platform
 #endif
@@ -3566,7 +3583,7 @@
 }
 #endif
 
-#if defined(VGP_amd64_linux)
+#if defined(VGP_amd64_linux) || defined(VGP_s390x_linux)
 PRE(sys_lookup_dcookie)
 {
    *flags |= SfMayBlock;
diff --git a/coregrind/m_syswrap/syswrap-main.c b/coregrind/m_syswrap/syswrap-main.c
index 21f0f88..4d33eb8 100644
--- a/coregrind/m_syswrap/syswrap-main.c
+++ b/coregrind/m_syswrap/syswrap-main.c
@@ -60,14 +60,20 @@
 /* Useful info which needs to be recorded somewhere:
    Use of registers in syscalls is:
 
-          NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
+          NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
    LINUX:
-   x86    eax ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
-   amd64  rax rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
-   ppc32  r0  r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
-   ppc64  r0  r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
-   arm    r7  r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
-
+   x86    eax   ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
+   amd64  rax   rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
+   ppc32  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
+   ppc64  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
+   arm    r7    r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
+   On s390x the svc instruction is used for system calls. The system call
+   number is encoded in the instruction (8 bit immediate field). Since Linux
+   2.6 it is also allowed to use svc 0 with the system call number in r1.
+   This was introduced for system calls >255, but works for all. It is
+   also possible to see the svc 0 together with an EXecute instruction, that
+   fills in the immediate field.
+   s390x r1/SVC r2   r3   r4   r5   r6   r7   n/a  n/a  r2        (== ARG1)
    AIX:
    ppc32  r2  r3   r4   r5   r6   r7   r8   r9   r10  r3(res),r4(err)
    ppc64  r2  r3   r4   r5   r6   r7   r8   r9   r10  r3(res),r4(err)
@@ -160,6 +166,9 @@
      x86:    Success(N) ==>  edx:eax = N, cc = 0
              Fail(N)    ==>  edx:eax = N, cc = 1
 
+     s390x:  Success(N) ==>  r2 = N
+             Fail(N)    ==>  r2 = -N
+
    * The post wrapper is called if:
 
      - it exists, and
@@ -611,6 +620,17 @@
 
    // no canonical->sysno adjustment needed
 
+#elif defined(VGP_s390x_linux)
+   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
+   canonical->sysno = gst->guest_SYSNO;
+   canonical->arg1  = gst->guest_r2;
+   canonical->arg2  = gst->guest_r3;
+   canonical->arg3  = gst->guest_r4;
+   canonical->arg4  = gst->guest_r5;
+   canonical->arg5  = gst->guest_r6;
+   canonical->arg6  = gst->guest_r7;
+   canonical->arg7  = 0;
+   canonical->arg8  = 0;
 #else
 #  error "getSyscallArgsFromGuestState: unknown arch"
 #endif
@@ -728,6 +748,16 @@
    stack[1]       = canonical->arg7;
    stack[2]       = canonical->arg8;
 
+#elif defined(VGP_s390x_linux)
+   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
+   gst->guest_SYSNO  = canonical->sysno;
+   gst->guest_r2     = canonical->arg1;
+   gst->guest_r3     = canonical->arg2;
+   gst->guest_r4     = canonical->arg3;
+   gst->guest_r5     = canonical->arg4;
+   gst->guest_r6     = canonical->arg5;
+   gst->guest_r7     = canonical->arg6;
+
 #else
 #  error "putSyscallArgsIntoGuestState: unknown arch"
 #endif
@@ -842,6 +872,11 @@
                      );
    canonical->what = SsComplete;
 
+#  elif defined(VGP_s390x_linux)
+   VexGuestS390XState* gst   = (VexGuestS390XState*)gst_vanilla;
+   canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
+   canonical->what = SsComplete;
+
 #  else
 #    error "getSyscallStatusFromGuestState: unknown arch"
 #  endif
@@ -1016,6 +1051,15 @@
          break;
    }
    
+#  elif defined(VGP_s390x_linux)
+   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
+   vg_assert(canonical->what == SsComplete);
+   if (sr_isError(canonical->sres)) {
+      gst->guest_r2 = - (Long)sr_Err(canonical->sres);
+   } else {
+      gst->guest_r2 = sr_Res(canonical->sres);
+   }
+
 #  else
 #    error "putSyscallStatusIntoGuestState: unknown arch"
 #  endif
@@ -1129,6 +1173,16 @@
    layout->s_arg7   = sizeof(UWord) * 1;
    layout->s_arg8   = sizeof(UWord) * 2;
 
+#elif defined(VGP_s390x_linux)
+   layout->o_sysno  = OFFSET_s390x_SYSNO;
+   layout->o_arg1   = OFFSET_s390x_r2;
+   layout->o_arg2   = OFFSET_s390x_r3;
+   layout->o_arg3   = OFFSET_s390x_r4;
+   layout->o_arg4   = OFFSET_s390x_r5;
+   layout->o_arg5   = OFFSET_s390x_r6;
+   layout->o_arg6   = OFFSET_s390x_r7;
+   layout->uu_arg7  = -1; /* impossible value */
+   layout->uu_arg8  = -1; /* impossible value */
 #else
 #  error "getSyscallLayout: unknown arch"
 #endif
@@ -1957,6 +2011,23 @@
    // DDD: #warning GrP fixme amd64 restart unimplemented
    vg_assert(0);
    
+#elif defined(VGP_s390x_linux)
+   arch->vex.guest_IA -= 2;             // sizeof(syscall)
+
+   /* Make sure our caller is actually sane, and we're really backing
+      back over a syscall.
+
+      syscall == 0A <num>
+   */
+   {
+      UChar *p = (UChar *)arch->vex.guest_IA;
+      if (p[0] != 0x0A)
+         VG_(message)(Vg_DebugMsg,
+                      "?! restarting over syscall at %#llx %02x %02x\n",
+                      arch->vex.guest_IA, p[0], p[1]);
+
+      vg_assert(p[0] == 0x0A);
+   }
 #else
 #  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
 #endif
diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c b/coregrind/m_syswrap/syswrap-s390x-linux.c
new file mode 100644
index 0000000..eb2f290
--- /dev/null
+++ b/coregrind/m_syswrap/syswrap-s390x-linux.c
@@ -0,0 +1,1527 @@
+
+/*--------------------------------------------------------------------*/
+/*--- Platform-specific syscalls stuff.      syswrap-s390x-linux.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Christian Borntraeger */
+
+#if defined(VGP_s390x_linux)
+
+#include "pub_core_basics.h"
+#include "pub_core_vki.h"
+#include "pub_core_vkiscnums.h"
+#include "pub_core_threadstate.h"
+#include "pub_core_aspacemgr.h"
+#include "pub_core_debuglog.h"
+#include "pub_core_libcbase.h"
+#include "pub_core_libcassert.h"
+#include "pub_core_libcprint.h"
+#include "pub_core_libcproc.h"
+#include "pub_core_libcsignal.h"
+#include "pub_core_mallocfree.h"
+#include "pub_core_options.h"
+#include "pub_core_scheduler.h"
+#include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
+#include "pub_core_signals.h"
+#include "pub_core_syscall.h"
+#include "pub_core_syswrap.h"
+#include "pub_core_tooliface.h"
+#include "pub_core_stacks.h"        // VG_(register_stack)
+
+#include "priv_types_n_macros.h"
+#include "priv_syswrap-generic.h"    /* for decls of generic wrappers */
+#include "priv_syswrap-linux.h"      /* for decls of linux-ish wrappers */
+#include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
+#include "priv_syswrap-main.h"
+
+
+/* ---------------------------------------------------------------------
+   clone() handling
+   ------------------------------------------------------------------ */
+
+/* Call f(arg1), but first switch stacks, using 'stack' as the new
+   stack, and use 'retaddr' as f's return-to address.  Also, clear all
+   the integer registers before entering f.
+   Thought: Why are we clearing the GPRs ? The callee pointed to by f
+   is a regular C function which will play by the ABI rules. So there is
+   no need to zero out the GPRs. If we assumed that f accesses registers at
+   will, then it would make sense to create a defined register state.
+   But then, why only for the GPRs and not the FPRs ? */
+__attribute__((noreturn))
+void ML_(call_on_new_stack_0_1) ( Addr stack,
+                                  Addr retaddr,
+                                  void (*f)(Word),
+                                  Word arg1 );
+/* Upon entering this function we have the following setup:
+     r2 = stack
+     r3 = retaddr
+     r4 = f
+     r5 = arg1
+*/
+asm(
+    ".text\n"
+    ".align 4\n"
+    ".globl vgModuleLocal_call_on_new_stack_0_1\n"
+    ".type vgModuleLocal_call_on_new_stack_0_1, @function\n"
+    "vgModuleLocal_call_on_new_stack_0_1:\n"
+    "   lgr %r15,%r2\n"     // stack to r15
+    "   lgr %r14,%r3\n"     // retaddr to r14
+    "   lgr %r2,%r5\n"      // arg1 to r2
+    // zero all gprs to get a defined state
+    "   lghi  %r0,0\n"
+    "   lghi  %r1,0\n"
+    // r2 holds the argument for the callee
+    "   lghi  %r3,0\n"
+    // r4 holds the callee address
+    "   lghi  %r5,0\n"
+    "   lghi  %r6,0\n"
+    "   lghi  %r7,0\n"
+    "   lghi  %r8,0\n"
+    "   lghi  %r9,0\n"
+    "   lghi  %r10,0\n"
+    "   lghi  %r11,0\n"
+    "   lghi  %r12,0\n"
+    "   lghi  %r13,0\n"
+    // r14 holds the return address for the callee
+    // r15 is the stack pointer
+    "   br  %r4\n"          // jump to f
+    ".previous\n"
+    );
+
+/*
+        Perform a clone system call.  clone is strange because it has
+        fork()-like return-twice semantics, so it needs special
+        handling here.
+
+        Upon entry, we have:
+            void*  child_stack   in r2
+            long   flags         in r3
+            int*   parent_tid    in r4
+            int*   child_tid     in r5
+            Addr   tlsaddr       in r6
+            Word   (*fn)(void *) 160(r15)
+            void   *arg          168(r15)
+
+        System call requires:
+            void*  child_stack  in r2  (sc arg1)
+            long   flags        in r3  (sc arg2)
+            int*   parent_tid   in r4  (sc arg3)
+            int*   child_tid    in r5  (sc arg4)
+            void*  tlsaddr      in r6  (sc arg5)
+
+        Returns the raw syscall return value (r2) as a ULong: the new
+        thread's tid in the parent, or a small negative value on error.
+ */
+#define __NR_CLONE        VG_STRINGIFY(__NR_clone)
+#define __NR_EXIT         VG_STRINGIFY(__NR_exit)
+
+extern
+ULong do_syscall_clone_s390x_linux ( void  *stack,       /* r2 */
+                                     ULong flags,        /* r3 */
+                                     Int   *parent_tid,  /* r4 */
+                                     Int   *child_tid,   /* r5 */
+                                     Addr  tlsaddr,      /* r6 */
+                                     Word (*fn)(void *), /* 160(r15) */
+                                     void  *arg);        /* 168(r15) */
+asm(
+   "   .text\n"
+   "   .align  4\n"
+   "do_syscall_clone_s390x_linux:\n"
+   "   lg    %r1, 160(%r15)\n"   // save fn from parent stack into r1
+   "   lg    %r0, 168(%r15)\n"   // save arg from parent stack into r0
+   "   aghi  %r2, -160\n"        // create stack frame for child
+   // all syscall parameters are already in place (r2-r6)
+   "   svc " __NR_CLONE"\n"        // clone()
+   "   ltgr  %r2,%r2\n"           // child if retval == 0
+   "   jne   1f\n"
+
+   // CHILD - call thread function
+   "   lgr   %r2, %r0\n"            // get arg from r0
+   "   basr  %r14,%r1\n"            // call fn
+
+   // exit. The result is already in r2
+   "   svc " __NR_EXIT"\n"
+
+   // Exit returned?!
+   "   j +2\n"
+
+   "1:\n"  // PARENT or ERROR
+   "   br %r14\n"
+   ".previous\n"
+);
+
+#undef __NR_CLONE
+#undef __NR_EXIT
+
+void VG_(cleanup_thread) ( ThreadArchState* arch )
+{
+  /* only used on x86 for descriptor tables */
+}
+
+static void setup_child ( /*OUT*/ ThreadArchState *child,
+                   /*IN*/  ThreadArchState *parent )
+{
+   /* We inherit our parent's guest state. */
+   child->vex = parent->vex;
+   child->vex_shadow1 = parent->vex_shadow1;
+   child->vex_shadow2 = parent->vex_shadow2;
+}
+
+
+/*
+   When a client clones, we need to keep track of the new thread.  This means:
+   1. allocate a ThreadId+ThreadState+stack for the thread
+   2. initialize the thread's new VCPU state (copied from the parent)
+   3. create the thread using the same args as the client requested,
+   but using the scheduler entrypoint for IP, and a separate stack
+   for SP.
+   The argument order (sp, flags, parent_tidptr, child_tidptr, tlsaddr)
+   follows the s390x clone syscall.  Returns the clone result as a SysRes.
+ */
+static SysRes do_clone ( ThreadId ptid,
+                         Addr sp, ULong flags,
+                         Int *parent_tidptr,
+                         Int *child_tidptr,
+                         Addr tlsaddr)
+{
+   static const Bool debug = False;
+
+   ThreadId     ctid = VG_(alloc_ThreadState)();
+   ThreadState* ptst = VG_(get_ThreadState)(ptid);
+   ThreadState* ctst = VG_(get_ThreadState)(ctid);
+   UWord*       stack;
+   NSegment const* seg;
+   SysRes       res;
+   ULong        r2;
+   vki_sigset_t blockall, savedmask;
+
+   VG_(sigfillset)(&blockall);
+
+   vg_assert(VG_(is_running_thread)(ptid));
+   vg_assert(VG_(is_valid_tid)(ctid));
+
+   stack = (UWord*)ML_(allocstack)(ctid);
+   if (stack == NULL) {
+      res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
+      goto out;
+   }
+
+   /* Copy register state
+
+      Both parent and child return to the same place, and the code
+      following the clone syscall works out which is which, so we
+      don't need to worry about it.
+
+      The parent gets the child's new tid returned from clone, but the
+      child gets 0.
+
+      If the clone call specifies a NULL sp for the new thread, then
+      it actually gets a copy of the parent's sp.
+   */
+   setup_child( &ctst->arch, &ptst->arch );
+
+   /* Make sys_clone appear to have returned Success(0) in the
+      child. */
+   ctst->arch.vex.guest_r2 = 0;
+
+   if (sp != 0)
+      ctst->arch.vex.guest_r15 = sp;
+
+   ctst->os_state.parent = ptid;
+
+   /* inherit signal mask */
+   ctst->sig_mask = ptst->sig_mask;
+   ctst->tmp_sig_mask = ptst->sig_mask;
+
+   /* have the parents thread group */
+   ctst->os_state.threadgroup = ptst->os_state.threadgroup;
+
+   /* We don't really know where the client stack is, because it's
+      allocated by the client.  The best we can do is look at the
+      memory mappings and try to derive some useful information.  We
+      assume that SP starts near its highest possible value, and can
+      only go down to the start of the mmaped segment. */
+   seg = VG_(am_find_nsegment)((Addr)sp);
+   if (seg && seg->kind != SkResvn) {
+      ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(sp);
+      ctst->client_stack_szB = ctst->client_stack_highest_word - seg->start;
+
+      VG_(register_stack)(seg->start, ctst->client_stack_highest_word);
+
+      if (debug)
+	 VG_(printf)("tid %d: guessed client stack range %#lx-%#lx\n",
+		     ctid, seg->start, VG_PGROUNDUP(sp));
+   } else {
+      VG_(message)(Vg_UserMsg,
+                   "!? New thread %d starts with SP(%#lx) unmapped\n",
+		   ctid, sp);
+      ctst->client_stack_szB  = 0;
+   }
+
+   /* Assume the clone will succeed, and tell any tool that wants to
+      know that this thread has come into existence.  If the clone
+      fails, we'll send out a ll_exit notification for it at the out:
+      label below, to clean up. */
+   VG_TRACK ( pre_thread_ll_create, ptid, ctid );
+
+   if (flags & VKI_CLONE_SETTLS) {
+      if (debug)
+	 VG_(printf)("clone child has SETTLS: tls at %#lx\n", tlsaddr);
+      ctst->arch.vex.guest_a0 = (UInt) (tlsaddr >> 32);
+      ctst->arch.vex.guest_a1 = (UInt) tlsaddr;
+   }
+   flags &= ~VKI_CLONE_SETTLS;
+
+   /* start the thread with everything blocked */
+   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);
+
+   /* Create the new thread; kernel wants parent_tid (r4), child_tid (r5) */
+   r2 = do_syscall_clone_s390x_linux(
+            stack, flags, parent_tidptr, child_tidptr, tlsaddr,
+            ML_(start_thread_NORETURN), &VG_(threads)[ctid]);
+
+   res = VG_(mk_SysRes_s390x_linux)( r2 );
+
+   VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);
+
+  out:
+   if (sr_isError(res)) {
+      /* clone failed */
+      ctst->status = VgTs_Empty;
+      /* oops.  Better tell the tool the thread exited in a hurry :-) */
+      VG_TRACK( pre_thread_ll_exit, ctid );
+   }
+
+   return res;
+
+}
+
+
+
+/* ---------------------------------------------------------------------
+   PRE/POST wrappers for s390x/Linux-specific syscalls
+   ------------------------------------------------------------------ */
+
+#define PRE(name)       DEFN_PRE_TEMPLATE(s390x_linux, name)
+#define POST(name)      DEFN_POST_TEMPLATE(s390x_linux, name)
+/* PRE/POST are shorthands for this file only; both are #undef'd after the wrappers. */
+/* Add prototypes for the wrappers declared here, so that gcc doesn't
+   harass us for not having prototypes.  Really this is a kludge --
+   the right thing to do is to make these wrappers 'static' since they
+   aren't visible outside this file, but that requires even more macro
+   magic. */
+/* One declaration per syscall that gets an s390x-specific wrapper below. */
+DECL_TEMPLATE(s390x_linux, sys_ptrace);
+DECL_TEMPLATE(s390x_linux, sys_socketcall);
+DECL_TEMPLATE(s390x_linux, sys_mmap);
+DECL_TEMPLATE(s390x_linux, sys_ipc);
+DECL_TEMPLATE(s390x_linux, sys_clone);
+DECL_TEMPLATE(s390x_linux, sys_sigreturn);
+DECL_TEMPLATE(s390x_linux, sys_rt_sigreturn);
+DECL_TEMPLATE(s390x_linux, sys_fadvise64);
+// PEEK TEXT,DATA and USER are common to all architectures
+// PEEKUSR_AREA and POKEUSR_AREA are special, having a memory area
+// containing the real addr, data, and len field pointed to by ARG3
+// instead of ARG4
+PRE(sys_ptrace)
+{  /* Pre-handler: per request (ARG1), flag the client buffer that request uses. */
+   PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4);
+   PRE_REG_READ4(int, "ptrace",
+                 long, request, long, pid, long, addr, long, data);
+   switch (ARG1) {
+   case VKI_PTRACE_PEEKTEXT:
+   case VKI_PTRACE_PEEKDATA:
+   case VKI_PTRACE_PEEKUSR:
+      PRE_MEM_WRITE( "ptrace(peek)", ARG4,
+		     sizeof (long));  /* one long, written at ARG4 */
+      break;
+   case VKI_PTRACE_GETEVENTMSG:
+      PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
+      break;
+   case VKI_PTRACE_GETSIGINFO:
+      PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
+      break;
+   case VKI_PTRACE_SETSIGINFO:
+      PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
+      break;
+   case VKI_PTRACE_PEEKUSR_AREA:
+      {
+         vki_ptrace_area *pa;
+         /* The descriptor is at ARG3: its three fields are read, then its buffer is written. */
+         /* Reads a part of the user area into memory at pa->process_addr */
+	 pa = (vki_ptrace_area *) ARG3;
+         PRE_MEM_READ("ptrace(peekusrarea ptrace_area->len)",
+                      (unsigned long) &pa->vki_len, sizeof(pa->vki_len));
+         PRE_MEM_READ("ptrace(peekusrarea ptrace_area->kernel_addr)",
+                      (unsigned long) &pa->vki_kernel_addr, sizeof(pa->vki_kernel_addr));
+         PRE_MEM_READ("ptrace(peekusrarea ptrace_area->process_addr)",
+                      (unsigned long) &pa->vki_process_addr, sizeof(pa->vki_process_addr));
+         PRE_MEM_WRITE("ptrace(peekusrarea *(ptrace_area->process_addr))",
+                       pa->vki_process_addr, pa->vki_len);
+         break;
+      }
+   case VKI_PTRACE_POKEUSR_AREA:
+      {
+         vki_ptrace_area *pa;
+         /* Same descriptor checks as PEEKUSR_AREA, but the buffer is read, not written. */
+         /* Updates a part of the user area from memory at pa->process_addr */
+	 pa = (vki_ptrace_area *) ARG3;
+         PRE_MEM_READ("ptrace(pokeusrarea ptrace_area->len)",
+                      (unsigned long) &pa->vki_len, sizeof(pa->vki_len));
+         PRE_MEM_READ("ptrace(pokeusrarea ptrace_area->kernel_addr)",
+                      (unsigned long) &pa->vki_kernel_addr,
+                      sizeof(pa->vki_kernel_addr));
+         PRE_MEM_READ("ptrace(pokeusrarea ptrace_area->process_addr)",
+                      (unsigned long) &pa->vki_process_addr,
+                      sizeof(pa->vki_process_addr));
+         PRE_MEM_READ("ptrace(pokeusrarea *(ptrace_area->process_addr))",
+                       pa->vki_process_addr, pa->vki_len);
+         break;
+      }
+   default:  /* every other request: no memory checks here */
+      break;
+   }
+}
+
+POST(sys_ptrace)
+{  /* Post-handler: per request (ARG1), report the buffer via POST_MEM_WRITE. */
+   switch (ARG1) {
+   case VKI_PTRACE_PEEKTEXT:
+   case VKI_PTRACE_PEEKDATA:
+   case VKI_PTRACE_PEEKUSR:
+      POST_MEM_WRITE( ARG4, sizeof (long));
+      break;
+   case VKI_PTRACE_GETEVENTMSG:
+      POST_MEM_WRITE( ARG4, sizeof(unsigned long));
+      break;
+   case VKI_PTRACE_GETSIGINFO:
+      /* XXX: This is a simplification. Different parts of the
+       * siginfo_t are valid depending on the type of signal.
+       */
+      POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
+      break;
+   case VKI_PTRACE_PEEKUSR_AREA:
+      {
+         /* Descriptor is at ARG3; its process_addr/len name the filled buffer. */
+         vki_ptrace_area *pa = (vki_ptrace_area *) ARG3;
+         POST_MEM_WRITE(pa->vki_process_addr, pa->vki_len);
+         break;  /* explicit: do not rely on falling into default */
+      }
+   default:
+      break;
+   }
+}
+
+
+PRE(sys_socketcall)
+{
+#  define ARG2_0  (((UWord*)ARG2)[0])
+#  define ARG2_1  (((UWord*)ARG2)[1])
+#  define ARG2_2  (((UWord*)ARG2)[2])
+#  define ARG2_3  (((UWord*)ARG2)[3])
+#  define ARG2_4  (((UWord*)ARG2)[4])
+#  define ARG2_5  (((UWord*)ARG2)[5])
+   /* ARG1 is the socket call number; ARG2 points at an array of word-sized args (ARG2_n). */
+   *flags |= SfMayBlock;
+   PRINT("sys_socketcall ( %ld, %#lx )",ARG1,ARG2);
+   PRE_REG_READ2(long, "socketcall", int, call, unsigned long *, args);
+   /* Each case first checks the args array itself and fails with EFAULT if it is not valid. */
+   switch (ARG1 /* request */) {
+
+   case VKI_SYS_SOCKETPAIR:
+     /* int socketpair(int d, int type, int protocol, int sv[2]); */
+      PRE_MEM_READ( "socketcall.socketpair(args)", ARG2, 4*sizeof(Addr) );
+      if (!ML_(valid_client_addr)(ARG2, 4*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+      }
+      ML_(generic_PRE_sys_socketpair)( tid, ARG2_0, ARG2_1, ARG2_2, ARG2_3 );
+      break;
+
+   case VKI_SYS_SOCKET:
+     /* int socket(int domain, int type, int protocol); */
+      PRE_MEM_READ( "socketcall.socket(args)", ARG2, 3*sizeof(Addr) );
+      if (!ML_(valid_client_addr)(ARG2, 3*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+      }
+      break;  /* only the args array is checked */
+
+   case VKI_SYS_BIND:
+     /* int bind(int sockfd, struct sockaddr *my_addr,
+	int addrlen); */
+      PRE_MEM_READ( "socketcall.bind(args)", ARG2, 3*sizeof(Addr) );
+      if (!ML_(valid_client_addr)(ARG2, 3*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+      }
+      ML_(generic_PRE_sys_bind)( tid, ARG2_0, ARG2_1, ARG2_2 );
+      break;
+
+   case VKI_SYS_LISTEN:
+     /* int listen(int s, int backlog); */
+      PRE_MEM_READ( "socketcall.listen(args)", ARG2, 2*sizeof(Addr) );
+      if (!ML_(valid_client_addr)(ARG2, 2*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+      }
+      break;  /* only the args array is checked */
+
+   case VKI_SYS_ACCEPT: {
+     /* int accept(int s, struct sockaddr *addr, int *addrlen); */
+      PRE_MEM_READ( "socketcall.accept(args)", ARG2, 3*sizeof(Addr) );
+      if (!ML_(valid_client_addr)(ARG2, 3*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+      }
+      ML_(generic_PRE_sys_accept)( tid, ARG2_0, ARG2_1, ARG2_2 );
+      break;
+   }
+
+   case VKI_SYS_SENDTO:
+     /* int sendto(int s, const void *msg, int len,
+                    unsigned int flags,
+                    const struct sockaddr *to, int tolen); */
+     PRE_MEM_READ( "socketcall.sendto(args)", ARG2, 6*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 6*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(generic_PRE_sys_sendto)( tid, ARG2_0, ARG2_1, ARG2_2,
+				  ARG2_3, ARG2_4, ARG2_5 );
+     break;
+
+   case VKI_SYS_SEND:
+     /* int send(int s, const void *msg, size_t len, int flags); */
+     PRE_MEM_READ( "socketcall.send(args)", ARG2, 4*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 4*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(generic_PRE_sys_send)( tid, ARG2_0, ARG2_1, ARG2_2 );  /* flags (ARG2_3) not passed on */
+     break;
+
+   case VKI_SYS_RECVFROM:
+     /* int recvfrom(int s, void *buf, int len, unsigned int flags,
+	struct sockaddr *from, int *fromlen); */
+     PRE_MEM_READ( "socketcall.recvfrom(args)", ARG2, 6*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 6*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(generic_PRE_sys_recvfrom)( tid, ARG2_0, ARG2_1, ARG2_2,
+				    ARG2_3, ARG2_4, ARG2_5 );
+     break;
+
+   case VKI_SYS_RECV:
+     /* int recv(int s, void *buf, int len, unsigned int flags); */
+     /* man 2 recv says:
+         The  recv call is normally used only on a connected socket
+         (see connect(2)) and is identical to recvfrom with a  NULL
+         from parameter.
+     */
+     PRE_MEM_READ( "socketcall.recv(args)", ARG2, 4*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 4*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(generic_PRE_sys_recv)( tid, ARG2_0, ARG2_1, ARG2_2 );  /* flags (ARG2_3) not passed on */
+     break;
+
+   case VKI_SYS_CONNECT:
+     /* int connect(int sockfd,
+	struct sockaddr *serv_addr, int addrlen ); */
+     PRE_MEM_READ( "socketcall.connect(args)", ARG2, 3*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 3*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(generic_PRE_sys_connect)( tid, ARG2_0, ARG2_1, ARG2_2 );
+     break;
+
+   case VKI_SYS_SETSOCKOPT:
+     /* int setsockopt(int s, int level, int optname,
+	const void *optval, int optlen); */
+     PRE_MEM_READ( "socketcall.setsockopt(args)", ARG2, 5*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 5*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(generic_PRE_sys_setsockopt)( tid, ARG2_0, ARG2_1, ARG2_2,
+				      ARG2_3, ARG2_4 );
+     break;
+
+   case VKI_SYS_GETSOCKOPT:
+     /* int getsockopt(int s, int level, int optname,
+	void *optval, socklen_t *optlen); */
+     PRE_MEM_READ( "socketcall.getsockopt(args)", ARG2, 5*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 5*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(linux_PRE_sys_getsockopt)( tid, ARG2_0, ARG2_1, ARG2_2,
+				      ARG2_3, ARG2_4 );
+     break;
+
+   case VKI_SYS_GETSOCKNAME:
+     /* int getsockname(int s, struct sockaddr* name, int* namelen) */
+     PRE_MEM_READ( "socketcall.getsockname(args)", ARG2, 3*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 3*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(generic_PRE_sys_getsockname)( tid, ARG2_0, ARG2_1, ARG2_2 );
+     break;
+
+   case VKI_SYS_GETPEERNAME:
+     /* int getpeername(int s, struct sockaddr* name, int* namelen) */
+     PRE_MEM_READ( "socketcall.getpeername(args)", ARG2, 3*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 3*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(generic_PRE_sys_getpeername)( tid, ARG2_0, ARG2_1, ARG2_2 );
+     break;
+
+   case VKI_SYS_SHUTDOWN:
+     /* int shutdown(int s, int how); */
+     PRE_MEM_READ( "socketcall.shutdown(args)", ARG2, 2*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 2*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     break;  /* only the args array is checked */
+
+   case VKI_SYS_SENDMSG: {
+     /* int sendmsg(int s, const struct msghdr *msg, int flags); */
+     PRE_MEM_READ( "socketcall.sendmsg(args)", ARG2, 3*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 3*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(generic_PRE_sys_sendmsg)( tid, ARG2_0, ARG2_1 );
+     break;
+   }
+
+   case VKI_SYS_RECVMSG: {
+     /* int recvmsg(int s, struct msghdr *msg, int flags); */
+     PRE_MEM_READ("socketcall.recvmsg(args)", ARG2, 3*sizeof(Addr) );
+     if (!ML_(valid_client_addr)(ARG2, 3*sizeof(Addr), tid, NULL)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         break;
+     }
+     ML_(generic_PRE_sys_recvmsg)( tid, ARG2_0, ARG2_1 );
+     break;
+   }
+
+   default:  /* unknown call number: warn and fail the syscall with EINVAL */
+     VG_(message)(Vg_DebugMsg,"Warning: unhandled socketcall 0x%lx\n",ARG1);
+     SET_STATUS_Failure( VKI_EINVAL );
+     break;
+   }
+#  undef ARG2_0
+#  undef ARG2_1
+#  undef ARG2_2
+#  undef ARG2_3
+#  undef ARG2_4
+#  undef ARG2_5
+}
+
+POST(sys_socketcall)
+{
+#  define ARG2_0  (((UWord*)ARG2)[0])
+#  define ARG2_1  (((UWord*)ARG2)[1])
+#  define ARG2_2  (((UWord*)ARG2)[2])
+#  define ARG2_3  (((UWord*)ARG2)[3])
+#  define ARG2_4  (((UWord*)ARG2)[4])
+#  define ARG2_5  (((UWord*)ARG2)[5])
+  /* Post-handler: dispatch on the call number (ARG1); ARG2_n is entry n of the args array. */
+  SysRes r;
+  vg_assert(SUCCESS);
+  switch (ARG1 /* request */) {
+
+  case VKI_SYS_SOCKETPAIR:
+    r = ML_(generic_POST_sys_socketpair)(
+					 tid, VG_(mk_SysRes_Success)(RES),
+					 ARG2_0, ARG2_1, ARG2_2, ARG2_3
+					 );
+    SET_STATUS_from_SysRes(r);  /* the helper's result replaces the syscall status */
+    break;
+
+  case VKI_SYS_SOCKET:
+    r = ML_(generic_POST_sys_socket)( tid, VG_(mk_SysRes_Success)(RES) );
+    SET_STATUS_from_SysRes(r);  /* the helper's result replaces the syscall status */
+    break;
+
+  case VKI_SYS_BIND:
+    /* int bind(int sockfd, struct sockaddr *my_addr,
+       int addrlen); */
+    break;
+
+  case VKI_SYS_LISTEN:
+    /* int listen(int s, int backlog); */
+    break;
+
+  case VKI_SYS_ACCEPT:
+    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
+    r = ML_(generic_POST_sys_accept)( tid, VG_(mk_SysRes_Success)(RES),
+				      ARG2_0, ARG2_1, ARG2_2 );
+    SET_STATUS_from_SysRes(r);  /* the helper's result replaces the syscall status */
+    break;
+
+  case VKI_SYS_SENDTO:
+    break;
+
+  case VKI_SYS_SEND:
+    break;
+
+  case VKI_SYS_RECVFROM:
+    ML_(generic_POST_sys_recvfrom)( tid, VG_(mk_SysRes_Success)(RES),
+				    ARG2_0, ARG2_1, ARG2_2,
+				    ARG2_3, ARG2_4, ARG2_5 );
+    break;
+
+  case VKI_SYS_RECV:
+    ML_(generic_POST_sys_recv)( tid, RES, ARG2_0, ARG2_1, ARG2_2 );
+    break;
+
+  case VKI_SYS_CONNECT:
+    break;
+
+  case VKI_SYS_SETSOCKOPT:
+    break;
+
+  case VKI_SYS_GETSOCKOPT:
+    ML_(linux_POST_sys_getsockopt)( tid, VG_(mk_SysRes_Success)(RES),
+				      ARG2_0, ARG2_1,
+				      ARG2_2, ARG2_3, ARG2_4 );
+    break;
+
+  case VKI_SYS_GETSOCKNAME:
+    ML_(generic_POST_sys_getsockname)( tid, VG_(mk_SysRes_Success)(RES),
+				       ARG2_0, ARG2_1, ARG2_2 );
+    break;
+
+  case VKI_SYS_GETPEERNAME:
+    ML_(generic_POST_sys_getpeername)( tid, VG_(mk_SysRes_Success)(RES),
+				       ARG2_0, ARG2_1, ARG2_2 );
+    break;
+
+  case VKI_SYS_SHUTDOWN:
+    break;
+
+  case VKI_SYS_SENDMSG:
+    break;
+
+  case VKI_SYS_RECVMSG:
+    ML_(generic_POST_sys_recvmsg)( tid, ARG2_0, ARG2_1 );
+    break;
+
+  default:  /* the PRE handler fails unknown call numbers, so this panics */
+    VG_(message)(Vg_DebugMsg,"FATAL: unhandled socketcall 0x%lx\n",ARG1);
+    VG_(core_panic)("... bye!\n");
+    break; /*NOTREACHED*/
+  }
+#  undef ARG2_0
+#  undef ARG2_1
+#  undef ARG2_2
+#  undef ARG2_3
+#  undef ARG2_4
+#  undef ARG2_5
+}
+
+PRE(sys_mmap)
+{  /* One register argument: a pointer to six word-sized mmap parameters in memory. */
+   UWord a0, a1, a2, a3, a4, a5;
+   SysRes r;
+   /* NOTE(review): args is dereferenced below with no addressability check -- confirm. */
+   UWord* args = (UWord*)ARG1;
+   PRE_REG_READ1(long, "sys_mmap", struct mmap_arg_struct *, args);
+   PRE_MEM_READ( "sys_mmap(args)", (Addr) args, 6*sizeof(UWord) );
+   /* Unpack the six parameters from the client's array. */
+   a0 = args[0];
+   a1 = args[1];
+   a2 = args[2];
+   a3 = args[3];
+   a4 = args[4];
+   a5 = args[5];
+
+   PRINT("sys_mmap ( %#lx, %llu, %ld, %ld, %ld, %ld )",
+         a0, (ULong)a1, a2, a3, a4, a5 );
+   /* Hand off to the generic mmap pre-handler; its result becomes the syscall status. */
+   r = ML_(generic_PRE_sys_mmap)( tid, a0, a1, a2, a3, a4, (Off64T)a5 );
+   SET_STATUS_from_SysRes(r);
+}
+
+static Addr deref_Addr ( ThreadId tid, Addr a, Char* s )
+{
+   /* Report a read of one Addr at 'a' (labelled 's'), then fetch and return it. */
+   PRE_MEM_READ( s, a, sizeof(Addr) );
+   return *(Addr*)a;
+}
+
+PRE(sys_ipc)
+{  /* Pre-handler for the multiplexed ipc syscall: dispatch on ARG1 to per-operation checks. */
+  PRINT("sys_ipc ( %ld, %ld, %ld, %ld, %#lx, %ld )",
+        ARG1,ARG2,ARG3,ARG4,ARG5,ARG6);
+  // XXX: this is simplistic -- some args are not used in all circumstances.
+  PRE_REG_READ6(int, "ipc",
+		vki_uint, call, int, first, int, second, int, third,
+		void *, ptr, long, fifth);
+  /* Operations that can wait (semop, semtimedop, msgsnd/msgrcv without IPC_NOWAIT) set SfMayBlock. */
+    switch (ARG1 /* call */) {
+    case VKI_SEMOP:
+      ML_(generic_PRE_sys_semop)( tid, ARG2, ARG5, ARG3 );
+      *flags |= SfMayBlock;
+      break;
+    case VKI_SEMGET:
+      break;
+    case VKI_SEMCTL:
+      {
+	UWord arg = deref_Addr( tid, ARG5, "semctl(arg)" );  /* ARG5 points at the semctl arg */
+	ML_(generic_PRE_sys_semctl)( tid, ARG2, ARG3, ARG4, arg );
+	break;
+      }
+    case VKI_SEMTIMEDOP:
+      ML_(generic_PRE_sys_semtimedop)( tid, ARG2, ARG5, ARG3, ARG6 );
+      *flags |= SfMayBlock;
+      break;
+    case VKI_MSGSND:
+      ML_(linux_PRE_sys_msgsnd)( tid, ARG2, ARG5, ARG3, ARG4 );
+      if ((ARG4 & VKI_IPC_NOWAIT) == 0)
+	*flags |= SfMayBlock;
+      break;
+    case VKI_MSGRCV:
+      {
+	Addr msgp;
+	Word msgtyp;
+	/* ARG5 points at a struct vki_ipc_kludge holding msgp and msgtyp. */
+	msgp = deref_Addr( tid,
+			   (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgp),
+			   "msgrcv(msgp)" );
+	msgtyp = deref_Addr( tid,
+			     (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgtyp),
+			     "msgrcv(msgp)" );
+
+	ML_(linux_PRE_sys_msgrcv)( tid, ARG2, msgp, ARG3, msgtyp, ARG4 );
+
+	if ((ARG4 & VKI_IPC_NOWAIT) == 0)
+	  *flags |= SfMayBlock;
+	break;
+      }
+    case VKI_MSGGET:
+      break;
+    case VKI_MSGCTL:
+      ML_(linux_PRE_sys_msgctl)( tid, ARG2, ARG3, ARG5 );
+      break;
+    case VKI_SHMAT:
+      {
+	UWord w;
+	PRE_MEM_WRITE( "shmat(raddr)", ARG4, sizeof(Addr) );
+	w = ML_(generic_PRE_sys_shmat)( tid, ARG2, ARG5, ARG3 );
+	if (w == 0)
+	  SET_STATUS_Failure( VKI_EINVAL );
+	else
+	  ARG5 = w;  /* the helper's non-zero result replaces ARG5 */
+	break;
+      }
+    case VKI_SHMDT:
+      if (!ML_(generic_PRE_sys_shmdt)(tid, ARG5))
+	SET_STATUS_Failure( VKI_EINVAL );
+      break;
+    case VKI_SHMGET:
+      break;
+    case VKI_SHMCTL: /* IPCOP_shmctl */
+      ML_(generic_PRE_sys_shmctl)( tid, ARG2, ARG3, ARG5 );
+      break;
+    default:
+      VG_(message)(Vg_DebugMsg, "FATAL: unhandled syscall(ipc) %ld\n", ARG1 );
+      VG_(core_panic)("... bye!\n");
+      break; /*NOTREACHED*/
+    }
+}
+
+POST(sys_ipc)
+{  /* Post-handler for the multiplexed ipc syscall: dispatch on ARG1 to per-operation post-processing. */
+  vg_assert(SUCCESS);
+  switch (ARG1 /* call */) {
+  case VKI_SEMOP:
+  case VKI_SEMGET:
+    break;
+  case VKI_SEMCTL:
+    {  /* POST side: call the POST helper and pass it the result (RES), not the PRE helper again */
+      UWord arg = deref_Addr( tid, ARG5, "semctl(arg)" );
+      ML_(generic_POST_sys_semctl)( tid, RES, ARG2, ARG3, ARG4, arg );
+      break;
+    }
+  case VKI_SEMTIMEDOP:
+  case VKI_MSGSND:
+    break;
+  case VKI_MSGRCV:
+    {
+      Addr msgp;
+      Word msgtyp;
+      /* ARG5 points at a struct vki_ipc_kludge holding msgp and msgtyp. */
+      msgp = deref_Addr( tid,
+                         (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgp),
+                         "msgrcv(msgp)" );
+      msgtyp = deref_Addr( tid,
+                           (Addr) (&((struct vki_ipc_kludge *)ARG5)->msgtyp),
+                           "msgrcv(msgp)" );
+
+      ML_(linux_POST_sys_msgrcv)( tid, RES, ARG2, msgp, ARG3, msgtyp, ARG4 );
+      break;
+    }
+  case VKI_MSGGET:
+    break;
+  case VKI_MSGCTL:
+    ML_(linux_POST_sys_msgctl)( tid, RES, ARG2, ARG3, ARG5 );
+    break;
+  case VKI_SHMAT:
+    {
+      Addr addr;
+
+      /* force readability. before the syscall it is
+       * indeed uninitialized, as can be seen in
+       * glibc/sysdeps/unix/sysv/linux/shmat.c */
+      POST_MEM_WRITE( ARG4, sizeof( Addr ) );
+      /* ARG4 holds the address that is handed to the generic post-handler. */
+      addr = deref_Addr ( tid, ARG4, "shmat(addr)" );
+      ML_(generic_POST_sys_shmat)( tid, addr, ARG2, ARG5, ARG3 );
+      break;
+    }
+  case VKI_SHMDT:
+    ML_(generic_POST_sys_shmdt)( tid, RES, ARG5 );
+    break;
+  case VKI_SHMGET:
+    break;
+  case VKI_SHMCTL:
+    ML_(generic_POST_sys_shmctl)( tid, RES, ARG2, ARG3, ARG5 );
+    break;
+  default:
+    VG_(message)(Vg_DebugMsg,
+		 "FATAL: unhandled syscall(ipc) %ld\n",
+		 ARG1 );
+    VG_(core_panic)("... bye!\n");
+    break; /*NOTREACHED*/
+  }
+}
+
+PRE(sys_clone)
+{  /* The clone is carried out here, in the PRE handler. */
+   UInt cloneflags;
+   /* Threads go to do_clone, fork/vfork to ML_(do_fork_clone); other flag mixes are rejected. */
+   PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4, ARG5);
+   PRE_REG_READ4(int, "clone",
+                 void *,        child_stack,
+                 unsigned long, flags,
+                 int *,         parent_tidptr,
+                 int *,         child_tidptr);
+   /* NOTE(review): ARG5 (tlsaddr) is used below but not declared in the register check above. */
+   if (ARG2 & VKI_CLONE_PARENT_SETTID) {
+      PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
+      if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
+                                             VKI_PROT_WRITE)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         return;
+      }
+   }
+   if (ARG2 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
+      PRE_MEM_WRITE("clone(child_tidptr)", ARG4, sizeof(Int));
+      if (!VG_(am_is_valid_for_client)(ARG4, sizeof(Int),
+                                             VKI_PROT_WRITE)) {
+         SET_STATUS_Failure( VKI_EFAULT );
+         return;
+      }
+   }
+
+   cloneflags = ARG2;
+   /* The exit-signal bits of the flags must name a signal the client may use. */
+   if (!ML_(client_signal_OK)(ARG2 & VKI_CSIGNAL)) {
+      SET_STATUS_Failure( VKI_EINVAL );
+      return;
+   }
+
+   /* Only look at the flags we really care about */
+   switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
+                         | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
+   case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
+      /* thread creation */
+      SET_STATUS_from_SysRes(
+         do_clone(tid,
+                  (Addr)ARG1,   /* child SP */
+                  ARG2,         /* flags */
+                  (Int *)ARG3,  /* parent_tidptr */
+                  (Int *)ARG4, /* child_tidptr */
+                  (Addr)ARG5)); /*  tlsaddr */
+      break;
+
+   case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
+      /* FALLTHROUGH - assume vfork == fork */
+      cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);
+
+   case 0: /* plain fork */
+      SET_STATUS_from_SysRes(
+         ML_(do_fork_clone)(tid,
+                       cloneflags,      /* flags */
+                       (Int *)ARG3,     /* parent_tidptr */
+                       (Int *)ARG4));   /* child_tidptr */
+      break;
+
+   default:
+      /* should we just ENOSYS? */
+      VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG2);
+      VG_(message)(Vg_UserMsg, "\n");
+      VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
+      VG_(message)(Vg_UserMsg, " - via a threads library (NPTL)\n");
+      VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
+      VG_(unimplemented)
+         ("Valgrind does not support general clone().");
+   }
+
+   if (SUCCESS) {
+      if (ARG2 & VKI_CLONE_PARENT_SETTID)
+         POST_MEM_WRITE(ARG3, sizeof(Int));
+      if (ARG2 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
+         POST_MEM_WRITE(ARG4, sizeof(Int));
+
+      /* Thread creation was successful; let the child have the chance
+         to run */
+      *flags |= SfYieldAfter;
+   }
+}
+
+PRE(sys_sigreturn)
+{  /* Same sequence as sys_rt_sigreturn, but VG_(sigframe_destroy) is passed False. */
+   ThreadState* tst;
+   PRINT("sys_sigreturn ( )");
+   /* Sanity-check the calling thread before touching its state. */
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(tid >= 1 && tid < VG_N_THREADS);
+   vg_assert(VG_(is_running_thread)(tid));
+
+   tst = VG_(get_ThreadState)(tid);
+
+   /* This is only so that the IA is (might be) useful to report if
+      something goes wrong in the sigreturn */
+   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
+
+   /* Restore register state from frame and remove it */
+   VG_(sigframe_destroy)(tid, False);
+
+   /* Tell the driver not to update the guest state with the "result",
+      and set a bogus result to keep it happy. */
+   *flags |= SfNoWriteResult;
+   SET_STATUS_Success(0);
+
+   /* Check to see if any signals arose as a result of this. */
+   *flags |= SfPollAfter;
+}
+
+
+PRE(sys_rt_sigreturn)
+{
+   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
+      an explanation of what follows. */
+   /* Same sequence as sys_sigreturn, but VG_(sigframe_destroy) is passed True. */
+   ThreadState* tst;
+   PRINT("sys_rt_sigreturn ( )");
+   /* Sanity-check the calling thread before touching its state. */
+   vg_assert(VG_(is_valid_tid)(tid));
+   vg_assert(tid >= 1 && tid < VG_N_THREADS);
+   vg_assert(VG_(is_running_thread)(tid));
+
+   tst = VG_(get_ThreadState)(tid);
+
+   /* This is only so that the IA is (might be) useful to report if
+      something goes wrong in the sigreturn */
+   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
+
+   /* Restore register state from frame and remove it */
+   VG_(sigframe_destroy)(tid, True);
+
+   /* Tell the driver not to update the guest state with the "result",
+      and set a bogus result to keep it happy. */
+   *flags |= SfNoWriteResult;
+   SET_STATUS_Success(0);
+
+   /* Check to see if any signals arose as a result of this. */
+   *flags |= SfPollAfter;
+}
+
+/* We can't use the LINX_ version for 64 bit. */
+PRE(sys_fadvise64)
+{  /* Pre-handler: prints the call and declares its four register arguments; no memory checks. */
+   PRINT("sys_fadvise64 ( %ld, %ld, %ld, %ld )", ARG1,ARG2,ARG3,ARG4);
+   PRE_REG_READ4(long, "fadvise64",
+                 int, fd, vki_loff_t, offset, vki_loff_t, len, int, advice);
+}
+
+#undef PRE
+#undef POST
+
+/* ---------------------------------------------------------------------
+   The s390x/Linux syscall table
+   ------------------------------------------------------------------ */
+/* Add an s390x-linux specific wrapper to a syscall table.  In the table
+   below, PLAX_ is used for sys_mmap (PRE only), PLAXY for wrappers with PRE and POST. */
+#define PLAX_(sysno, name)    WRAPPER_ENTRY_X_(s390x_linux, sysno, name)
+#define PLAXY(sysno, name)    WRAPPER_ENTRY_XY(s390x_linux, sysno, name)
+
+// This table maps from __NR_xxx syscall numbers from
+// linux/arch/s390/kernel/syscalls.S to the appropriate PRE/POST sys_foo()
+// wrappers on s390x. There are several unused numbers, which are only
+// defined on s390 (31bit mode) but no longer available on s390x (64 bit).
+// For those syscalls not handled by Valgrind, the annotation indicates the
+// arch/OS combination, e.g. */* (generic), */Linux (Linux only), ?/?
+// (unknown).
+
+static SyscallTableEntry syscall_table[] = {
+   GENX_(0, sys_ni_syscall), /* unimplemented (by the kernel) */      // 0
+   GENX_(__NR_exit,  sys_exit),                                       // 1
+   GENX_(__NR_fork,  sys_fork),                                       // 2
+   GENXY(__NR_read,  sys_read),                                       // 3
+   GENX_(__NR_write,  sys_write),                                     // 4
+
+   GENXY(__NR_open,  sys_open),                                       // 5
+   GENXY(__NR_close,  sys_close),                                     // 6
+// ?????(__NR_restart_syscall, ),                                     // 7
+   GENXY(__NR_creat,  sys_creat),                                     // 8
+   GENX_(__NR_link,  sys_link),                                       // 9
+
+   GENX_(__NR_unlink,  sys_unlink),                                   // 10
+   GENX_(__NR_execve,  sys_execve),                                   // 11
+   GENX_(__NR_chdir,  sys_chdir),                                     // 12
+   GENX_(13, sys_ni_syscall), /* unimplemented (by the kernel) */     // 13
+   GENX_(__NR_mknod,  sys_mknod),                                     // 14
+
+   GENX_(__NR_chmod,  sys_chmod),                                     // 15
+   GENX_(16, sys_ni_syscall), /* unimplemented (by the kernel) */     // 16
+   GENX_(17, sys_ni_syscall), /* unimplemented (by the kernel) */     // 17
+   GENX_(18, sys_ni_syscall), /* unimplemented (by the kernel) */     // 18
+   LINX_(__NR_lseek,  sys_lseek),                                     // 19
+
+   GENX_(__NR_getpid,  sys_getpid),                                   // 20
+   LINX_(__NR_mount,  sys_mount),                                     // 21
+   LINX_(__NR_umount, sys_oldumount),                                 // 22
+   GENX_(23, sys_ni_syscall), /* unimplemented (by the kernel) */     // 23
+   GENX_(24, sys_ni_syscall), /* unimplemented (by the kernel) */     // 24
+
+   GENX_(25, sys_ni_syscall), /* unimplemented (by the kernel) */     // 25
+   PLAXY(__NR_ptrace, sys_ptrace),                                    // 26
+   GENX_(__NR_alarm,  sys_alarm),                                     // 27
+   GENX_(28, sys_ni_syscall), /* unimplemented (by the kernel) */     // 28
+   GENX_(__NR_pause,  sys_pause),                                     // 29
+
+   LINX_(__NR_utime,  sys_utime),                                     // 30
+   GENX_(31, sys_ni_syscall), /* unimplemented (by the kernel) */     // 31
+   GENX_(32, sys_ni_syscall), /* unimplemented (by the kernel) */     // 32
+   GENX_(__NR_access,  sys_access),                                   // 33
+   GENX_(__NR_nice, sys_nice),                                        // 34
+
+   GENX_(35, sys_ni_syscall), /* unimplemented (by the kernel) */     // 35
+   GENX_(__NR_sync, sys_sync),                                        // 36
+   GENX_(__NR_kill,  sys_kill),                                       // 37
+   GENX_(__NR_rename,  sys_rename),                                   // 38
+   GENX_(__NR_mkdir,  sys_mkdir),                                     // 39
+
+   GENX_(__NR_rmdir, sys_rmdir),                                      // 40
+   GENXY(__NR_dup,  sys_dup),                                         // 41
+   LINXY(__NR_pipe,  sys_pipe),                                       // 42
+   GENXY(__NR_times,  sys_times),                                     // 43
+   GENX_(44, sys_ni_syscall), /* unimplemented (by the kernel) */     // 44
+
+   GENX_(__NR_brk,  sys_brk),                                         // 45
+   GENX_(46, sys_ni_syscall), /* unimplemented (by the kernel) */     // 46
+   GENX_(47, sys_ni_syscall), /* unimplemented (by the kernel) */     // 47
+// ?????(__NR_signal, ),                                              // 48
+   GENX_(49, sys_ni_syscall), /* unimplemented (by the kernel) */     // 49
+
+   GENX_(50, sys_ni_syscall), /* unimplemented (by the kernel) */     // 50
+   GENX_(__NR_acct, sys_acct),                                        // 51
+   LINX_(__NR_umount2, sys_umount),                                   // 52
+   GENX_(53, sys_ni_syscall), /* unimplemented (by the kernel) */     // 53
+   LINXY(__NR_ioctl,  sys_ioctl),                                     // 54
+
+   LINXY(__NR_fcntl,  sys_fcntl),                                     // 55
+   GENX_(56, sys_ni_syscall), /* unimplemented (by the kernel) */     // 56
+   GENX_(__NR_setpgid,  sys_setpgid),                                 // 57
+   GENX_(58, sys_ni_syscall), /* unimplemented (by the kernel) */     // 58
+   GENX_(59, sys_ni_syscall), /* unimplemented (by the kernel) */     // 59
+
+   GENX_(__NR_umask,  sys_umask),                                     // 60
+   GENX_(__NR_chroot,  sys_chroot),                                   // 61
+// ?????(__NR_ustat, sys_ustat), /* deprecated in favor of statfs */  // 62
+   GENXY(__NR_dup2,  sys_dup2),                                       // 63
+   GENX_(__NR_getppid,  sys_getppid),                                 // 64
+
+   GENX_(__NR_getpgrp,  sys_getpgrp),                                 // 65
+   GENX_(__NR_setsid,  sys_setsid),                                   // 66
+// ?????(__NR_sigaction, ),   /* userspace uses rt_sigaction */       // 67
+   GENX_(68, sys_ni_syscall), /* unimplemented (by the kernel) */     // 68
+   GENX_(69, sys_ni_syscall), /* unimplemented (by the kernel) */     // 69
+
+   GENX_(70, sys_ni_syscall), /* unimplemented (by the kernel) */     // 70
+   GENX_(71, sys_ni_syscall), /* unimplemented (by the kernel) */     // 71
+// ?????(__NR_sigsuspend, ),                                          // 72
+// ?????(__NR_sigpending, ),                                          // 73
+// ?????(__NR_sethostname, ),                                         // 74
+
+   GENX_(__NR_setrlimit,  sys_setrlimit),                             // 75
+   GENXY(76,  sys_getrlimit), /* see also 191 */                      // 76
+   GENXY(__NR_getrusage,  sys_getrusage),                             // 77
+   GENXY(__NR_gettimeofday,  sys_gettimeofday),                       // 78
+   GENX_(__NR_settimeofday, sys_settimeofday),                        // 79
+
+   GENX_(80, sys_ni_syscall), /* unimplemented (by the kernel) */     // 80
+   GENX_(81, sys_ni_syscall), /* unimplemented (by the kernel) */     // 81
+   GENX_(82, sys_ni_syscall), /* unimplemented (by the kernel) */     // 82
+   GENX_(__NR_symlink,  sys_symlink),                                 // 83
+   GENX_(84, sys_ni_syscall), /* unimplemented (by the kernel) */     // 84
+
+   GENX_(__NR_readlink,  sys_readlink),                               // 85
+// ?????(__NR_uselib, ),                                              // 86
+// ?????(__NR_swapon, ),                                              // 87
+// ?????(__NR_reboot, ),                                              // 88
+   GENX_(89, sys_ni_syscall), /* unimplemented (by the kernel) */     // 89
+
+   PLAX_(__NR_mmap, sys_mmap ),                                       // 90
+   GENXY(__NR_munmap,  sys_munmap),                                   // 91
+   GENX_(__NR_truncate,  sys_truncate),                               // 92
+   GENX_(__NR_ftruncate,  sys_ftruncate),                             // 93
+   GENX_(__NR_fchmod,  sys_fchmod),                                   // 94
+
+   GENX_(95, sys_ni_syscall), /* unimplemented (by the kernel) */     // 95
+   GENX_(__NR_getpriority, sys_getpriority),                          // 96
+   GENX_(__NR_setpriority, sys_setpriority),                          // 97
+   GENX_(98, sys_ni_syscall), /* unimplemented (by the kernel) */     // 98
+   GENXY(__NR_statfs,  sys_statfs),                                   // 99
+
+   GENXY(__NR_fstatfs,  sys_fstatfs),                                 // 100
+   GENX_(101, sys_ni_syscall), /* unimplemented (by the kernel) */    // 101
+   PLAXY(__NR_socketcall, sys_socketcall),                            // 102
+   LINXY(__NR_syslog,  sys_syslog),                                   // 103
+   GENXY(__NR_setitimer,  sys_setitimer),                             // 104
+
+   GENXY(__NR_getitimer,  sys_getitimer),                             // 105
+   GENXY(__NR_stat, sys_newstat),                                     // 106
+   GENXY(__NR_lstat, sys_newlstat),                                   // 107
+   GENXY(__NR_fstat, sys_newfstat),                                   // 108
+   GENX_(109, sys_ni_syscall), /* unimplemented (by the kernel) */    // 109
+
+   LINXY(__NR_lookup_dcookie, sys_lookup_dcookie),                    // 110
+   LINX_(__NR_vhangup, sys_vhangup),                                  // 111
+   GENX_(112, sys_ni_syscall), /* unimplemented (by the kernel) */    // 112
+   GENX_(113, sys_ni_syscall), /* unimplemented (by the kernel) */    // 113
+   GENXY(__NR_wait4,  sys_wait4),                                     // 114
+
+// ?????(__NR_swapoff, ),                                             // 115
+   LINXY(__NR_sysinfo,  sys_sysinfo),                                 // 116
+   PLAXY(__NR_ipc, sys_ipc),                                          // 117
+   GENX_(__NR_fsync,  sys_fsync),                                     // 118
+   PLAX_(__NR_sigreturn, sys_sigreturn),                              // 119
+
+   PLAX_(__NR_clone,  sys_clone),                                     // 120
+// ?????(__NR_setdomainname, ),                                       // 121
+   GENXY(__NR_uname, sys_newuname),                                   // 122
+   GENX_(123, sys_ni_syscall), /* unimplemented (by the kernel) */    // 123
+// ?????(__NR_adjtimex, ),                                            // 124
+
+   GENXY(__NR_mprotect,  sys_mprotect),                               // 125
+// LINXY(__NR_sigprocmask, sys_sigprocmask),                          // 126
+   GENX_(127, sys_ni_syscall), /* unimplemented (by the kernel) */    // 127
+   LINX_(__NR_init_module,  sys_init_module),                         // 128
+   LINX_(__NR_delete_module,  sys_delete_module),                     // 129
+
+   GENX_(130, sys_ni_syscall), /* unimplemented (by the kernel) */    // 130
+   LINX_(__NR_quotactl, sys_quotactl),                                // 131
+   GENX_(__NR_getpgid,  sys_getpgid),                                 // 132
+   GENX_(__NR_fchdir,  sys_fchdir),                                   // 133
+// ?????(__NR_bdflush, ),                                             // 134
+
+// ?????(__NR_sysfs, ),                                               // 135
+   LINX_(__NR_personality, sys_personality),                          // 136
+   GENX_(137, sys_ni_syscall), /* unimplemented (by the kernel) */    // 137
+   GENX_(138, sys_ni_syscall), /* unimplemented (by the kernel) */    // 138
+   GENX_(139, sys_ni_syscall), /* unimplemented (by the kernel) */    // 139
+
+// LINXY(__NR__llseek, sys_llseek), /* 64 bit --> lseek */            // 140
+   GENXY(__NR_getdents,  sys_getdents),                               // 141
+   GENX_(__NR_select, sys_select),                                    // 142
+   GENX_(__NR_flock,  sys_flock),                                     // 143
+   GENX_(__NR_msync,  sys_msync),                                     // 144
+
+   GENXY(__NR_readv,  sys_readv),                                     // 145
+   GENX_(__NR_writev,  sys_writev),                                   // 146
+   GENX_(__NR_getsid, sys_getsid),                                    // 147
+   GENX_(__NR_fdatasync,  sys_fdatasync),                             // 148
+   LINXY(__NR__sysctl, sys_sysctl),                                   // 149
+
+   GENX_(__NR_mlock,  sys_mlock),                                     // 150
+   GENX_(__NR_munlock,  sys_munlock),                                 // 151
+   GENX_(__NR_mlockall,  sys_mlockall),                               // 152
+   LINX_(__NR_munlockall,  sys_munlockall),                           // 153
+   LINXY(__NR_sched_setparam,  sys_sched_setparam),                   // 154
+
+   LINXY(__NR_sched_getparam,  sys_sched_getparam),                   // 155
+   LINX_(__NR_sched_setscheduler,  sys_sched_setscheduler),           // 156
+   LINX_(__NR_sched_getscheduler,  sys_sched_getscheduler),           // 157
+   LINX_(__NR_sched_yield,  sys_sched_yield),                         // 158
+   LINX_(__NR_sched_get_priority_max,  sys_sched_get_priority_max),   // 159
+
+   LINX_(__NR_sched_get_priority_min,  sys_sched_get_priority_min),   // 160
+// ?????(__NR_sched_rr_get_interval, ),                               // 161
+   GENXY(__NR_nanosleep,  sys_nanosleep),                             // 162
+   GENX_(__NR_mremap,  sys_mremap),                                   // 163
+   GENX_(164, sys_ni_syscall), /* unimplemented (by the kernel) */    // 164
+
+   GENX_(165, sys_ni_syscall), /* unimplemented (by the kernel) */    // 165
+   GENX_(166, sys_ni_syscall), /* unimplemented (by the kernel) */    // 166
+   GENX_(167, sys_ni_syscall), /* unimplemented (by the kernel) */    // 167
+   GENXY(__NR_poll,  sys_poll),                                       // 168
+// ?????(__NR_nfsservctl, ),                                          // 169
+
+   GENX_(170, sys_ni_syscall), /* unimplemented (by the kernel) */    // 170
+   GENX_(171, sys_ni_syscall), /* unimplemented (by the kernel) */    // 171
+   LINXY(__NR_prctl, sys_prctl),                                      // 172
+   PLAX_(__NR_rt_sigreturn,  sys_rt_sigreturn),                       // 173
+   LINXY(__NR_rt_sigaction,  sys_rt_sigaction),                       // 174
+
+   LINXY(__NR_rt_sigprocmask,  sys_rt_sigprocmask),                   // 175
+   LINXY(__NR_rt_sigpending, sys_rt_sigpending),                      // 176
+   LINXY(__NR_rt_sigtimedwait,  sys_rt_sigtimedwait),                 // 177
+   LINXY(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo),                  // 178
+   LINX_(__NR_rt_sigsuspend, sys_rt_sigsuspend),                      // 179
+
+   GENXY(__NR_pread64,  sys_pread64),                                 // 180
+   GENX_(__NR_pwrite64, sys_pwrite64),                                // 181
+   GENX_(182, sys_ni_syscall), /* unimplemented (by the kernel) */    // 182
+   GENXY(__NR_getcwd,  sys_getcwd),                                   // 183
+   LINXY(__NR_capget,  sys_capget),                                   // 184
+
+   LINX_(__NR_capset,  sys_capset),                                   // 185
+   GENXY(__NR_sigaltstack,  sys_sigaltstack),                         // 186
+   LINXY(__NR_sendfile, sys_sendfile),                                // 187
+   GENX_(188, sys_ni_syscall), /* unimplemented (by the kernel) */    // 188
+   GENX_(189, sys_ni_syscall), /* unimplemented (by the kernel) */    // 189
+
+   GENX_(__NR_vfork,  sys_fork),                                      // 190
+   GENXY(__NR_getrlimit,  sys_getrlimit),                             // 191
+   GENX_(192, sys_ni_syscall), /* not exported on 64bit*/             // 192
+   GENX_(193, sys_ni_syscall), /* unimplemented (by the kernel) */    // 193
+   GENX_(194, sys_ni_syscall), /* unimplemented (by the kernel) */    // 194
+
+   GENX_(195, sys_ni_syscall), /* unimplemented (by the kernel) */    // 195
+   GENX_(196, sys_ni_syscall), /* unimplemented (by the kernel) */    // 196
+   GENX_(197, sys_ni_syscall), /* unimplemented (by the kernel) */    // 197
+   GENX_(__NR_lchown, sys_lchown),                                    // 198
+   GENX_(__NR_getuid, sys_getuid),                                    // 199
+
+   GENX_(__NR_getgid, sys_getgid),                                    // 200
+   GENX_(__NR_geteuid, sys_geteuid),                                  // 201
+   GENX_(__NR_getegid, sys_getegid),                                  // 202
+   GENX_(__NR_setreuid, sys_setreuid),                                // 203
+   GENX_(__NR_setregid, sys_setregid),                                // 204
+
+   GENXY(__NR_getgroups, sys_getgroups),                              // 205
+   GENX_(__NR_setgroups, sys_setgroups),                              // 206
+   GENX_(__NR_fchown, sys_fchown),                                    // 207
+   LINX_(__NR_setresuid, sys_setresuid),                              // 208
+   LINXY(__NR_getresuid, sys_getresuid),                              // 209
+
+   LINX_(__NR_setresgid, sys_setresgid),                              // 210
+   LINXY(__NR_getresgid, sys_getresgid),                              // 211
+   GENX_(__NR_chown, sys_chown),                                      // 212
+   GENX_(__NR_setuid, sys_setuid),                                    // 213
+   GENX_(__NR_setgid, sys_setgid),                                    // 214
+
+   LINX_(__NR_setfsuid, sys_setfsuid),                                // 215
+   LINX_(__NR_setfsgid, sys_setfsgid),                                // 216
+// ?????(__NR_pivot_root, ),
+   GENX_(__NR_mincore, sys_mincore),                                  // 218
+   GENX_(__NR_madvise,  sys_madvise),                                 // 219
+
+   GENXY(__NR_getdents64,  sys_getdents64),                           // 220
+   GENX_(221, sys_ni_syscall), /* unimplemented (by the kernel) */    // 221
+   LINX_(__NR_readahead, sys_readahead),                              // 222
+   GENX_(223, sys_ni_syscall), /* unimplemented (by the kernel) */    // 223
+   LINX_(__NR_setxattr, sys_setxattr),                                // 224
+
+   LINX_(__NR_lsetxattr, sys_lsetxattr),                              // 225
+   LINX_(__NR_fsetxattr, sys_fsetxattr),                              // 226
+   LINXY(__NR_getxattr,  sys_getxattr),                               // 227
+   LINXY(__NR_lgetxattr,  sys_lgetxattr),                             // 228
+   LINXY(__NR_fgetxattr,  sys_fgetxattr),                             // 229
+
+   LINXY(__NR_listxattr,  sys_listxattr),                             // 230
+   LINXY(__NR_llistxattr,  sys_llistxattr),                           // 231
+   LINXY(__NR_flistxattr,  sys_flistxattr),                           // 232
+   LINX_(__NR_removexattr,  sys_removexattr),                         // 233
+   LINX_(__NR_lremovexattr,  sys_lremovexattr),                       // 234
+
+   LINX_(__NR_fremovexattr,  sys_fremovexattr),                       // 235
+   LINX_(__NR_gettid,  sys_gettid),                                   // 236
+   LINXY(__NR_tkill, sys_tkill),                                      // 237
+   LINXY(__NR_futex,  sys_futex),                                     // 238
+   LINX_(__NR_sched_setaffinity,  sys_sched_setaffinity),             // 239
+
+   LINXY(__NR_sched_getaffinity,  sys_sched_getaffinity),             // 240
+   LINXY(__NR_tgkill, sys_tgkill),                                    // 241
+   GENX_(242, sys_ni_syscall), /* unimplemented (by the kernel) */    // 242
+   LINXY(__NR_io_setup, sys_io_setup),                                // 243
+   LINX_(__NR_io_destroy,  sys_io_destroy),                           // 244
+
+   LINXY(__NR_io_getevents,  sys_io_getevents),                       // 245
+   LINX_(__NR_io_submit,  sys_io_submit),                             // 246
+   LINXY(__NR_io_cancel,  sys_io_cancel),                             // 247
+   LINX_(__NR_exit_group,  sys_exit_group),                           // 248
+   LINXY(__NR_epoll_create,  sys_epoll_create),                       // 249
+
+   LINX_(__NR_epoll_ctl,  sys_epoll_ctl),                             // 250
+   LINXY(__NR_epoll_wait,  sys_epoll_wait),                           // 251
+   LINX_(__NR_set_tid_address,  sys_set_tid_address),                 // 252
+   PLAX_(__NR_fadvise64, sys_fadvise64),                              // 253
+   LINXY(__NR_timer_create,  sys_timer_create),                       // 254
+
+   LINXY(__NR_timer_settime,  sys_timer_settime),                     // 255
+   LINXY(__NR_timer_gettime,  sys_timer_gettime),                     // 256
+   LINX_(__NR_timer_getoverrun,  sys_timer_getoverrun),               // 257
+   LINX_(__NR_timer_delete,  sys_timer_delete),                       // 258
+   LINX_(__NR_clock_settime,  sys_clock_settime),                     // 259
+
+   LINXY(__NR_clock_gettime,  sys_clock_gettime),                     // 260
+   LINXY(__NR_clock_getres,  sys_clock_getres),                       // 261
+   LINXY(__NR_clock_nanosleep,  sys_clock_nanosleep),                 // 262
+   GENX_(263, sys_ni_syscall), /* unimplemented (by the kernel) */    // 263
+   GENX_(264, sys_ni_syscall), /* unimplemented (by the kernel) */    // 264
+
+   GENXY(__NR_statfs64, sys_statfs64),                                // 265
+   GENXY(__NR_fstatfs64, sys_fstatfs64),                              // 266
+// ?????(__NR_remap_file_pages, ),
+   GENX_(268, sys_ni_syscall), /* unimplemented (by the kernel) */    // 268
+   GENX_(269, sys_ni_syscall), /* unimplemented (by the kernel) */    // 269
+
+   GENX_(270, sys_ni_syscall), /* unimplemented (by the kernel) */    // 270
+   LINXY(__NR_mq_open,  sys_mq_open),                                 // 271
+   LINX_(__NR_mq_unlink,  sys_mq_unlink),                             // 272
+   LINX_(__NR_mq_timedsend,  sys_mq_timedsend),                       // 273
+   LINXY(__NR_mq_timedreceive, sys_mq_timedreceive),                  // 274
+
+   LINX_(__NR_mq_notify,  sys_mq_notify),                             // 275
+   LINXY(__NR_mq_getsetattr,  sys_mq_getsetattr),                     // 276
+// ?????(__NR_kexec_load, ),
+   LINX_(__NR_add_key,  sys_add_key),                                 // 278
+   LINX_(__NR_request_key,  sys_request_key),                         // 279
+
+   LINXY(__NR_keyctl,  sys_keyctl),                                   // 280
+   LINXY(__NR_waitid, sys_waitid),                                    // 281
+   LINX_(__NR_ioprio_set,  sys_ioprio_set),                           // 282
+   LINX_(__NR_ioprio_get,  sys_ioprio_get),                           // 283
+   LINX_(__NR_inotify_init,  sys_inotify_init),                       // 284
+
+   LINX_(__NR_inotify_add_watch,  sys_inotify_add_watch),             // 285
+   LINX_(__NR_inotify_rm_watch,  sys_inotify_rm_watch),               // 286
+   GENX_(287, sys_ni_syscall), /* unimplemented (by the kernel) */    // 287
+   LINXY(__NR_openat,  sys_openat),                                   // 288
+   LINX_(__NR_mkdirat,  sys_mkdirat),                                 // 289
+
+   LINX_(__NR_mknodat,  sys_mknodat),                                 // 290
+   LINX_(__NR_fchownat,  sys_fchownat),                               // 291
+   LINX_(__NR_futimesat,  sys_futimesat),                             // 292
+   LINXY(__NR_newfstatat, sys_newfstatat),                            // 293
+   LINX_(__NR_unlinkat,  sys_unlinkat),                               // 294
+
+   LINX_(__NR_renameat,  sys_renameat),                               // 295
+   LINX_(__NR_linkat,  sys_linkat),                                   // 296
+   LINX_(__NR_symlinkat,  sys_symlinkat),                             // 297
+   LINX_(__NR_readlinkat,  sys_readlinkat),                           // 298
+   LINX_(__NR_fchmodat,  sys_fchmodat),                               // 299
+
+   LINX_(__NR_faccessat,  sys_faccessat),                             // 300
+   LINX_(__NR_pselect6, sys_pselect6),                                // 301
+   LINXY(__NR_ppoll, sys_ppoll),                                      // 302
+// ?????(__NR_unshare, ),
+   LINX_(__NR_set_robust_list,  sys_set_robust_list),                 // 304
+
+   LINXY(__NR_get_robust_list,  sys_get_robust_list),                 // 305
+// ?????(__NR_splice, ),
+   LINX_(__NR_sync_file_range, sys_sync_file_range),                  // 307
+// ?????(__NR_tee, ),
+// ?????(__NR_vmsplice, ),
+
+   GENX_(310, sys_ni_syscall), /* unimplemented (by the kernel) */    // 310
+// ?????(__NR_getcpu, ),
+   LINXY(__NR_epoll_pwait,  sys_epoll_pwait),                         // 312
+   GENX_(__NR_utimes, sys_utimes),                                    // 313
+   LINX_(__NR_fallocate, sys_fallocate),                              // 314
+
+   LINX_(__NR_utimensat,  sys_utimensat),                             // 315
+   LINXY(__NR_signalfd,  sys_signalfd),                               // 316
+   GENX_(317, sys_ni_syscall), /* unimplemented (by the kernel) */    // 317
+   LINX_(__NR_eventfd,  sys_eventfd),                                 // 318
+   LINXY(__NR_timerfd_create,  sys_timerfd_create),                   // 319
+
+   LINXY(__NR_timerfd_settime,  sys_timerfd_settime),                 // 320
+   LINXY(__NR_timerfd_gettime,  sys_timerfd_gettime),                 // 321
+   LINXY(__NR_signalfd4,  sys_signalfd4),                             // 322
+   LINX_(__NR_eventfd2,  sys_eventfd2),                               // 323
+   LINXY(__NR_inotify_init1,  sys_inotify_init1),                     // 324
+
+   LINXY(__NR_pipe2,  sys_pipe2),                                     // 325
+   // (__NR_dup3,  ),
+   LINXY(__NR_epoll_create1,  sys_epoll_create1),                     // 327
+   LINXY(__NR_preadv, sys_preadv),                                    // 328
+   LINX_(__NR_pwritev, sys_pwritev),                                  // 329
+
+// ?????(__NR_rt_tgsigqueueinfo, ),
+   LINXY(__NR_perf_event_open, sys_perf_counter_open),                // 331
+};
+
+/* Return the wrapper table entry for syscall 'sysno', or NULL when
+   'sysno' is past the end of syscall_table or its 'before' is NULL. */
+SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
+{
+   const UInt n_entries = sizeof(syscall_table) / sizeof(syscall_table[0]);
+   SyscallTableEntry* entry;
+
+   /* Beyond the contiguous table: can't find a wrapper. */
+   if (sysno >= n_entries)
+      return NULL;
+
+   /* A slot whose 'before' handler is NULL counts as no entry. */
+   entry = &syscall_table[sysno];
+   if (entry->before == NULL)
+      return NULL;
+   return entry;
+}
+
+#endif
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
diff --git a/coregrind/m_trampoline.S b/coregrind/m_trampoline.S
index 07d7689..0fdcf9e 100644
--- a/coregrind/m_trampoline.S
+++ b/coregrind/m_trampoline.S
@@ -1214,6 +1214,39 @@
 .fill 2048, 2, 0x0b0f /* `ud2` */
 
 
+/*---------------- s390x-linux ----------------*/
+#else
+#if defined(VGP_s390x_linux)
+
+        /* a leading page of unexecutable code (2048 x 2 bytes of 0x0000) */
+	.fill 2048, 2, 0x0000
+
+.global VG_(trampoline_stuff_start)
+VG_(trampoline_stuff_start):
+
+.global VG_(s390x_linux_SUBST_FOR_sigreturn)
+VG_(s390x_linux_SUBST_FOR_sigreturn):
+        svc __NR_sigreturn
+	.short 0
+
+.global VG_(s390x_linux_SUBST_FOR_rt_sigreturn)
+VG_(s390x_linux_SUBST_FOR_rt_sigreturn):
+        /* Old gcc unwinding code checks for a sig(_rt)_return svc and
+           then for ra == cfa to decide whether this is a sig_rt_frame.
+           Since we set ra to this trampoline while the cfa is still in
+           the stack, the unwinder concludes that this is a non-rt frame
+           and crashes; that unwinder is used by the thread library and
+           others.  Therefore we add an "lr 1,1" nop so that the gcc
+           unwinder bails out gracefully.  This might also affect
+           unwinding across the signal frame - tough luck. fixs390 */
+        lr 1,1
+        svc __NR_rt_sigreturn
+	.short 0
+
+.globl VG_(trampoline_stuff_end)
+VG_(trampoline_stuff_end):
+	.fill 2048, 2, 0x0000  /* trailing fill, same size as the leading one */
+
+
 /*---------------- unknown ----------------*/
 #else
 #  error Unknown platform
@@ -1226,6 +1259,7 @@
 #endif
 #endif
 #endif
+#endif
 
 #if defined(VGO_linux)
 /* Let the linker know we don't need an executable stack */
diff --git a/coregrind/m_translate.c b/coregrind/m_translate.c
index 4cdf220..864f38d 100644
--- a/coregrind/m_translate.c
+++ b/coregrind/m_translate.c
@@ -692,7 +692,7 @@
 
 static Bool translations_allowable_from_seg ( NSegment const* seg )
 {
-#  if defined(VGA_x86)
+#  if defined(VGA_x86) || defined(VGA_s390x)
    Bool allowR = True;
 #  else
    Bool allowR = False;
@@ -1503,7 +1503,8 @@
           ? (void*) &VG_(run_innerloop__dispatch_profiled)
           : (void*) &VG_(run_innerloop__dispatch_unprofiled);
 #  elif defined(VGA_ppc32) || defined(VGA_ppc64) \
-        || defined(VGA_arm)
+        || defined(VGA_arm) || defined(VGA_s390x)
+   /* See comment in libvex.h; machine has link register --> dispatch = NULL */
    vta.dispatch = NULL;
 #  else
 #    error "Unknown arch"
diff --git a/coregrind/m_transtab.c b/coregrind/m_transtab.c
index ba6940f..f08afa9 100644
--- a/coregrind/m_transtab.c
+++ b/coregrind/m_transtab.c
@@ -901,6 +901,9 @@
 #  elif defined(VGA_amd64)
    /* no need to do anything, hardware provides coherence */
 
+#  elif defined(VGA_s390x)
+   /* no need to do anything, hardware provides coherence */
+
 #  elif defined(VGP_arm_linux)
    /* ARM cache flushes are privileged, so we must defer to the kernel. */
    Addr startaddr = (Addr) ptr;
diff --git a/coregrind/pub_core_basics.h b/coregrind/pub_core_basics.h
index 35aba2f..0718c82 100644
--- a/coregrind/pub_core_basics.h
+++ b/coregrind/pub_core_basics.h
@@ -58,6 +58,8 @@
 #  include "libvex_guest_ppc64.h"
 #elif defined(VGA_arm)
 #  include "libvex_guest_arm.h"
+#elif defined(VGA_s390x)
+#  include "libvex_guest_s390x.h"
 #else
 #  error Unknown arch
 #endif
@@ -105,6 +107,10 @@
             UInt r11;
             UInt r7;
          } ARM;
+         struct {
+            ULong r_fp;
+            ULong r_lr;
+         } S390X;
       } misc;
    }
    UnwindStartRegs;
diff --git a/coregrind/pub_core_debuginfo.h b/coregrind/pub_core_debuginfo.h
index facca24..330970a 100644
--- a/coregrind/pub_core_debuginfo.h
+++ b/coregrind/pub_core_debuginfo.h
@@ -123,6 +123,10 @@
 typedef
    UChar  /* should be void, but gcc complains at use points */
    D3UnwindRegs;
+#elif defined(VGA_s390x)
+typedef
+   struct { Addr ia; Addr sp; Addr fp; Addr lr;}
+   D3UnwindRegs;
 #else
 #  error "Unsupported arch"
 #endif
diff --git a/coregrind/pub_core_machine.h b/coregrind/pub_core_machine.h
index ea32dc8..2043a52 100644
--- a/coregrind/pub_core_machine.h
+++ b/coregrind/pub_core_machine.h
@@ -75,6 +75,11 @@
 #  undef  VG_ELF_MACHINE
 #  undef  VG_ELF_CLASS
 #  undef  VG_PLAT_USES_PPCTOC
+#elif defined(VGP_s390x_linux)
+#  define VG_ELF_DATA2XXX     ELFDATA2MSB
+#  define VG_ELF_MACHINE      EM_S390
+#  define VG_ELF_CLASS        ELFCLASS64
+#  undef  VG_PLAT_USES_PPCTOC
 #else
 #  error Unknown platform
 #endif
@@ -99,6 +104,10 @@
 #  define VG_INSTR_PTR        guest_R15T
 #  define VG_STACK_PTR        guest_R13
 #  define VG_FRAME_PTR        guest_R11
+#elif defined(VGA_s390x)
+#  define VG_INSTR_PTR        guest_IA
+#  define VG_STACK_PTR        guest_SP
+#  define VG_FRAME_PTR        guest_FP
 #else
 #  error Unknown arch
 #endif
diff --git a/coregrind/pub_core_mallocfree.h b/coregrind/pub_core_mallocfree.h
index 4eb11f0..7c3cbb1 100644
--- a/coregrind/pub_core_mallocfree.h
+++ b/coregrind/pub_core_mallocfree.h
@@ -77,6 +77,7 @@
 // for any AltiVec- or SSE-related type.  This matches the Darwin libc.
 #elif defined(VGP_amd64_linux) || \
       defined(VGP_ppc64_linux) || \
+      defined(VGP_s390x_linux) || \
       defined(VGP_ppc64_aix5)  || \
       defined(VGP_ppc32_aix5)  || \
       defined(VGP_x86_darwin)  || \
diff --git a/coregrind/pub_core_syscall.h b/coregrind/pub_core_syscall.h
index 9f9c068..7a79905 100644
--- a/coregrind/pub_core_syscall.h
+++ b/coregrind/pub_core_syscall.h
@@ -80,6 +80,7 @@
                                            UInt wHI, UInt wLO );
 extern SysRes VG_(mk_SysRes_amd64_darwin)( UChar scclass, Bool isErr,
                                            ULong wHI, ULong wLO );
+extern SysRes VG_(mk_SysRes_s390x_linux) ( Long val );
 extern SysRes VG_(mk_SysRes_Error)       ( UWord val );
 extern SysRes VG_(mk_SysRes_Success)     ( UWord val );
 
diff --git a/coregrind/pub_core_threadstate.h b/coregrind/pub_core_threadstate.h
index 3e63da1..a784d0e 100644
--- a/coregrind/pub_core_threadstate.h
+++ b/coregrind/pub_core_threadstate.h
@@ -85,6 +85,8 @@
    typedef VexGuestPPC64State VexGuestArchState;
 #elif defined(VGA_arm)
    typedef VexGuestARMState   VexGuestArchState;
+#elif defined(VGA_s390x)
+   typedef VexGuestS390XState VexGuestArchState;
 #else
 #  error Unknown architecture
 #endif
diff --git a/coregrind/pub_core_trampoline.h b/coregrind/pub_core_trampoline.h
index fff4b16..5eafcbb 100644
--- a/coregrind/pub_core_trampoline.h
+++ b/coregrind/pub_core_trampoline.h
@@ -140,6 +140,11 @@
 extern UInt VG_(amd64_darwin_REDIR_FOR_arc4random)( void );
 #endif
 
+#if defined(VGP_s390x_linux)
+extern Addr VG_(s390x_linux_SUBST_FOR_sigreturn);
+extern Addr VG_(s390x_linux_SUBST_FOR_rt_sigreturn);
+#endif
+
 #endif   // __PUB_CORE_TRAMPOLINE_H
 
 /*--------------------------------------------------------------------*/
diff --git a/coregrind/pub_core_transtab_asm.h b/coregrind/pub_core_transtab_asm.h
index d3292f1..76e48db 100644
--- a/coregrind/pub_core_transtab_asm.h
+++ b/coregrind/pub_core_transtab_asm.h
@@ -43,7 +43,10 @@
    2)[VG_TT_FAST_BITS-1 : 0]' on those targets.
 
    On ARM we do like ppc32/ppc64, although that will have to be
-   revisited when we come to implement Thumb. */
+   revisited when we come to implement Thumb.
+
+   On s390x the rightmost bit of an instruction address is zero.
+   For best table utilization shift the address to the right by 1 bit. */
 
 #define VG_TT_FAST_BITS 15
 #define VG_TT_FAST_SIZE (1 << VG_TT_FAST_BITS)
@@ -55,6 +58,8 @@
 #  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr))     ) & VG_TT_FAST_MASK)
 #elif defined(VGA_ppc32) || defined(VGA_ppc64) || defined(VGA_arm)
 #  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr)) >> 2) & VG_TT_FAST_MASK)
+#elif defined(VGA_s390x)
+#  define VG_TT_FAST_HASH(_addr)  ((((UWord)(_addr)) >> 1) & VG_TT_FAST_MASK)
 #else
 #  error "VG_TT_FAST_HASH: unknown platform"
 #endif