Update Valgrind and TSan.

Rebased local changes on:
http://valgrind-variant.googlecode.com/svn/trunk@125
http://data-race-test.googlecode.com/svn/trunk@3717

Change-Id: I4d7a227ad72fb8e9998015a5fe26e00496ec1da3
diff --git a/main/VEX/priv/guest_amd64_defs.h b/main/VEX/priv/guest_amd64_defs.h
index 33450c3..7d3ed34 100644
--- a/main/VEX/priv/guest_amd64_defs.h
+++ b/main/VEX/priv/guest_amd64_defs.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
diff --git a/main/VEX/priv/guest_amd64_helpers.c b/main/VEX/priv/guest_amd64_helpers.c
index f626f0a..d554918 100644
--- a/main/VEX/priv/guest_amd64_helpers.c
+++ b/main/VEX/priv/guest_amd64_helpers.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -877,6 +877,7 @@
 #  define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
 #  define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
 #  define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
+#  define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
 #  define mkU8(_n)  IRExpr_Const(IRConst_U8(_n))
 
    Int i, arity = 0;
@@ -959,34 +960,34 @@
       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondZ)) {
          /* long sub/cmp, then Z --> test dst==src */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpEQ64, 
-                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
-                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+                     binop(Iop_CmpEQ32,
+                           unop(Iop_64to32, cc_dep1),
+                           unop(Iop_64to32, cc_dep2)));
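+         /* Equivalent to the old Shl64(x,32)-vs-Shl64(y,32) compare:
+            both forms depend only on the low 32 bits of cc_dep1 and
+            cc_dep2, and the 32-bit compare needs one fewer operation
+            per argument. */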
       }
       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNZ)) {
          /* long sub/cmp, then NZ --> test dst!=src */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpNE64, 
-                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
-                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+                     binop(Iop_CmpNE32,
+                           unop(Iop_64to32, cc_dep1),
+                           unop(Iop_64to32, cc_dep2)));
       }
 
       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondL)) {
          /* long sub/cmp, then L (signed less than) 
             --> test dst <s src */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpLT64S, 
-                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
-                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+                     binop(Iop_CmpLT32S,
+                           unop(Iop_64to32, cc_dep1),
+                           unop(Iop_64to32, cc_dep2)));
       }
 
       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondLE)) {
          /* long sub/cmp, then LE (signed less than or equal) 
             --> test dst <=s src */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpLE64S, 
-                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
-                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+                     binop(Iop_CmpLE32S,
+                           unop(Iop_64to32, cc_dep1),
+                           unop(Iop_64to32, cc_dep2)));
 
       }
       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNLE)) {
@@ -995,9 +996,9 @@
             --> test (dst >s src)
             --> test (src <s dst) */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpLT64S,
-                           binop(Iop_Shl64,cc_dep2,mkU8(32)),
-                           binop(Iop_Shl64,cc_dep1,mkU8(32))));
+                     binop(Iop_CmpLT32S,
+                           unop(Iop_64to32, cc_dep2),
+                           unop(Iop_64to32, cc_dep1)));
 
       }
 
@@ -1005,28 +1006,37 @@
          /* long sub/cmp, then BE (unsigned less than or equal)
             --> test dst <=u src */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpLE64U, 
-                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
-                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+                     binop(Iop_CmpLE32U, 
+                           unop(Iop_64to32, cc_dep1),
+                           unop(Iop_64to32, cc_dep2)));
       }
       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondNBE)) {
          /* long sub/cmp, then NBE (unsigned greater than)
             --> test src <u dst */
          /* Note, args are opposite way round from the usual */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpLT64U, 
-                           binop(Iop_Shl64,cc_dep2,mkU8(32)),
-                           binop(Iop_Shl64,cc_dep1,mkU8(32))));
+                     binop(Iop_CmpLT32U, 
+                           unop(Iop_64to32, cc_dep2),
+                           unop(Iop_64to32, cc_dep1)));
       }
 
       if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondS)) {
          /* long sub/cmp, then S (negative) --> test (dst-src <s 0) */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpLT64S,
-                           binop(Iop_Sub64,
-                                 binop(Iop_Shl64, cc_dep1, mkU8(32)), 
-                                 binop(Iop_Shl64, cc_dep2, mkU8(32))),
-                           mkU64(0)));
+                     binop(Iop_CmpLT32S,
+                           binop(Iop_Sub32,
+                                 unop(Iop_64to32, cc_dep1), 
+                                 unop(Iop_64to32, cc_dep2)),
+                           mkU32(0)));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_SUBL) && isU64(cond, AMD64CondB)) {
+         /* long sub/cmp, then B (unsigned less than)
+            --> test dst <u src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLT32U,
+                           unop(Iop_64to32, cc_dep1),
+                           unop(Iop_64to32, cc_dep2)));
       }
 
       /*---------------- SUBW ----------------*/
@@ -1073,6 +1083,15 @@
                            unop(Iop_64to8,cc_dep2)));
       }
 
+      if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondBE)) {
+         /* byte sub/cmp, then BE (unsigned less than or equal)
+            --> test dst <=u src */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpLE64U, 
+                           binop(Iop_And64, cc_dep1, mkU64(0xFF)),
+                           binop(Iop_And64, cc_dep2, mkU64(0xFF))));
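+         /* The And64 with 0xFF zero-extends each byte operand, so the
+            64-bit unsigned compare sees exactly the 8-bit values. */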
+      }
+
       if (isU64(cc_op, AMD64G_CC_OP_SUBB) && isU64(cond, AMD64CondS)
                                           && isU64(cc_dep2, 0)) {
          /* byte sub/cmp of zero, then S --> test (dst-0 <s 0)
@@ -1107,6 +1126,11 @@
          return unop(Iop_1Uto64,
                      binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
       }
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondNZ)) {
+         /* long long and/or/xor, then NZ --> test dst!=0 */
+         return unop(Iop_1Uto64,
+                     binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
+      }
 
       if (isU64(cc_op, AMD64G_CC_OP_LOGICQ) && isU64(cond, AMD64CondL)) {
          /* long long and/or/xor, then L
@@ -1126,17 +1150,16 @@
       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondZ)) {
          /* long and/or/xor, then Z --> test dst==0 */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpEQ64, 
-                           binop(Iop_Shl64,cc_dep1,mkU8(32)), 
-                           mkU64(0)));
+                     binop(Iop_CmpEQ32,
+                           unop(Iop_64to32, cc_dep1), 
+                           mkU32(0)));
       }
-
       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNZ)) {
          /* long and/or/xor, then NZ --> test dst!=0 */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpNE64, 
-                           binop(Iop_Shl64,cc_dep1,mkU8(32)), 
-                           mkU64(0)));
+                     binop(Iop_CmpNE32,
+                           unop(Iop_64to32, cc_dep1), 
+                           mkU32(0)));
       }
 
       if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondLE)) {
@@ -1147,9 +1170,24 @@
             the result is <=signed 0.  Hence ...
          */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpLE64S, 
-                           binop(Iop_Shl64,cc_dep1,mkU8(32)), 
-                           mkU64(0)));
+                     binop(Iop_CmpLE32S,
+                           unop(Iop_64to32, cc_dep1), 
+                           mkU32(0)));
+      }
+
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondS)) {
+         /* long and/or/xor, then S --> (ULong)result[31] */
+         return binop(Iop_And64,
+                      binop(Iop_Shr64, cc_dep1, mkU8(31)),
+                      mkU64(1));
+      }
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICL) && isU64(cond, AMD64CondNS)) {
+         /* long and/or/xor, then NS --> (ULong) ~result[31] */
+         return binop(Iop_Xor64,
+                binop(Iop_And64,
+                      binop(Iop_Shr64, cc_dep1, mkU8(31)),
+                      mkU64(1)),
+                mkU64(1));
       }
 
       /*---------------- LOGICB ----------------*/
@@ -1179,6 +1217,14 @@
                       binop(Iop_Shr64,cc_dep1,mkU8(7)),
                       mkU64(1));
       }
+      if (isU64(cc_op, AMD64G_CC_OP_LOGICB) && isU64(cond, AMD64CondNS)) {
+         /* byte and/or/xor, then NS --> (UInt)!result[7] */
+         return binop(Iop_Xor64,
+                      binop(Iop_And64,
+                            binop(Iop_Shr64,cc_dep1,mkU8(7)),
+                            mkU64(1)),
+                      mkU64(1));
+      }
 
       /*---------------- INCB ----------------*/
 
@@ -1206,9 +1252,9 @@
       if (isU64(cc_op, AMD64G_CC_OP_DECL) && isU64(cond, AMD64CondZ)) {
          /* dec L, then Z --> test dst == 0 */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpEQ64,
-                           binop(Iop_Shl64,cc_dep1,mkU8(32)),
-                           mkU64(0)));
+                     binop(Iop_CmpEQ32,
+                           unop(Iop_64to32, cc_dep1),
+                           mkU32(0)));
       }
 
       /*---------------- DECW ----------------*/
@@ -1329,9 +1375,9 @@
       if (isU64(cc_op, AMD64G_CC_OP_SUBL)) {
          /* C after sub denotes unsigned less than */
          return unop(Iop_1Uto64,
-                     binop(Iop_CmpLT64U, 
-                           binop(Iop_Shl64,cc_dep1,mkU8(32)), 
-                           binop(Iop_Shl64,cc_dep2,mkU8(32))));
+                     binop(Iop_CmpLT32U,
+                           unop(Iop_64to32, cc_dep1), 
+                           unop(Iop_64to32, cc_dep2)));
       }
       if (isU64(cc_op, AMD64G_CC_OP_SUBB)) {
          /* C after sub denotes unsigned less than */
@@ -1365,6 +1411,7 @@
 #  undef unop
 #  undef binop
 #  undef mkU64
+#  undef mkU32
 #  undef mkU8
 
    return NULL;
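
Note on the SUBL/LOGICL/DECL changes above: the old idiom compared the
32-bit operands by shifting them to the top of a 64-bit word; the new
code compares the 32-bit narrowings directly.  The two are equivalent,
since both ignore the upper halves of cc_dep1/cc_dep2.  A minimal
standalone C sketch (a hypothetical check, not part of the patch)
demonstrating the equivalence for the CondZ case:

   #include <assert.h>
   #include <stdint.h>

   /* Old scheme: shift both 32-bit operands to the top of a 64-bit
      word, then compare at 64 bits. */
   static int z_old(uint64_t x, uint64_t y)
   { return (x << 32) == (y << 32); }

   /* New scheme: compare the 32-bit narrowings directly. */
   static int z_new(uint64_t x, uint64_t y)
   { return (uint32_t)x == (uint32_t)y; }

   int main(void)
   {
      const uint64_t xs[] = { 0, 1, 0xFFFFFFFFULL, 0x100000000ULL,
                              0xDEADBEEF00000001ULL };
      for (int i = 0; i < 5; i++)
         for (int j = 0; j < 5; j++)
            assert(z_old(xs[i], xs[j]) == z_new(xs[i], xs[j]));
      return 0;
   }
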
diff --git a/main/VEX/priv/guest_amd64_toIR.c b/main/VEX/priv/guest_amd64_toIR.c
index 59105be..a3d817e 100644
--- a/main/VEX/priv/guest_amd64_toIR.c
+++ b/main/VEX/priv/guest_amd64_toIR.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -1104,6 +1104,13 @@
                       Ity_I16 );
 }
 
+static void putIReg16 ( UInt regno, IRExpr* e )
+{
+   vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
+   stmt( IRStmt_Put( integerGuestReg64Offset(regno), 
+                     unop(Iop_16Uto64,e) ) );
+}
+
 static HChar* nameIReg16 ( UInt regno )
 {
    return nameIReg( 2, regno, False );
@@ -3271,7 +3278,7 @@
    }
 
    isShift = False;
-   switch (gregLO3ofRM(modrm)) { case 4: case 5: case 7: isShift = True; }
+   switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
 
    isRotate = False;
    switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
@@ -3279,11 +3286,6 @@
    isRotateC = False;
    switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
 
-   if (gregLO3ofRM(modrm) == 6) {
-      *decode_OK = False;
-      return delta;
-   }
-
    if (!isShift && !isRotate && !isRotateC) {
       /*NOTREACHED*/
       vpanic("dis_Grp2(Reg): unhandled case(amd64)");
@@ -3358,6 +3360,7 @@
       switch (gregLO3ofRM(modrm)) { 
          case 4: op64 = Iop_Shl64; break;
          case 5: op64 = Iop_Shr64; break;
+         case 6: op64 = Iop_Shl64; break;
          case 7: op64 = Iop_Sar64; break;
          /*NOTREACHED*/
          default: vpanic("dis_Grp2:shift"); break;
@@ -4115,7 +4118,7 @@
 }
 
 static
-void dis_string_op( void (*dis_OP)( Int, IRTemp ), 
+void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
                     Int sz, HChar* name, Prefix pfx )
 {
    IRTemp t_inc = newTemp(Ity_I64);
@@ -4123,67 +4126,98 @@
       The following assertion catches any resulting sillyness. */
    vassert(pfx == clearSegBits(pfx));
    dis_string_op_increment(sz, t_inc);
-   dis_OP( sz, t_inc );
+   dis_OP( sz, t_inc, pfx );
    DIP("%s%c\n", name, nameISize(sz));
 }
 
 static 
-void dis_MOVS ( Int sz, IRTemp t_inc )
+void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
 {
    IRType ty = szToITy(sz);
    IRTemp td = newTemp(Ity_I64);   /* RDI */
    IRTemp ts = newTemp(Ity_I64);   /* RSI */
+   IRExpr *incd, *incs;
 
-   assign( td, getIReg64(R_RDI) );
-   assign( ts, getIReg64(R_RSI) );
+   if (haveASO(pfx)) {
+      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
+      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
+   } else {
+      assign( td, getIReg64(R_RDI) );
+      assign( ts, getIReg64(R_RSI) );
+   }
 
    storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
 
-   putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
-   putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
+   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
+   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
+   if (haveASO(pfx)) {
+      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
+      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
+   }
+   putIReg64( R_RDI, incd );
+   putIReg64( R_RSI, incs );
 }
 
 static 
-void dis_LODS ( Int sz, IRTemp t_inc )
+void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
 {
    IRType ty = szToITy(sz);
    IRTemp ts = newTemp(Ity_I64);   /* RSI */
+   IRExpr *incs;
 
-   assign( ts, getIReg64(R_RSI) );
+   if (haveASO(pfx))
+      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
+   else
+      assign( ts, getIReg64(R_RSI) );
 
    putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
 
-   putIReg64( R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
+   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
+   if (haveASO(pfx))
+      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
+   putIReg64( R_RSI, incs );
 }
 
 static 
-void dis_STOS ( Int sz, IRTemp t_inc )
+void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
 {
    IRType ty = szToITy(sz);
    IRTemp ta = newTemp(ty);        /* rAX */
    IRTemp td = newTemp(Ity_I64);   /* RDI */
+   IRExpr *incd;
 
    assign( ta, getIRegRAX(sz) );
 
-   assign( td, getIReg64(R_RDI) );
+   if (haveASO(pfx))
+      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
+   else
+      assign( td, getIReg64(R_RDI) );
 
    storeLE( mkexpr(td), mkexpr(ta) );
 
-   putIReg64( R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
+   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
+   if (haveASO(pfx))
+      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
+   putIReg64( R_RDI, incd );
 }
 
 static 
-void dis_CMPS ( Int sz, IRTemp t_inc )
+void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
 {
    IRType ty  = szToITy(sz);
    IRTemp tdv = newTemp(ty);      /* (RDI) */
    IRTemp tsv = newTemp(ty);      /* (RSI) */
    IRTemp td  = newTemp(Ity_I64); /*  RDI  */
    IRTemp ts  = newTemp(Ity_I64); /*  RSI  */
+   IRExpr *incd, *incs;
 
-   assign( td, getIReg64(R_RDI) );
-
-   assign( ts, getIReg64(R_RSI) );
+   if (haveASO(pfx)) {
+      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
+      assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
+   } else {
+      assign( td, getIReg64(R_RDI) );
+      assign( ts, getIReg64(R_RSI) );
+   }
 
    assign( tdv, loadLE(ty,mkexpr(td)) );
 
@@ -4191,28 +4225,40 @@
 
    setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
 
-   putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
-
-   putIReg64(R_RSI, binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc)) );
+   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
+   incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
+   if (haveASO(pfx)) {
+      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
+      incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
+   }
+   putIReg64( R_RDI, incd );
+   putIReg64( R_RSI, incs );
 }
 
 static 
-void dis_SCAS ( Int sz, IRTemp t_inc )
+void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
 {
    IRType ty  = szToITy(sz);
    IRTemp ta  = newTemp(ty);       /*  rAX  */
    IRTemp td  = newTemp(Ity_I64);  /*  RDI  */
    IRTemp tdv = newTemp(ty);       /* (RDI) */
+   IRExpr *incd;
 
    assign( ta, getIRegRAX(sz) );
 
-   assign( td, getIReg64(R_RDI) );
+   if (haveASO(pfx))
+      assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
+   else
+      assign( td, getIReg64(R_RDI) );
 
    assign( tdv, loadLE(ty,mkexpr(td)) );
 
    setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
 
-   putIReg64(R_RDI, binop(Iop_Add64, mkexpr(td), mkexpr(t_inc)) );
+   incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
+   if (haveASO(pfx))
+      incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
+   putIReg64( R_RDI, incd );
 }
 
 
@@ -4221,27 +4267,37 @@
    the next insn, rather than just falling through. */
 static 
 void dis_REP_op ( AMD64Condcode cond,
-                  void (*dis_OP)(Int, IRTemp),
+                  void (*dis_OP)(Int, IRTemp, Prefix),
                   Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
                   Prefix pfx )
 {
    IRTemp t_inc = newTemp(Ity_I64);
-   IRTemp tc    = newTemp(Ity_I64);  /*  RCX  */
+   IRTemp tc;
+   IRExpr* cmp;
 
    /* Really we ought to inspect the override prefixes, but we don't.
       The following assertion catches any resulting sillyness. */
    vassert(pfx == clearSegBits(pfx));
 
-   assign( tc, getIReg64(R_RCX) );
+   if (haveASO(pfx)) {
+      tc = newTemp(Ity_I32);  /*  ECX  */
+      assign( tc, getIReg32(R_RCX) );
+      cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
+   } else {
+      tc = newTemp(Ity_I64);  /*  RCX  */
+      assign( tc, getIReg64(R_RCX) );
+      cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
+   }
 
-   stmt( IRStmt_Exit( binop(Iop_CmpEQ64,mkexpr(tc),mkU64(0)),
-                      Ijk_Boring,
-                      IRConst_U64(rip_next) ) );
+   stmt( IRStmt_Exit( cmp, Ijk_Boring, IRConst_U64(rip_next) ) );
 
-   putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
+   if (haveASO(pfx))
+      putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
+   else
+      putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
 
    dis_string_op_increment(sz, t_inc);
-   dis_OP (sz, t_inc);
+   dis_OP (sz, t_inc, pfx);
 
    if (cond == AMD64CondAlways) {
       jmp_lit(Ijk_Boring,rip);
@@ -6504,9 +6560,9 @@
       case 0x65: op = Iop_CmpGT16Sx4; break;
       case 0x66: op = Iop_CmpGT32Sx2; break;
 
-      case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break;
-      case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break;
-      case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break;
+      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
+      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
+      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;
 
       case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
       case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
@@ -7629,18 +7685,23 @@
    IRType ty = szToITy(sz);
    IRTemp t1 = newTemp(ty);
    IRTemp t2 = newTemp(ty);
-   vassert(sz == 4 || sz == 8);
+   vassert(sz == 2 || sz == 4 || sz == 8);
    vassert(regLo3 < 8);
    if (sz == 8) {
       assign( t1, getIReg64(R_RAX) );
       assign( t2, getIRegRexB(8, pfx, regLo3) );
       putIReg64( R_RAX, mkexpr(t2) );
       putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
-   } else {
+   } else if (sz == 4) {
       assign( t1, getIReg32(R_RAX) );
       assign( t2, getIRegRexB(4, pfx, regLo3) );
       putIReg32( R_RAX, mkexpr(t2) );
       putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
+   } else {
+      assign( t1, getIReg16(R_RAX) );
+      assign( t2, getIRegRexB(2, pfx, regLo3) );
+      putIReg16( R_RAX, mkexpr(t2) );
+      putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
    }
    DIP("xchg%c %s, %s\n", 
        nameISize(sz), nameIRegRAX(sz), 
@@ -11790,7 +11851,8 @@
    if (have66noF2noF3(pfx) && sz == 2 
        && insn[0] == 0x0F && insn[1] == 0x6B) {
       delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 
-                                 "packssdw", Iop_QNarrow32Sx4, True );
+                                 "packssdw",
+                                 Iop_QNarrowBin32Sto16Sx8, True );
       goto decode_success;
    }
 
@@ -11798,7 +11860,8 @@
    if (have66noF2noF3(pfx) && sz == 2 
        && insn[0] == 0x0F && insn[1] == 0x63) {
       delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 
-                                 "packsswb", Iop_QNarrow16Sx8, True );
+                                 "packsswb",
+                                 Iop_QNarrowBin16Sto8Sx16, True );
       goto decode_success;
    }
 
@@ -11806,7 +11869,8 @@
    if (have66noF2noF3(pfx) && sz == 2 
        && insn[0] == 0x0F && insn[1] == 0x67) {
       delta = dis_SSEint_E_to_G( vbi, pfx, delta+2, 
-                                 "packuswb", Iop_QNarrow16Ux8, True );
+                                 "packuswb",
+                                 Iop_QNarrowBin16Sto8Ux16, True );
       goto decode_success;
    }
 
@@ -15405,7 +15469,7 @@
                  isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
                      : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
          imm = insn[3+1];
-         if (imm & ~7) goto decode_failure;
+         if (imm & ~15) goto decode_failure;
          delta += 3+1+1;
          DIP( "rounds%c $%d,%s,%s\n",
               isD ? 'd' : 's',
@@ -15415,7 +15479,7 @@
          addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
          assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
          imm = insn[3+alen];
-         if (imm & ~7) goto decode_failure;
+         if (imm & ~15) goto decode_failure;
          delta += 3+alen+1;
          DIP( "rounds%c $%d,%s,%s\n",
               isD ? 'd' : 's',
@@ -15460,7 +15524,7 @@
          assign( src1, 
                  getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
          imm = insn[3+1];
-         if (imm & ~7) goto decode_failure;
+         if (imm & ~15) goto decode_failure;
          delta += 3+1+1;
          DIP( "roundpd $%d,%s,%s\n",
               imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
@@ -15473,7 +15537,7 @@
          assign( src1, loadLE(Ity_F64,
                               binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
          imm = insn[3+alen];
-         if (imm & ~7) goto decode_failure;
+         if (imm & ~15) goto decode_failure;
          delta += 3+alen+1;
          DIP( "roundpd $%d,%s,%s\n",
               imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
@@ -15523,7 +15587,7 @@
          assign( src3, 
                  getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
          imm = insn[3+1];
-         if (imm & ~7) goto decode_failure;
+         if (imm & ~15) goto decode_failure;
          delta += 3+1+1;
          DIP( "roundps $%d,%s,%s\n",
               imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
@@ -15540,7 +15604,7 @@
          assign( src3, loadLE(Ity_F32,
                               binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
          imm = insn[3+alen];
-         if (imm & ~7) goto decode_failure;
+         if (imm & ~15) goto decode_failure;
          delta += 3+alen+1;
          DIP( "roundps $%d,%s,%s\n",
               imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
@@ -15673,7 +15737,7 @@
       switch (imm) {
          case 0x00:
          case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
-         case 0x1A: case 0x3A: case 0x44: case 0x4A:
+         case 0x1A: case 0x38: case 0x3A: case 0x44: case 0x4A:
             break;
          default:
             goto decode_failure;
@@ -15754,7 +15818,8 @@
 
    /* 66 0f 38 17 /r = PTEST xmm1, xmm2/m128
       Logical compare (set ZF and CF from AND/ANDN of the operands) */
-   if (have66noF2noF3( pfx ) && sz == 2 
+   if (have66noF2noF3( pfx )
+       && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
        && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x17) {
       modrm = insn[3];
       IRTemp vecE = newTemp(Ity_V128);
@@ -16000,6 +16065,92 @@
       goto decode_success;
    }
 
+   /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
+      2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
+   if ( have66noF2noF3( pfx ) 
+        && sz == 2 
+        && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x2B ) {
+  
+      modrm = insn[3];
+
+      IRTemp argL = newTemp(Ity_V128);
+      IRTemp argR = newTemp(Ity_V128);
+
+      if ( epartIsReg(modrm) ) {
+         assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+         delta += 3+1;
+         DIP( "packusdw %s,%s\n",
+              nameXMMReg( eregOfRexRM(pfx, modrm) ),
+              nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+      } else {
+         addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+         gen_SEGV_if_not_16_aligned( addr );
+         assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
+         delta += 3+alen;
+         DIP( "packusdw %s,%s\n",
+              dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+      }
+
+      assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
+
+      putXMMReg( gregOfRexRM(pfx, modrm), 
+                 binop( Iop_QNarrowBin32Sto16Ux8,
+                        mkexpr(argL), mkexpr(argR)) );
+
+      goto decode_success;
+   }
+
+   /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes 0 x
+      0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
+      half */
+   /* This is a really poor translation -- could be improved if
+      performance critical.  It's a copy-paste of PMULUDQ, too. */
+   if (have66noF2noF3(pfx) && sz == 2 
+       && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x28) {
+      IRTemp sV, dV;
+      IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
+      sV = newTemp(Ity_V128);
+      dV = newTemp(Ity_V128);
+      s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
+      t1 = newTemp(Ity_I64);
+      t0 = newTemp(Ity_I64);
+      modrm = insn[3];
+      assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
+
+      if (epartIsReg(modrm)) {
+         assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
+         delta += 3+1;
+         DIP("pmuldq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
+                               nameXMMReg(gregOfRexRM(pfx,modrm)));
+      } else {
+         addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+         assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
+         delta += 3+alen;
+         DIP("pmuldq %s,%s\n", dis_buf,
+                               nameXMMReg(gregOfRexRM(pfx,modrm)));
+      }
+
+      breakup128to32s( dV, &d3, &d2, &d1, &d0 );
+      breakup128to32s( sV, &s3, &s2, &s1, &s0 );
+
+      assign( t0, binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) );
+      putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, mkexpr(t0) );
+      assign( t1, binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)) );
+      putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkexpr(t1) );
+      goto decode_success;
+   }
+
+   /* 66 0F 38 29 = PCMPEQQ
+      64x2 equality comparison
+   */
+   if ( have66noF2noF3( pfx ) && sz == 2 
+        && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x29) {
+      /* FIXME: this needs an alignment check */
+      delta = dis_SSEint_E_to_G( vbi, pfx, delta+3, 
+                                 "pcmpeqq", Iop_CmpEQ64x2, False );
+      goto decode_success;
+   }
+
    /* ---------------------------------------------------- */
    /* --- end of the SSE4 decoder                      --- */
    /* ---------------------------------------------------- */
@@ -16079,6 +16230,37 @@
 //.. //--       DIP("enter 0x%x, 0x%x", d32, abyte);
 //.. //--       break;
 
+   case 0xC8: /* ENTER */
+      /* Same comments re operand size as for LEAVE below apply.
+         Also, only handles the case "enter $imm16, $0"; other cases
+         for the second operand (nesting depth) are not handled. */
+      if (sz != 4)
+         goto decode_failure;
+      d64 = getUDisp16(delta);
+      delta += 2;
+      vassert(d64 >= 0 && d64 <= 0xFFFF);
+      if (getUChar(delta) != 0)
+         goto decode_failure;
+      delta++;
+      /* Intel docs seem to suggest:
+           push rbp
+           temp = rsp
+           rbp = temp
+           rsp = rsp - imm16
+      */
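+      /* Below, t1 holds the old RBP (the value pushed) and t2 holds
+         RSP-8, which serves both as the store address for the push
+         and as the new RBP ("temp" above). */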
+      t1 = newTemp(Ity_I64);
+      assign(t1, getIReg64(R_RBP));
+      t2 = newTemp(Ity_I64);
+      assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
+      putIReg64(R_RSP, mkexpr(t2));
+      storeLE(mkexpr(t2), mkexpr(t1));
+      putIReg64(R_RBP, mkexpr(t2));
+      if (d64 > 0) {
+         putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64)));
+      }
+      DIP("enter $%u, $0\n", (UInt)d64);
+      break;
+
    case 0xC9: /* LEAVE */
       /* In 64-bit mode this defaults to a 64-bit operand size.  There
          is no way to encode a 32-bit variant.  Hence sz==4 but we do
@@ -17326,8 +17508,6 @@
    case 0xAE:
    case 0xAF:
       /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
-      if (haveASO(pfx)) 
-         goto decode_failure;
       if (haveF2(pfx) && !haveF3(pfx)) {
          if (opc == 0xAE)
             sz = 1;
@@ -17338,8 +17518,6 @@
          break;
       }
       /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
-      if (haveASO(pfx)) 
-         goto decode_failure;
       if (!haveF2(pfx) && haveF3(pfx)) {
          if (opc == 0xAE)
             sz = 1;
@@ -17362,8 +17540,6 @@
    case 0xA6:
    case 0xA7:
       /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
-      if (haveASO(pfx)) 
-         goto decode_failure;
       if (haveF3(pfx) && !haveF2(pfx)) {
          if (opc == 0xA6)
             sz = 1;
@@ -17379,8 +17555,6 @@
    case 0xAA:
    case 0xAB:
       /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
-      if (haveASO(pfx)) 
-         goto decode_failure;
       if (haveF3(pfx) && !haveF2(pfx)) {
          if (opc == 0xAA)
             sz = 1;
@@ -17403,8 +17577,6 @@
    case 0xA4:
    case 0xA5:
       /* F3 A4: rep movsb */
-      if (haveASO(pfx)) 
-         goto decode_failure;
       if (haveF3(pfx) && !haveF2(pfx)) {
          if (opc == 0xA4)
             sz = 1;
@@ -17493,9 +17665,6 @@
       /* guard against mutancy */
       if (haveF2orF3(pfx)) goto decode_failure;
 
-      /* sz == 2 could legitimately happen, but we don't handle it yet */
-      if (sz == 2) goto decode_failure; /* awaiting test case */
-
       codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
       break;
 
@@ -18559,13 +18728,15 @@
   decode_failure:
    /* All decode failures end up here. */
    vex_printf("vex amd64->IR: unhandled instruction bytes: "
-              "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+              "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
               (Int)getUChar(delta_start+0),
               (Int)getUChar(delta_start+1),
               (Int)getUChar(delta_start+2),
               (Int)getUChar(delta_start+3),
               (Int)getUChar(delta_start+4),
-              (Int)getUChar(delta_start+5) );
+              (Int)getUChar(delta_start+5),
+              (Int)getUChar(delta_start+6),
+              (Int)getUChar(delta_start+7) );
 
    /* Tell the dispatcher that this insn cannot be decoded, and so has
       not been executed, and (is currently) the next to be executed.
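
Note on the 0x67 address-size-override (ASO) handling added above: the
string instructions now read RSI/RDI/RCX as their zero-extended 32-bit
forms and wrap the post-increment results at 32 bits, instead of
refusing to decode.  A minimal C sketch of the pointer update (an
illustrative model, not Valgrind code), mirroring the
32Uto64(64to32(...)) pattern:

   #include <stdint.h>

   /* Advance RSI/RDI after one string-op step.  With the ASO prefix
      the update wraps at 32 bits and the upper half is zeroed;
      without it the full 64-bit register is updated. */
   static uint64_t str_advance(uint64_t reg, int64_t inc, int aso)
   {
      uint64_t next = reg + (uint64_t)inc;
      return aso ? (uint64_t)(uint32_t)next : next;
   }
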
diff --git a/main/VEX/priv/guest_arm_defs.h b/main/VEX/priv/guest_arm_defs.h
index 02078c4..be6dd1c 100644
--- a/main/VEX/priv/guest_arm_defs.h
+++ b/main/VEX/priv/guest_arm_defs.h
@@ -6,7 +6,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -148,6 +148,10 @@
    that the definedness of the stored flags always depends on
    all 3 DEP values.
 
+   Fields carrying only 1 or 2 bits of useful information (old_C,
+   shifter_co, old_V, oldC:oldV) must have their top 31 or 30 bits
+   (respectively) zero.  The text "31x0:" or "30x0:" denotes this.
+
    A summary of the field usages is:
 
    OP                DEP1              DEP2              DEP3
@@ -156,11 +160,11 @@
    OP_COPY           current NZCV      unused            unused
    OP_ADD            argL              argR              unused
    OP_SUB            argL              argR              unused
-   OP_ADC            argL              argR              old_C
-   OP_SBB            argL              argR              old_C
-   OP_LOGIC          result            shifter_co        old_V
-   OP_MUL            result            unused            old_C:old_V
-   OP_MULL           resLO32           resHI32           old_C:old_V
+   OP_ADC            argL              argR              31x0:old_C
+   OP_SBB            argL              argR              31x0:old_C
+   OP_LOGIC          result            31x0:shifter_co   31x0:old_V
+   OP_MUL            result            unused            30x0:old_C:old_V
+   OP_MULL           resLO32           resHI32           30x0:old_C:old_V
 */
 
 enum {
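
As an illustration of the tightened thunk layout described above (a
sketch under the stated layout rules, not code from this patch): for
OP_MUL and OP_MULL the third dependency packs the previous C and V
flags into bits 1:0, with bits 31:2 required to be zero, matching the
(cc_dep3 >> 1) & 1 and (cc_dep3 >> 0) & 1 extractions in the helpers.

   #include <assert.h>
   #include <stdint.h>

   /* Build the 30x0:old_C:old_V field for OP_MUL / OP_MULL. */
   static uint32_t pack_cv(uint32_t oldC, uint32_t oldV)
   {
      assert((oldC & ~1u) == 0 && (oldV & ~1u) == 0);
      return (oldC << 1) | oldV;
   }
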
diff --git a/main/VEX/priv/guest_arm_helpers.c b/main/VEX/priv/guest_arm_helpers.c
index f6689a0..90ec93f 100644
--- a/main/VEX/priv/guest_arm_helpers.c
+++ b/main/VEX/priv/guest_arm_helpers.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -51,14 +51,381 @@
 */
 
 
+/* Set to 1 to get detailed profiling info about individual N, Z, C
+   and V flag evaluation. */
+#define PROFILE_NZCV_FLAGS 0
 
-/* generalised left-shifter */
-static inline UInt lshift ( UInt x, Int n )
+#if PROFILE_NZCV_FLAGS
+
+static UInt tab_n_eval[ARMG_CC_OP_NUMBER];
+static UInt tab_z_eval[ARMG_CC_OP_NUMBER];
+static UInt tab_c_eval[ARMG_CC_OP_NUMBER];
+static UInt tab_v_eval[ARMG_CC_OP_NUMBER];
+static UInt initted = 0;
+static UInt tot_evals = 0;
+
+static void initCounts ( void )
 {
-   if (n >= 0)
-      return x << n;
-   else
-      return x >> (-n);
+   UInt i;
+   for (i = 0; i < ARMG_CC_OP_NUMBER; i++) {
+      tab_n_eval[i] = tab_z_eval[i] = tab_c_eval[i] = tab_v_eval[i] = 0;
+   }
+   initted = 1;
+}
+
+static void showCounts ( void )
+{
+   UInt i;
+   vex_printf("\n                 N          Z          C          V\n");
+   vex_printf(  "---------------------------------------------------\n");
+   for (i = 0; i < ARMG_CC_OP_NUMBER; i++) {
+      vex_printf("CC_OP=%d  %9d  %9d  %9d  %9d\n",
+                 i,
+                 tab_n_eval[i], tab_z_eval[i],
+                 tab_c_eval[i], tab_v_eval[i] );
+   }
+}
+
+#define NOTE_N_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_n_eval)
+#define NOTE_Z_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_z_eval)
+#define NOTE_C_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_c_eval)
+#define NOTE_V_EVAL(_cc_op) NOTE_EVAL(_cc_op, tab_v_eval)
+
+#define NOTE_EVAL(_cc_op, _tab) \
+   do { \
+      if (!initted) initCounts(); \
+      vassert( ((UInt)(_cc_op)) < ARMG_CC_OP_NUMBER); \
+      _tab[(UInt)(_cc_op)]++; \
+      tot_evals++; \
+      if (0 == (tot_evals & 0xFFFFF)) \
+        showCounts(); \
+   } while (0)
+
+#endif /* PROFILE_NZCV_FLAGS */
+
+
+/* Calculate the N flag from the supplied thunk components, in the
+   least significant bit of the word.  Returned bits 31:1 are zero. */
+static
+UInt armg_calculate_flag_n ( UInt cc_op, UInt cc_dep1,
+                             UInt cc_dep2, UInt cc_dep3 )
+{
+#  if PROFILE_NZCV_FLAGS
+   NOTE_N_EVAL(cc_op);
+#  endif
+
+   switch (cc_op) {
+      case ARMG_CC_OP_COPY: {
+         /* (nzcv:28x0, unused, unused) */
+         UInt nf   = (cc_dep1 >> ARMG_CC_SHIFT_N) & 1;
+         return nf;
+      }
+      case ARMG_CC_OP_ADD: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL + argR;
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_SUB: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL - argR;
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_ADC: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL + argR + oldC;
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_SBB: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL - argR - (oldC ^ 1);
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_LOGIC: {
+         /* (res, shco, oldV) */
+         UInt res  = cc_dep1;
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_MUL: {
+         /* (res, unused, oldC:oldV) */
+         UInt res  = cc_dep1;
+         UInt nf   = res >> 31;
+         return nf;
+      }
+      case ARMG_CC_OP_MULL: {
+         /* (resLo32, resHi32, oldC:oldV) */
+         UInt resHi32 = cc_dep2;
+         UInt nf      = resHi32 >> 31;
+         return nf;
+      }
+      default:
+         /* shouldn't really make these calls from generated code */
+         vex_printf("armg_calculate_flag_n"
+                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
+                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
+         vpanic("armg_calculate_flags_n");
+   }
+}
+
+
+/* Calculate the Z flag from the supplied thunk components, in the
+   least significant bit of the word.  Returned bits 31:1 are zero. */
+static
+UInt armg_calculate_flag_z ( UInt cc_op, UInt cc_dep1,
+                             UInt cc_dep2, UInt cc_dep3 )
+{
+#  if PROFILE_NZCV_FLAGS
+   NOTE_Z_EVAL(cc_op);
+#  endif
+
+   switch (cc_op) {
+      case ARMG_CC_OP_COPY: {
+         /* (nzcv:28x0, unused, unused) */
+         UInt zf   = (cc_dep1 >> ARMG_CC_SHIFT_Z) & 1;
+         return zf;
+      }
+      case ARMG_CC_OP_ADD: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL + argR;
+         UInt zf   = res == 0;
+         return zf;
+      }
+      case ARMG_CC_OP_SUB: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL - argR;
+         UInt zf   = res == 0;
+         return zf;
+      }
+      case ARMG_CC_OP_ADC: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL + argR + oldC;
+         UInt zf   = res == 0;
+         return zf;
+      }
+      case ARMG_CC_OP_SBB: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL - argR - (oldC ^ 1);
+         UInt zf   = res == 0;
+         return zf;
+      }
+      case ARMG_CC_OP_LOGIC: {
+         /* (res, shco, oldV) */
+         UInt res  = cc_dep1;
+         UInt zf   = res == 0;
+         return zf;
+      }
+      case ARMG_CC_OP_MUL: {
+         /* (res, unused, oldC:oldV) */
+         UInt res  = cc_dep1;
+         UInt zf   = res == 0;
+         return zf;
+      }
+      case ARMG_CC_OP_MULL: {
+         /* (resLo32, resHi32, oldC:oldV) */
+         UInt resLo32 = cc_dep1;
+         UInt resHi32 = cc_dep2;
+         UInt zf      = (resHi32|resLo32) == 0;
+         return zf;
+      }
+      default:
+         /* shouldn't really make these calls from generated code */
+         vex_printf("armg_calculate_flags_z"
+                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
+                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
+         vpanic("armg_calculate_flags_z");
+   }
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate the C flag from the supplied thunk components, in the
+   least significant bit of the word.  Returned bits 31:1 are zero. */
+UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
+                             UInt cc_dep2, UInt cc_dep3 )
+{
+#  if PROFILE_NZCV_FLAGS
+   NOTE_C_EVAL(cc_op);
+#  endif
+
+   switch (cc_op) {
+      case ARMG_CC_OP_COPY: {
+         /* (nzcv:28x0, unused, unused) */
+         UInt cf   = (cc_dep1 >> ARMG_CC_SHIFT_C) & 1;
+         return cf;
+      }
+      case ARMG_CC_OP_ADD: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL + argR;
+         UInt cf   = res < argL;
+         return cf;
+      }
+      case ARMG_CC_OP_SUB: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt cf   = argL >= argR;
+         return cf;
+      }
+      case ARMG_CC_OP_ADC: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL + argR + oldC;
+         UInt cf   = oldC ? (res <= argL) : (res < argL);
+         return cf;
+      }
+      case ARMG_CC_OP_SBB: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt cf   = oldC ? (argL >= argR) : (argL > argR);
+         return cf;
+      }
+      case ARMG_CC_OP_LOGIC: {
+         /* (res, shco, oldV) */
+         UInt shco = cc_dep2;
+         vassert((shco & ~1) == 0);
+         UInt cf   = shco;
+         return cf;
+      }
+      case ARMG_CC_OP_MUL: {
+         /* (res, unused, oldC:oldV) */
+         UInt oldC = (cc_dep3 >> 1) & 1;
+         vassert((cc_dep3 & ~3) == 0);
+         UInt cf   = oldC;
+         return cf;
+      }
+      case ARMG_CC_OP_MULL: {
+         /* (resLo32, resHi32, oldC:oldV) */
+         UInt oldC    = (cc_dep3 >> 1) & 1;
+         vassert((cc_dep3 & ~3) == 0);
+         UInt cf      = oldC;
+         return cf;
+      }
+      default:
+         /* shouldn't really make these calls from generated code */
+         vex_printf("armg_calculate_flag_c"
+                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
+                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
+         vpanic("armg_calculate_flag_c");
+   }
+}
+
+
+/* CALLED FROM GENERATED CODE: CLEAN HELPER */
+/* Calculate the V flag from the supplied thunk components, in the
+   least significant bit of the word.  Returned bits 31:1 are zero. */
+UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1,
+                             UInt cc_dep2, UInt cc_dep3 )
+{
+#  if PROFILE_NZCV_FLAGS
+   NOTE_V_EVAL(cc_op);
+#  endif
+
+   switch (cc_op) {
+      case ARMG_CC_OP_COPY: {
+         /* (nzcv:28x0, unused, unused) */
+         UInt vf   = (cc_dep1 >> ARMG_CC_SHIFT_V) & 1;
+         return vf;
+      }
+      case ARMG_CC_OP_ADD: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL + argR;
+         UInt vf   = ((res ^ argL) & (res ^ argR)) >> 31;
+         return vf;
+      }
+      case ARMG_CC_OP_SUB: {
+         /* (argL, argR, unused) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt res  = argL - argR;
+         UInt vf   = ((argL ^ argR) & (argL ^ res)) >> 31;
+         return vf;
+      }
+      case ARMG_CC_OP_ADC: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL + argR + oldC;
+         UInt vf   = ((res ^ argL) & (res ^ argR)) >> 31;
+         return vf;
+      }
+      case ARMG_CC_OP_SBB: {
+         /* (argL, argR, oldC) */
+         UInt argL = cc_dep1;
+         UInt argR = cc_dep2;
+         UInt oldC = cc_dep3;
+         vassert((oldC & ~1) == 0);
+         UInt res  = argL - argR - (oldC ^ 1);
+         UInt vf   = ((argL ^ argR) & (argL ^ res)) >> 31;
+         return vf;
+      }
+      case ARMG_CC_OP_LOGIC: {
+         /* (res, shco, oldV) */
+         UInt oldV = cc_dep3;
+         vassert((oldV & ~1) == 0);
+         UInt vf   = oldV;
+         return vf;
+      }
+      case ARMG_CC_OP_MUL: {
+         /* (res, unused, oldC:oldV) */
+         UInt oldV = (cc_dep3 >> 0) & 1;
+         vassert((cc_dep3 & ~3) == 0);
+         UInt vf   = oldV;
+         return vf;
+      }
+      case ARMG_CC_OP_MULL: {
+         /* (resLo32, resHi32, oldC:oldV) */
+         UInt oldV    = (cc_dep3 >> 0) & 1;
+         vassert((cc_dep3 & ~3) == 0);
+         UInt vf      = oldV;
+         return vf;
+      }
+      default:
+         /* shouldn't really make these calls from generated code */
+         vex_printf("armg_calculate_flag_v"
+                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
+                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
+         vpanic("armg_calculate_flag_v");
+   }
 }
 
 
@@ -69,142 +436,21 @@
 UInt armg_calculate_flags_nzcv ( UInt cc_op, UInt cc_dep1,
                                  UInt cc_dep2, UInt cc_dep3 )
 {
-   switch (cc_op) {
-      case ARMG_CC_OP_COPY:
-         /* (nzcv, unused, unused) */
-         return cc_dep1;
-      case ARMG_CC_OP_ADD: {
-         /* (argL, argR, unused) */
-         UInt argL = cc_dep1;
-         UInt argR = cc_dep2;
-         UInt res  = argL + argR;
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         // CF and VF need verification
-         UInt cf   = lshift( res < argL, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( (res ^ argL) & (res ^ argR),
-                             ARMG_CC_SHIFT_V + 1 - 32 )
-                     & ARMG_CC_MASK_V;
-         //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
-         //           argL, argR, nf, zf, cf, vf);
-         return nf | zf | cf | vf;
-      }
-      case ARMG_CC_OP_SUB: {
-         /* (argL, argR, unused) */
-         UInt argL = cc_dep1;
-         UInt argR = cc_dep2;
-         UInt res  = argL - argR;
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         // XXX cf is inverted relative to normal sense
-         UInt cf   = lshift( argL >= argR, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( (argL ^ argR) & (argL ^ res),
-                             ARMG_CC_SHIFT_V + 1 - 32 )
-                     & ARMG_CC_MASK_V;
-         //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
-         //           argL, argR, nf, zf, cf, vf);
-         return nf | zf | cf | vf;
-      }
-      case ARMG_CC_OP_ADC: {
-         /* (argL, argR, oldC) */
-         UInt argL = cc_dep1;
-         UInt argR = cc_dep2;
-         UInt oldC = cc_dep3;
-         UInt res  = (argL + argR) + oldC;
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         UInt cf   = oldC ? lshift( res <= argL, ARMG_CC_SHIFT_C )
-                          : lshift( res <  argL, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( (res ^ argL) & (res ^ argR),
-                             ARMG_CC_SHIFT_V + 1 - 32 )
-                     & ARMG_CC_MASK_V;
-         //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
-         //           argL, argR, nf, zf, cf, vf);
-         return nf | zf | cf | vf;
-      }
-      case ARMG_CC_OP_SBB: {
-         /* (argL, argR, oldC) */
-         UInt argL = cc_dep1;
-         UInt argR = cc_dep2;
-         UInt oldC = cc_dep3;
-         UInt res  = argL - argR - (oldC ^ 1);
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         UInt cf   = oldC ? lshift( argL >= argR, ARMG_CC_SHIFT_C )
-                          : lshift( argL >  argR, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( (argL ^ argR) & (argL ^ res),
-                             ARMG_CC_SHIFT_V + 1 - 32 )
-                     & ARMG_CC_MASK_V;
-         //vex_printf("%08x %08x -> n %x z %x c %x v %x\n",
-         //           argL, argR, nf, zf, cf, vf);
-         return nf | zf | cf | vf;
-      }
-      case ARMG_CC_OP_LOGIC: {
-         /* (res, shco, oldV) */
-         UInt res  = cc_dep1;
-         UInt shco = cc_dep2;
-         UInt oldV = cc_dep3;
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         UInt cf   = lshift( shco & 1, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( oldV & 1, ARMG_CC_SHIFT_V );
-         return nf | zf | cf | vf;
-      }
-      case ARMG_CC_OP_MUL: {
-         /* (res, unused, oldC:oldV) */
-         UInt res  = cc_dep1;
-         UInt oldC = (cc_dep3 >> 1) & 1;
-         UInt oldV = (cc_dep3 >> 0) & 1;
-         UInt nf   = lshift( res & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf   = lshift( res == 0, ARMG_CC_SHIFT_Z );
-         UInt cf   = lshift( oldC & 1, ARMG_CC_SHIFT_C );
-         UInt vf   = lshift( oldV & 1, ARMG_CC_SHIFT_V );
-         return nf | zf | cf | vf;
-      }
-      case ARMG_CC_OP_MULL: {
-         /* (resLo32, resHi32, oldC:oldV) */
-         UInt resLo32 = cc_dep1;
-         UInt resHi32 = cc_dep2;
-         UInt oldC    = (cc_dep3 >> 1) & 1;
-         UInt oldV    = (cc_dep3 >> 0) & 1;
-         UInt nf      = lshift( resHi32 & (1<<31), ARMG_CC_SHIFT_N - 31 );
-         UInt zf      = lshift( (resHi32|resLo32) == 0, ARMG_CC_SHIFT_Z );
-         UInt cf      = lshift( oldC & 1, ARMG_CC_SHIFT_C );
-         UInt vf      = lshift( oldV & 1, ARMG_CC_SHIFT_V );
-         return nf | zf | cf | vf;
-      }
-      default:
-         /* shouldn't really make these calls from generated code */
-         vex_printf("armg_calculate_flags_nzcv"
-                    "( op=%u, dep1=0x%x, dep2=0x%x, dep3=0x%x )\n",
-                    cc_op, cc_dep1, cc_dep2, cc_dep3 );
-         vpanic("armg_calculate_flags_nzcv");
-   }
+   UInt f;
+   UInt res = 0;
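+   /* Each per-flag helper returns its flag in bit 0 with bits 31:1
+      zero, so shifting each into its architectural position and
+      OR-ing them together rebuilds the packed NZCV word. */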
+   f = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+   res |= (f << ARMG_CC_SHIFT_N);
+   f = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+   res |= (f << ARMG_CC_SHIFT_Z);
+   f = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
+   res |= (f << ARMG_CC_SHIFT_C);
+   f = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+   res |= (f << ARMG_CC_SHIFT_V);
+   return res;
 }
 
 
 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
-/* Calculate the C flag from the thunk components, in the lowest bit
-   of the word (bit 0). */
-UInt armg_calculate_flag_c ( UInt cc_op, UInt cc_dep1,
-                             UInt cc_dep2, UInt cc_dep3 )
-{
-   UInt r = armg_calculate_flags_nzcv(cc_op, cc_dep1, cc_dep2, cc_dep3);
-   return (r >> ARMG_CC_SHIFT_C) & 1;
-}
-
-
-/* CALLED FROM GENERATED CODE: CLEAN HELPER */
-/* Calculate the V flag from the thunk components, in the lowest bit
-   of the word (bit 0). */
-UInt armg_calculate_flag_v ( UInt cc_op, UInt cc_dep1,
-                             UInt cc_dep2, UInt cc_dep3 )
-{
-   UInt r = armg_calculate_flags_nzcv(cc_op, cc_dep1, cc_dep2, cc_dep3);
-   return (r >> ARMG_CC_SHIFT_V) & 1;
-}
-
-/* CALLED FROM GENERATED CODE: CLEAN HELPER */
 /* Calculate the QC flag from the arguments, in the lowest bit
    of the word (bit 0).  Urr, having this out of line is bizarre.
    Push back inline. */
@@ -219,15 +465,14 @@
 
 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 /* Calculate the specified condition from the thunk components, in the
-   lowest bit of the word (bit 0). */
-extern 
-UInt armg_calculate_condition ( UInt cond_n_op /* ARMCondcode << 4 | cc_op */,
+   lowest bit of the word (bit 0).  Returned bits 31:1 are zero. */
+UInt armg_calculate_condition ( UInt cond_n_op /* (ARMCondcode << 4) | cc_op */,
                                 UInt cc_dep1,
                                 UInt cc_dep2, UInt cc_dep3 )
 {
    UInt cond  = cond_n_op >> 4;
    UInt cc_op = cond_n_op & 0xF;
-   UInt nf, zf, vf, cf, nzcv, inv;
+   UInt nf, zf, vf, cf, inv;
    //   vex_printf("XXXXXXXX %x %x %x %x\n", 
    //              cond_n_op, cc_dep1, cc_dep2, cc_dep3);
 
@@ -235,47 +480,46 @@
    if (cond == ARMCondAL) return 1;
 
    inv  = cond & 1;
-   nzcv = armg_calculate_flags_nzcv(cc_op, cc_dep1, cc_dep2, cc_dep3);
 
    switch (cond) {
       case ARMCondEQ:    // Z=1         => z
       case ARMCondNE:    // Z=0
-         zf = nzcv >> ARMG_CC_SHIFT_Z;
-         return 1 & (inv ^ zf);
+         zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         return inv ^ zf;
 
       case ARMCondHS:    // C=1         => c
       case ARMCondLO:    // C=0
-         cf = nzcv >> ARMG_CC_SHIFT_C;
-         return 1 & (inv ^ cf);
+         cf = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         return inv ^ cf;
 
       case ARMCondMI:    // N=1         => n
       case ARMCondPL:    // N=0
-         nf = nzcv >> ARMG_CC_SHIFT_N;
-         return 1 & (inv ^ nf);
+         nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         return inv ^ nf;
 
       case ARMCondVS:    // V=1         => v
       case ARMCondVC:    // V=0
-         vf = nzcv >> ARMG_CC_SHIFT_V;
-         return 1 & (inv ^ vf);
+         vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         return inv ^ vf;
 
       case ARMCondHI:    // C=1 && Z=0   => c & ~z
       case ARMCondLS:    // C=0 || Z=1
-         cf = nzcv >> ARMG_CC_SHIFT_C;
-         zf = nzcv >> ARMG_CC_SHIFT_Z;
-         return 1 & (inv ^ (cf & ~zf));
+         cf = armg_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         return inv ^ (cf & ~zf);
 
       case ARMCondGE:    // N=V          => ~(n^v)
       case ARMCondLT:    // N!=V
-         nf = nzcv >> ARMG_CC_SHIFT_N;
-         vf = nzcv >> ARMG_CC_SHIFT_V;
-         return 1 & (inv ^ ~(nf ^ vf));
+         nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         return inv ^ (1 & ~(nf ^ vf));
 
       case ARMCondGT:    // Z=0 && N=V   => ~z & ~(n^v)  =>  ~(z | (n^v))
       case ARMCondLE:    // Z=1 || N!=V
-         nf = nzcv >> ARMG_CC_SHIFT_N;
-         vf = nzcv >> ARMG_CC_SHIFT_V;
-         zf = nzcv >> ARMG_CC_SHIFT_Z;
-         return 1 & (inv ^ ~(zf | (nf ^ vf)));
+         nf = armg_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         vf = armg_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         zf = armg_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
+         return inv ^ (1 & ~(zf | (nf ^ vf)));
 
       case ARMCondAL: // handled above
       case ARMCondNV: // should never get here: Illegal instr
@@ -332,13 +576,17 @@
    /* --------- specialising "armg_calculate_condition" --------- */
 
    if (vex_streq(function_name, "armg_calculate_condition")) {
-      /* specialise calls to above "armg_calculate condition" function */
-      IRExpr *cond_n_op, *cc_dep1, *cc_dep2, *cc_dep3;
+
+      /* specialise calls to the "armg_calculate_condition" function.
+         Not sure whether this is strictly necessary, but: the
+         replacement IR must produce only the values 0 or 1.  Bits
+         31:1 are required to be zero. */
+      IRExpr *cond_n_op, *cc_dep1, *cc_dep2, *cc_ndep;
       vassert(arity == 4);
-      cond_n_op = args[0]; /* ARMCondcode << 4  |  ARMG_CC_OP_* */
+      cond_n_op = args[0]; /* (ARMCondcode << 4)  |  ARMG_CC_OP_* */
       cc_dep1   = args[1];
       cc_dep2   = args[2];
-      cc_dep3   = args[3];
+      cc_ndep   = args[3];
 
       /*---------------- SUB ----------------*/
 
@@ -353,6 +601,12 @@
                      binop(Iop_CmpNE32, cc_dep1, cc_dep2));
       }
 
+      if (isU32(cond_n_op, (ARMCondGT << 4) | ARMG_CC_OP_SUB)) {
+         /* GT after SUB --> test argL >s argR
+                         --> test argR <s argL */
+         return unop(Iop_1Uto32,
+                     binop(Iop_CmpLT32S, cc_dep2, cc_dep1));
+      }
       if (isU32(cond_n_op, (ARMCondLE << 4) | ARMG_CC_OP_SUB)) {
          /* LE after SUB --> test argL <=s argR */
          return unop(Iop_1Uto32,
@@ -378,6 +632,11 @@
          return unop(Iop_1Uto32,
                      binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
       }
+      if (isU32(cond_n_op, (ARMCondLO << 4) | ARMG_CC_OP_SUB)) {
+         /* LO after SUB --> test argL <u argR */
+         return unop(Iop_1Uto32,
+                     binop(Iop_CmpLT32U, cc_dep1, cc_dep2));
+      }
 
       if (isU32(cond_n_op, (ARMCondLS << 4) | ARMG_CC_OP_SUB)) {
          /* LS after SUB --> test argL <=u argR */
@@ -385,7 +644,27 @@
                      binop(Iop_CmpLE32U, cc_dep1, cc_dep2));
       }
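
These SUB specialisations all rest on simple comparison identities; the new GT case, for example, is handled by swapping the operands of a signed less-than. A quick check of that identity in plain C (not VEX IR):

   #include <assert.h>
   #include <stdint.h>

   /* GT after SUB: argL >s argR  <=>  argR <s argL, which is what
      binop(Iop_CmpLT32S, cc_dep2, cc_dep1) computes. */
   static int gt_after_sub ( int32_t argL, int32_t argR )
   {
      return argR < argL;
   }

   int main ( void )
   {
      assert(gt_after_sub(2, 1)   == 1);
      assert(gt_after_sub(-2, -1) == 0);
      return 0;
   }
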
 
+      /*---------------- SBB ----------------*/
+
+      if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) {
+         /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
+         /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
+         /* HS after SBB (same as C after SBB below)
+            --> oldC ? (argL >=u argR) : (argL >u argR)
+            --> oldC ? (argR <=u argL) : (argR <u argL)
+         */
+         return
+            IRExpr_Mux0X(
+               unop(Iop_32to8, cc_ndep),
+               /* case oldC == 0 */
+               unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1)),
+               /* case oldC != 0 */
+               unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1))
+            );
+      }
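
The Mux0X encodes the carry-out of a subtract-with-borrow: when the old carry is set, SBB degenerates to a plain SUB, otherwise the pending borrow tightens the comparison by one. A hedged C restatement of the identity in the comment:

   #include <assert.h>
   #include <stdint.h>

   /* C flag after SBB: oldC ? (argL >=u argR) : (argL >u argR). */
   static uint32_t c_after_sbb ( uint32_t argL, uint32_t argR,
                                 uint32_t oldC )
   {
      return oldC ? (argL >= argR) : (argL > argR);
   }

   int main ( void )
   {
      assert(c_after_sbb(4, 4, 1) == 1);  /* equal operands: no borrow   */
      assert(c_after_sbb(4, 4, 0) == 0);  /* pending borrow tips it over */
      return 0;
   }
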
+
       /*---------------- LOGIC ----------------*/
+
       if (isU32(cond_n_op, (ARMCondEQ << 4) | ARMG_CC_OP_LOGIC)) {
          /* EQ after LOGIC --> test res == 0 */
          return unop(Iop_1Uto32,
@@ -397,7 +676,23 @@
                      binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
       }
 
+      if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) {
+         /* PL after LOGIC --> test (res >> 31) == 0 */
+         return unop(Iop_1Uto32,
+                     binop(Iop_CmpEQ32,
+                           binop(Iop_Shr32, cc_dep1, mkU8(31)),
+                           mkU32(0)));
+      }
+      if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) {
+         /* MI after LOGIC --> test (res >> 31) == 1 */
+         return unop(Iop_1Uto32,
+                     binop(Iop_CmpEQ32,
+                           binop(Iop_Shr32, cc_dep1, mkU8(31)),
+                           mkU32(1)));
+      }
+
       /*----------------- AL -----------------*/
+
       /* A critically important case for Thumb code.
 
          What we're trying to spot is the case where cond_n_op is an
@@ -444,6 +739,126 @@
       }
    }
 
+   /* --------- specialising "armg_calculate_flag_c" --------- */
+
+   else
+   if (vex_streq(function_name, "armg_calculate_flag_c")) {
+
+      /* specialise calls to the "armg_calculate_flag_c" function.
+         Note that the returned value must be either 0 or 1; nonzero
+         bits 31:1 are not allowed.  In turn, incoming oldV and oldC
+         values (from the thunk) are assumed to have bits 31:1
+         clear. */
+      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
+      vassert(arity == 4);
+      cc_op   = args[0]; /* ARMG_CC_OP_* */
+      cc_dep1 = args[1];
+      cc_dep2 = args[2];
+      cc_ndep = args[3];
+
+      if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
+         /* Thunk args are (result, shco, oldV) */
+         /* C after LOGIC --> shco */
+         return cc_dep2;
+      }
+
+      if (isU32(cc_op, ARMG_CC_OP_SUB)) {
+         /* Thunk args are (argL, argR, unused) */
+         /* C after SUB --> argL >=u argR
+                        --> argR <=u argL */
+         return unop(Iop_1Uto32,
+                     binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
+      }
+
+      if (isU32(cc_op, ARMG_CC_OP_SBB)) {
+         /* This happens occasionally in softfloat code, eg __divdf3+140 */
+         /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
+         /* C after SBB (same as HS after SBB above)
+            --> oldC ? (argL >=u argR) : (argL >u argR)
+            --> oldC ? (argR <=u argL) : (argR <u argL)
+         */
+         return
+            IRExpr_Mux0X(
+               unop(Iop_32to8, cc_ndep),
+               /* case oldC == 0 */
+               unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1)),
+               /* case oldC != 0 */
+               unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1))
+            );
+      }
+
+   }
+
+   /* --------- specialising "armg_calculate_flag_v" --------- */
+
+   else
+   if (vex_streq(function_name, "armg_calculate_flag_v")) {
+
+      /* specialise calls to the "armg_calculate_flag_v" function.
+         Note that the returned value must be either 0 or 1; nonzero
+         bits 31:1 are not allowed.  In turn, incoming oldV and oldC
+         values (from the thunk) are assumed to have bits 31:1
+         clear. */
+      IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
+      vassert(arity == 4);
+      cc_op   = args[0]; /* ARMG_CC_OP_* */
+      cc_dep1 = args[1];
+      cc_dep2 = args[2];
+      cc_ndep = args[3];
+
+      if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
+         /* Thunk args are (result, shco, oldV) */
+         /* V after LOGIC --> oldV */
+         return cc_ndep;
+      }
+
+      if (isU32(cc_op, ARMG_CC_OP_SUB)) {
+         /* Thunk args are (argL, argR, unused) */
+         /* V after SUB 
+            --> let res = argL - argR
+                in ((argL ^ argR) & (argL ^ res)) >> 31
+            --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31
+         */
+         IRExpr* argL = cc_dep1;
+         IRExpr* argR = cc_dep2;
+         return
+            binop(Iop_Shr32,
+                  binop(Iop_And32,
+                        binop(Iop_Xor32, argL, argR),
+                        binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR))
+                  ),
+                  mkU8(31)
+            );
+      }
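
The expression built here is the standard bit-twiddling test for signed subtract overflow: overflow occurred iff the operands had different signs and the result's sign differs from argL's. Checked in plain C:

   #include <assert.h>
   #include <stdint.h>

   /* V after SUB: ((argL ^ argR) & (argL ^ (argL - argR))) >> 31 */
   static uint32_t v_after_sub ( uint32_t argL, uint32_t argR )
   {
      uint32_t res = argL - argR;
      return ((argL ^ argR) & (argL ^ res)) >> 31;
   }

   int main ( void )
   {
      assert(v_after_sub(0x80000000u, 1u) == 1);  /* INT_MIN - 1 overflows */
      assert(v_after_sub(5u, 3u) == 0);
      return 0;
   }
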
+
+      if (isU32(cc_op, ARMG_CC_OP_SBB)) {
+         /* This happens occasionally in softfloat code, eg __divdf3+140 */
+         /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
+         /* V after SBB
+            --> let res = argL - argR - (oldC ^ 1)
+                in  (argL ^ argR) & (argL ^ res) & 1
+         */
+         return
+            binop(
+               Iop_And32,
+               binop(
+                  Iop_And32,
+                  // argL ^ argR
+                  binop(Iop_Xor32, cc_dep1, cc_dep2),
+                  // argL ^ (argL - argR - (oldC ^ 1))
+                  binop(Iop_Xor32,
+                        cc_dep1,
+                        binop(Iop_Sub32,
+                              binop(Iop_Sub32, cc_dep1, cc_dep2),
+                              binop(Iop_Xor32, cc_ndep, mkU32(1)))
+                  )
+               ),
+               mkU32(1)
+            );
+      }
+
+   }
+
 #  undef unop
 #  undef binop
 #  undef mkU32
diff --git a/main/VEX/priv/guest_arm_toIR.c b/main/VEX/priv/guest_arm_toIR.c
index b38877b..4fd47db 100644
--- a/main/VEX/priv/guest_arm_toIR.c
+++ b/main/VEX/priv/guest_arm_toIR.c
@@ -7,11 +7,11 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    NEON support is
-   Copyright (C) 2010-2010 Samsung Electronics
+   Copyright (C) 2010-2011 Samsung Electronics
    contributed by Dmitry Zhurikhin <zhur@ispras.ru>
               and Kirill Batuzov <batuzovk@ispras.ru>
 
@@ -1088,14 +1088,13 @@
 static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
 {
    vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
-   /* And 'cond' had better produce a value in which only bits 7:4
-      bits are nonzero.  However, obviously we can't assert for
-      that. */
+   /* And 'cond' had better produce a value in which only bits 7:4 are
+      nonzero.  However, obviously we can't assert for that. */
 
    /* So what we're constructing for the first argument is 
-      "(cond << 4) | stored-operation-operation".  However,
-      as per comments above, must be supplied pre-shifted to this
-      function.
+      "(cond << 4) | stored-operation".
+      However, as per comments above, 'cond' must be supplied
+      pre-shifted to this function.
 
       This pairing scheme requires that the ARMG_CC_OP_ values all fit
       in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
@@ -1700,6 +1699,12 @@
 
    The calling convention for res and newC is a bit funny.  They could
    be passed by value, but instead are passed by ref.
+
+   The C (shco) value computed must be zero in bits 31:1, as the IR
+   optimisations for flag handling (guest_arm_spechelper) rely on
+   that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
+   for it.  Same applies to all these functions that compute shco
+   after a shift or rotate, not just this one.
 */
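
Concretely, for a shift by a register-supplied amount, the carry-out rule that the IR in the hunks below implements can be modelled like this (a sketch for LSL only; the LSR and ASR cases differ in which bit is extracted):

   #include <assert.h>
   #include <stdint.h>

   /* shco after LSL by a register amount.  The final "& 1" mirrors
      the Iop_And32(..., mkU32(1)) wrappers added below, keeping
      bits 31:1 of shco zero as the comment above requires. */
   static uint32_t shco_after_lsl ( uint32_t rm, uint32_t amt,
                                    uint32_t oldC )
   {
      if (amt == 0) return oldC;               /* shift by 0 keeps old C */
      if (amt > 32) return 0;                  /* everything shifted out */
      return (rm >> ((32 - amt) & 31)) & 1;    /* last bit pushed out    */
   }

   int main ( void )
   {
      assert(shco_after_lsl(0x80000000u, 1, 0) == 1);
      assert(shco_after_lsl(0x80000000u, 0, 1) == 1);  /* oldC preserved */
      return 0;
   }
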
 
 static void compute_result_and_C_after_LSL_by_imm5 (
@@ -1751,7 +1756,7 @@
       /* mux0X(amt == 0,
                mux0X(amt < 32, 
                      0,
-                     Rm[(32-amt) & 31])
+                     Rm[(32-amt) & 31]),
                oldC)
       */
       /* About the best you can do is pray that iropt is able
@@ -1767,16 +1772,19 @@
                unop(Iop_1Uto8,
                     binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
                mkU32(0),
-               binop(Iop_Shr32,
-                     mkexpr(rMt),
-                     unop(Iop_32to8,
-                          binop(Iop_And32,
-                                binop(Iop_Sub32,
-                                      mkU32(32),
-                                      mkexpr(amtT)),
-                                mkU32(31)
-                          )
-                     )
+               binop(Iop_And32,
+                     binop(Iop_Shr32,
+                           mkexpr(rMt),
+                           unop(Iop_32to8,
+                                binop(Iop_And32,
+                                      binop(Iop_Sub32,
+                                            mkU32(32),
+                                            mkexpr(amtT)),
+                                      mkU32(31)
+                                )
+                           )
+                     ),
+                     mkU32(1)
                )
             ),
             mkexpr(oldC)
@@ -1862,7 +1870,7 @@
       /* mux0X(amt == 0,
                mux0X(amt < 32, 
                      0,
-                     Rm[(amt-1) & 31])
+                     Rm[(amt-1) & 31]),
                oldC)
       */
       IRTemp oldC = newTemp(Ity_I32);
@@ -1876,16 +1884,19 @@
                unop(Iop_1Uto8,
                     binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
                mkU32(0),
-               binop(Iop_Shr32,
-                     mkexpr(rMt),
-                     unop(Iop_32to8,
-                          binop(Iop_And32,
-                                binop(Iop_Sub32,
-                                      mkexpr(amtT),
-                                      mkU32(1)),
-                                mkU32(31)
-                          )
-                     )
+               binop(Iop_And32,
+                     binop(Iop_Shr32,
+                           mkexpr(rMt),
+                           unop(Iop_32to8,
+                                binop(Iop_And32,
+                                      binop(Iop_Sub32,
+                                            mkexpr(amtT),
+                                            mkU32(1)),
+                                      mkU32(31)
+                                )
+                           )
+                     ),
+                     mkU32(1)
                )
             ),
             mkexpr(oldC)
@@ -1984,20 +1995,26 @@
             IRExpr_Mux0X(
                unop(Iop_1Uto8,
                     binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32))),
-               binop(Iop_Shr32,
-                     mkexpr(rMt),
-                     mkU8(31)
+               binop(Iop_And32,
+                     binop(Iop_Shr32,
+                           mkexpr(rMt),
+                           mkU8(31)
+                     ),
+                     mkU32(1)
                ),
-               binop(Iop_Shr32,
-                     mkexpr(rMt),
-                     unop(Iop_32to8,
-                          binop(Iop_And32,
-                                binop(Iop_Sub32,
-                                      mkexpr(amtT),
-                                      mkU32(1)),
-                                mkU32(31)
-                          )
-                     )
+               binop(Iop_And32,
+                     binop(Iop_Shr32,
+                           mkexpr(rMt),
+                           unop(Iop_32to8,
+                                binop(Iop_And32,
+                                      binop(Iop_Sub32,
+                                            mkexpr(amtT),
+                                            mkU32(1)),
+                                      mkU32(31)
+                                )
+                           )
+                     ),
+                     mkU32(1)
                )
             ),
             mkexpr(oldC)
@@ -3794,7 +3811,7 @@
       case 5:
          if (B == 0) {
             /* VRSHL */
-            IROp op, op_shrn, op_shln, cmp_gt, op_sub, op_add;
+            IROp op, op_shrn, op_shln, cmp_gt, op_add;
             IRTemp shval, old_shval, imm_val, round;
             UInt i;
             ULong imm;
@@ -3814,28 +3831,24 @@
                switch (size) {
                   case 0:
                      op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
-                     op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
                      break;
                   case 1:
                      op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
-                     op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
                      break;
                   case 2:
                      op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
-                     op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
                      break;
                   case 3:
                      op = Q ? Iop_Shl64x2 : Iop_Shl64;
-                     op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
@@ -3847,28 +3860,24 @@
                switch (size) {
                   case 0:
                      op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
-                     op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
                      break;
                   case 1:
                      op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
-                     op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
                      break;
                   case 2:
                      op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
-                     op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
                      break;
                   case 3:
                      op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
-                     op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
@@ -3939,7 +3948,7 @@
                 nreg);
          } else {
             /* VQRSHL */
-            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_sub, op_add;
+            IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
             IRTemp tmp, shval, mask, old_shval, imm_val, round;
             UInt i;
             ULong esize, imm;
@@ -3960,7 +3969,6 @@
                switch (size) {
                   case 0:
                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
-                     op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
@@ -3968,7 +3976,6 @@
                      break;
                   case 1:
                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
-                     op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
@@ -3976,7 +3983,6 @@
                      break;
                   case 2:
                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
-                     op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
@@ -3984,7 +3990,6 @@
                      break;
                   case 3:
                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
-                     op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
@@ -3997,7 +4002,6 @@
                switch (size) {
                   case 0:
                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
-                     op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
@@ -4005,7 +4009,6 @@
                      break;
                   case 1:
                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
-                     op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
@@ -4013,7 +4016,6 @@
                      break;
                   case 2:
                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
-                     op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
@@ -4021,7 +4023,6 @@
                      break;
                   case 3:
                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
-                     op_sub = Q ? Iop_Sub64x2 : Iop_Sub64;
                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
@@ -4814,15 +4815,15 @@
          size = B;
          switch (size) {
             case 0:
-               cvt = U ? Iop_Longen8Ux8 : Iop_Longen8Sx8;
+               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
                op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
                break;
             case 1:
-               cvt = U ? Iop_Longen16Ux4 : Iop_Longen16Sx4;
+               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
                op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
                break;
             case 2:
-               cvt = U ? Iop_Longen32Ux2 : Iop_Longen32Sx2;
+               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
                op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
                break;
             case 3:
@@ -4859,7 +4860,7 @@
          switch (size) {
             case 0:
                op = Iop_Add16x8;
-               cvt = Iop_Shorten16x8;
+               cvt = Iop_NarrowUn16to8x8;
                sh = Iop_ShrN16x8;
                imm = 1U << 7;
                imm = (imm << 16) | imm;
@@ -4867,14 +4868,14 @@
                break;
             case 1:
                op = Iop_Add32x4;
-               cvt = Iop_Shorten32x4;
+               cvt = Iop_NarrowUn32to16x4;
                sh = Iop_ShrN32x4;
                imm = 1U << 15;
                imm = (imm << 32) | imm;
                break;
             case 2:
                op = Iop_Add64x2;
-               cvt = Iop_Shorten64x2;
+               cvt = Iop_NarrowUn64to32x2;
                sh = Iop_ShrN64x2;
                imm = 1U << 31;
                break;
@@ -4909,22 +4910,22 @@
          switch (size) {
             case 0:
                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
-               cvt = U ? Iop_Longen8Ux8 : Iop_Longen8Sx8;
-               cvt2 = Iop_Longen8Sx8;
+               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
+               cvt2 = Iop_Widen8Sto16x8;
                op = Iop_Sub16x8;
                op2 = Iop_Add16x8;
                break;
             case 1:
                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
-               cvt = U ? Iop_Longen16Ux4 : Iop_Longen16Sx4;
-               cvt2 = Iop_Longen16Sx4;
+               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
+               cvt2 = Iop_Widen16Sto32x4;
                op = Iop_Sub32x4;
                op2 = Iop_Add32x4;
                break;
             case 2:
                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
-               cvt = U ? Iop_Longen32Ux2 : Iop_Longen32Sx2;
-               cvt2 = Iop_Longen32Sx2;
+               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
+               cvt2 = Iop_Widen32Sto64x2;
                op = Iop_Sub64x2;
                op2 = Iop_Add64x2;
                break;
@@ -4967,7 +4968,7 @@
             case 0:
                op = Iop_Sub16x8;
                op2 = Iop_Add16x8;
-               cvt = Iop_Shorten16x8;
+               cvt = Iop_NarrowUn16to8x8;
                sh = Iop_ShrN16x8;
                imm = 1U << 7;
                imm = (imm << 16) | imm;
@@ -4976,7 +4977,7 @@
             case 1:
                op = Iop_Sub32x4;
                op2 = Iop_Add32x4;
-               cvt = Iop_Shorten32x4;
+               cvt = Iop_NarrowUn32to16x4;
                sh = Iop_ShrN32x4;
                imm = 1U << 15;
                imm = (imm << 32) | imm;
@@ -4984,7 +4985,7 @@
             case 2:
                op = Iop_Sub64x2;
                op2 = Iop_Add64x2;
-               cvt = Iop_Shorten64x2;
+               cvt = Iop_NarrowUn64to32x2;
                sh = Iop_ShrN64x2;
                imm = 1U << 31;
                break;
@@ -5019,20 +5020,20 @@
          switch (size) {
             case 0:
                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
-               cvt = U ? Iop_Longen8Ux8 : Iop_Longen8Sx8;
-               cvt2 = Iop_Longen8Sx8;
+               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
+               cvt2 = Iop_Widen8Sto16x8;
                op = Iop_Sub16x8;
                break;
             case 1:
                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
-               cvt = U ? Iop_Longen16Ux4 : Iop_Longen16Sx4;
-               cvt2 = Iop_Longen16Sx4;
+               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
+               cvt2 = Iop_Widen16Sto32x4;
                op = Iop_Sub32x4;
                break;
             case 2:
                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
-               cvt = U ? Iop_Longen32Ux2 : Iop_Longen32Sx2;
-               cvt2 = Iop_Longen32Sx2;
+               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
+               cvt2 = Iop_Widen32Sto64x2;
                op = Iop_Sub64x2;
                break;
             case 3:
@@ -5526,25 +5527,40 @@
          }
          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
       }
-      switch (size) {
-         case 1:
-            op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
-            break;
-         case 2:
-            op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
-            break;
-         case 0:
-         case 3:
-            return False;
-         default:
-            vassert(0);
+      if (INSN(8,8)) {
+         switch (size) {
+            case 2:
+               op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
+               break;
+            case 0:
+            case 1:
+            case 3:
+               return False;
+            default:
+               vassert(0);
+         }
+      } else {
+         switch (size) {
+            case 1:
+               op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
+               break;
+            case 2:
+               op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
+               break;
+            case 0:
+            case 3:
+               return False;
+            default:
+               vassert(0);
+         }
       }
       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
       if (Q)
          putQReg(dreg, mkexpr(res), condT);
       else
          putDRegI64(dreg, mkexpr(res), condT);
-      DIP("vmul.i%u %c%u, %c%u, d%u[%u]\n", 8 << size, Q ? 'q' : 'd', dreg,
+      DIP("vmul.%c%u %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
+          8 << size, Q ? 'q' : 'd', dreg,
           Q ? 'q' : 'd', nreg, mreg, index);
       return True;
    }
@@ -5597,11 +5613,10 @@
    if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
       IROp op ,op2, dup, get;
       ULong imm;
-      IRTemp res, arg_m, arg_n;
+      IRTemp arg_m, arg_n;
       if (dreg & 1)
          return False;
       dreg >>= 1;
-      res = newTemp(Ity_V128);
       arg_m = newTemp(Ity_I64);
       arg_n = newTemp(Ity_I64);
       assign(arg_n, getDRegI64(nreg));
@@ -6339,15 +6354,15 @@
                switch (size) {
                   case 1:
                      op = Iop_ShrN16x8;
-                     narOp = Iop_Shorten16x8;
+                     narOp = Iop_NarrowUn16to8x8;
                      break;
                   case 2:
                      op = Iop_ShrN32x4;
-                     narOp = Iop_Shorten32x4;
+                     narOp = Iop_NarrowUn32to16x4;
                      break;
                   case 3:
                      op = Iop_ShrN64x2;
-                     narOp = Iop_Shorten64x2;
+                     narOp = Iop_NarrowUn64to32x2;
                      break;
                   default:
                      vassert(0);
@@ -6380,17 +6395,17 @@
                   case 1:
                      addOp = Iop_Add16x8;
                      shOp = Iop_ShrN16x8;
-                     narOp = Iop_Shorten16x8;
+                     narOp = Iop_NarrowUn16to8x8;
                      break;
                   case 2:
                      addOp = Iop_Add32x4;
                      shOp = Iop_ShrN32x4;
-                     narOp = Iop_Shorten32x4;
+                     narOp = Iop_NarrowUn32to16x4;
                      break;
                   case 3:
                      addOp = Iop_Add64x2;
                      shOp = Iop_ShrN64x2;
-                     narOp = Iop_Shorten64x2;
+                     narOp = Iop_NarrowUn64to32x2;
                      break;
                   default:
                      vassert(0);
@@ -6429,18 +6444,18 @@
             switch (size) {
                case 1:
                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
-                  cvt = U ? Iop_QShortenU16Ux8 : Iop_QShortenS16Sx8;
-                  cvt2 = U ? Iop_Longen8Ux8 : Iop_Longen8Sx8;
+                  cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
+                  cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
                   break;
                case 2:
                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
-                  cvt = U ? Iop_QShortenU32Ux4 : Iop_QShortenS32Sx4;
-                  cvt2 = U ? Iop_Longen16Ux4 : Iop_Longen16Sx4;
+                  cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
+                  cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
                   break;
                case 3:
                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
-                  cvt = U ? Iop_QShortenU64Ux2 : Iop_QShortenS64Sx2;
-                  cvt2 = U ? Iop_Longen32Ux2 : Iop_Longen32Sx2;
+                  cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
+                  cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
                   break;
                default:
                   vassert(0);
@@ -6452,18 +6467,18 @@
             switch (size) {
                case 1:
                   op = Iop_SarN16x8;
-                  cvt = Iop_QShortenU16Sx8;
-                  cvt2 = Iop_Longen8Ux8;
+                  cvt = Iop_QNarrowUn16Sto8Ux8;
+                  cvt2 = Iop_Widen8Uto16x8;
                   break;
                case 2:
                   op = Iop_SarN32x4;
-                  cvt = Iop_QShortenU32Sx4;
-                  cvt2 = Iop_Longen16Ux4;
+                  cvt = Iop_QNarrowUn32Sto16Ux4;
+                  cvt2 = Iop_Widen16Uto32x4;
                   break;
                case 3:
                   op = Iop_SarN64x2;
-                  cvt = Iop_QShortenU64Sx2;
-                  cvt2 = Iop_Longen32Ux2;
+                  cvt = Iop_QNarrowUn64Sto32Ux2;
+                  cvt2 = Iop_Widen32Uto64x2;
                   break;
                default:
                   vassert(0);
@@ -6523,15 +6538,15 @@
          switch (size) {
             case 0:
                op = Iop_ShlN16x8;
-               cvt = U ? Iop_Longen8Ux8 : Iop_Longen8Sx8;
+               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
                break;
             case 1:
                op = Iop_ShlN32x4;
-               cvt = U ? Iop_Longen16Ux4 : Iop_Longen16Sx4;
+               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
                break;
             case 2:
                op = Iop_ShlN64x2;
-               cvt = U ? Iop_Longen32Ux2 : Iop_Longen32Sx2;
+               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
                break;
             case 3:
                return False;
@@ -7340,9 +7355,9 @@
             IROp op;
             mreg >>= 1;
             switch (size) {
-               case 0: op = Iop_Shorten16x8; break;
-               case 1: op = Iop_Shorten32x4; break;
-               case 2: op = Iop_Shorten64x2; break;
+               case 0: op = Iop_NarrowUn16to8x8;  break;
+               case 1: op = Iop_NarrowUn32to16x4; break;
+               case 2: op = Iop_NarrowUn64to32x2; break;
                case 3: return False;
                default: vassert(0);
             }
@@ -7359,9 +7374,9 @@
                return False;
             mreg >>= 1;
             switch (size) {
-               case 0: op2 = Iop_Shorten16x8; break;
-               case 1: op2 = Iop_Shorten32x4; break;
-               case 2: op2 = Iop_Shorten64x2; break;
+               case 0: op2 = Iop_NarrowUn16to8x8;  break;
+               case 1: op2 = Iop_NarrowUn32to16x4; break;
+               case 2: op2 = Iop_NarrowUn64to32x2; break;
                case 3: return False;
                default: vassert(0);
             }
@@ -7370,9 +7385,9 @@
                   vassert(0);
                case 1:
                   switch (size) {
-                     case 0: op = Iop_QShortenU16Sx8; break;
-                     case 1: op = Iop_QShortenU32Sx4; break;
-                     case 2: op = Iop_QShortenU64Sx2; break;
+                     case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
+                     case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
+                     case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
                      case 3: return False;
                      default: vassert(0);
                   }
@@ -7380,9 +7395,9 @@
                   break;
                case 2:
                   switch (size) {
-                     case 0: op = Iop_QShortenS16Sx8; break;
-                     case 1: op = Iop_QShortenS32Sx4; break;
-                     case 2: op = Iop_QShortenS64Sx2; break;
+                     case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
+                     case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
+                     case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
                      case 3: return False;
                      default: vassert(0);
                   }
@@ -7390,9 +7405,9 @@
                   break;
                case 3:
                   switch (size) {
-                     case 0: op = Iop_QShortenU16Ux8; break;
-                     case 1: op = Iop_QShortenU32Ux4; break;
-                     case 2: op = Iop_QShortenU64Ux2; break;
+                     case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
+                     case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
+                     case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
                      case 3: return False;
                      default: vassert(0);
                   }
@@ -7422,9 +7437,9 @@
             shift_imm = 8 << size;
             res = newTemp(Ity_V128);
             switch (size) {
-               case 0: op = Iop_ShlN16x8; cvt = Iop_Longen8Ux8; break;
-               case 1: op = Iop_ShlN32x4; cvt = Iop_Longen16Ux4; break;
-               case 2: op = Iop_ShlN64x2; cvt = Iop_Longen32Ux2; break;
+               case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
+               case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
+               case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
                case 3: return False;
                default: vassert(0);
             }
@@ -7679,7 +7694,7 @@
          break;
       case 15:
          imm = (imm_raw & 0x80) << 5;
-         imm |= ~((imm_raw & 0x40) << 5);
+         imm |= ((~imm_raw & 0x40) << 5);
          for(i = 1; i <= 4; i++)
             imm |= (imm_raw & 0x40) << i;
          imm |= (imm_raw & 0x7f);
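
Moving the `~` inside the parentheses in this hunk is a genuine fix: in the old form the complement was applied to the already-shifted word, so a clear bit 6 smeared ones across all 32 bits instead of setting just bit 11. A quick check of both expressions in plain C:

   #include <assert.h>
   #include <stdint.h>

   int main ( void )
   {
      uint32_t imm_raw = 0x00;                       /* bit 6 clear */
      uint32_t bad  = ~((imm_raw & 0x40) << 5);      /* ~ runs last: all ones */
      uint32_t good = ((~imm_raw & 0x40) << 5);      /* just bit 11 */
      assert(bad  == 0xFFFFFFFFu);
      assert(good == 0x00000800u);
      return 0;
   }
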
@@ -7911,13 +7926,13 @@
 
 /* A7.7 Advanced SIMD element or structure load/store instructions */
 static
-Bool dis_neon_elem_or_struct_load ( UInt theInstr,
-                                    Bool isT, IRTemp condT )
+Bool dis_neon_load_or_store ( UInt theInstr,
+                              Bool isT, IRTemp condT )
 {
 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
-   UInt A = INSN(23,23);
-   UInt B = INSN(11,8);
-   UInt L = INSN(21,21);
+   UInt bA = INSN(23,23);
+   UInt fB = INSN(11,8);
+   UInt bL = INSN(21,21);
    UInt rD = (INSN(22,22) << 4) | INSN(15,12);
    UInt rN = INSN(19,16);
    UInt rM = INSN(3,0);
@@ -7942,12 +7957,18 @@
    IRTemp initialRm = newTemp(Ity_I32);
    assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
 
-   if (A) {
-      N = B & 3;
-      if ((B >> 2) < 3) {
-         /* VSTn / VLDn (n-element structure from/to one lane) */
+   /* There are 3 cases:
+      (1) VSTn / VLDn (n-element structure from/to one lane)
+      (2) VLDn (single element to all lanes)
+      (3) VSTn / VLDn (multiple n-element structures)
+   */
+   if (bA) {
+      N = fB & 3;
+      if ((fB >> 2) < 3) {
+         /* ------------ Case (1) ------------
+            VSTn / VLDn (n-element structure from/to one lane) */
 
-         size = B >> 2;
+         size = fB >> 2;
 
          switch (size) {
             case 0: i = INSN(7,5); inc = 1; break;
@@ -7965,11 +7986,11 @@
             mk_skip_over_T32_if_cond_is_false(condT);
          // now uncond
 
-         if (L)
+         if (bL)
             mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
          else
             mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
-         DIP("v%s%u.%u {", L ? "ld" : "st", N + 1, 8 << size);
+         DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << size);
          for (j = 0; j <= N; j++) {
             if (j)
                DIP(", ");
@@ -7982,9 +8003,10 @@
             DIP("%s\n", (rM != 15) ? "!" : "");
          }
       } else {
-         /* VLDn (single element to all lanes) */
+         /* ------------ Case (2) ------------ 
+            VLDn (single element to all lanes) */
          UInt r;
-         if (L == 0)
+         if (bL == 0)
             return False;
 
          inc = INSN(5,5) + 1;
@@ -8100,31 +8122,32 @@
       }
       return True;
    } else {
+      /* ------------ Case (3) ------------
+         VSTn / VLDn (multiple n-element structures) */
       IRTemp tmp;
       UInt r, elems;
-      /* VSTn / VLDn (multiple n-element structures) */
-      if (B == BITS4(0,0,1,0) || B == BITS4(0,1,1,0)
-          || B == BITS4(0,1,1,1) || B == BITS4(1,0,1,0)) {
+      if (fB == BITS4(0,0,1,0) || fB == BITS4(0,1,1,0)
+          || fB == BITS4(0,1,1,1) || fB == BITS4(1,0,1,0)) {
          N = 0;
-      } else if (B == BITS4(0,0,1,1) || B == BITS4(1,0,0,0)
-                 || B == BITS4(1,0,0,1)) {
+      } else if (fB == BITS4(0,0,1,1) || fB == BITS4(1,0,0,0)
+                 || fB == BITS4(1,0,0,1)) {
          N = 1;
-      } else if (B == BITS4(0,1,0,0) || B == BITS4(0,1,0,1)) {
+      } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
          N = 2;
-      } else if (B == BITS4(0,0,0,0) || B == BITS4(0,0,0,1)) {
+      } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
          N = 3;
       } else {
          return False;
       }
-      inc = (B & 1) + 1;
-      if (N == 1 && B == BITS4(0,0,1,1)) {
+      inc = (fB & 1) + 1;
+      if (N == 1 && fB == BITS4(0,0,1,1)) {
          regs = 2;
       } else if (N == 0) {
-         if (B == BITS4(1,0,1,0)) {
+         if (fB == BITS4(1,0,1,0)) {
             regs = 2;
-         } else if (B == BITS4(0,1,1,0)) {
+         } else if (fB == BITS4(0,1,1,0)) {
             regs = 3;
-         } else if (B == BITS4(0,0,1,0)) {
+         } else if (fB == BITS4(0,0,1,0)) {
             regs = 4;
          }
       }
@@ -8147,7 +8170,7 @@
 
       for (r = 0; r < regs; r++) {
          for (i = 0; i < elems; i++) {
-            if (L)
+            if (bL)
                mk_neon_elem_load_to_one_lane(rD + r, inc, i, N, size, addr);
             else
                mk_neon_elem_store_from_one_lane(rD + r, inc, i, N, size, addr);
@@ -8177,7 +8200,7 @@
                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
          }
       }
-      DIP("v%s%u.%u {", L ? "ld" : "st", N + 1, 8 << INSN(7,6));
+      DIP("v%s%u.%u {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
       if ((inc == 1 && regs * (N + 1) > 1)
           || (inc == 2 && regs > 1 && N > 0)) {
          DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
@@ -8269,12 +8292,12 @@
    */
    if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
       // ARM, memory
-      return dis_neon_elem_or_struct_load(INSN(31,0), isT, condT);
+      return dis_neon_load_or_store(INSN(31,0), isT, condT);
    }
    if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
       UInt reformatted = INSN(23,0);
       reformatted |= (BITS8(1,1,1,1,0,1,0,0) << 24);
-      return dis_neon_elem_or_struct_load(reformatted, isT, condT);
+      return dis_neon_load_or_store(reformatted, isT, condT);
    }
 
    /* Doesn't match. */
@@ -10211,6 +10234,7 @@
                          UInt regList )
 {
    Int i, r, m, nRegs;
+   IRJumpKind jk = Ijk_Boring;
 
    /* Get hold of the old Rn value.  We might need to write its value
       to memory during a store, and if it's also the writeback
@@ -10337,6 +10361,15 @@
       }
    }
 
+   /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
+       register and PC in the register list is a return for purposes of branch
+       prediction.
+      The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
+       to be counted in event 0x0E (Procedure return). */
+   if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
+      jk = Ijk_Ret;
+   }
+
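
In practice this marks a typical epilogue such as "ldmia sp!, {r4-r11, pc}" as a return, so the block ends with Ijk_Ret instead of Ijk_Boring. A hypothetical helper restating the predicate (not part of the patch):

   /* LDM with base r13, increment-after, writeback, doing a load:
      treat as a procedure return for branch-prediction purposes. */
   static int is_procedure_return ( unsigned rN, int bL,
                                    int bINC, int bBEFORE, int bW )
   {
      return rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1;
   }
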
    /* Actually generate the transfers */
    for (i = 0; i < nX; i++) {
       r = xReg[i];
@@ -10345,7 +10378,7 @@
                             binop(opADDorSUB, mkexpr(anchorT),
                                   mkU32(xOff[i])));
          if (arm) {
-            putIRegA( r, e, IRTemp_INVALID, Ijk_Ret );
+            putIRegA( r, e, IRTemp_INVALID, jk );
          } else {
             // no: putIRegT( r, e, IRTemp_INVALID );
             // putIRegT refuses to write to R15.  But that might happen.
@@ -11968,6 +12001,16 @@
          break;
    }
 
+   /* ------------------- CLREX ------------------ */
+   if (insn == 0xF57FF01F) {
+      /* AFAICS, this simply cancels a (all?) reservations made by a
+         (any?) preceding LDREX(es).  Arrange to hand it through to
+         the back end. */
+      stmt( IRStmt_MBE(Imbe_CancelReservation) );
+      DIP("clrex\n");
+      return True;
+   }
+
    /* ------------------- NEON ------------------- */
    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
       Bool ok_neon = decode_NEON_instruction(
@@ -12259,6 +12302,7 @@
          case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
          case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
             Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
+            IRJumpKind jk = Ijk_Boring;
             if (rN != 0)
                break; /* rN must be zero */
             ok = mk_shifter_operand(
@@ -12277,8 +12321,13 @@
             } else {
                vassert(shco == IRTemp_INVALID);
             }
+            /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
+                return for purposes of branch prediction. */
+            if (!isMVN && INSN(11,0) == 14) {
+               jk = Ijk_Ret;
+            }
             // can't safely read guest state after here
-            putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
+            putIRegA( rD, mkexpr(res), condT, jk );
             /* Update the flags thunk if necessary */
             if (bitS) {
                setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, 
@@ -12582,8 +12631,18 @@
 
         /* generate the transfer */
         if (bB == 0) { // word load
+           IRJumpKind jk = Ijk_Boring;
+           /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
+               base register and PC as the destination register is a return for
+               purposes of branch prediction.
+              The ARM ARM Sec. C9.10.1 further specifies that it must use a
+               post-increment by immediate addressing mode to be counted in
+               event 0x0E (Procedure return). */
+           if (rN == 13 && summary == (3 | 16) && bB == 0) {
+              jk = Ijk_Ret;
+           }
            putIRegA( rD, loadLE(Ity_I32, mkexpr(taT)),
-                     IRTemp_INVALID, Ijk_Boring );
+                     IRTemp_INVALID, jk );
         } else { // byte load
            vassert(bB == 1);
            putIRegA( rD, unop(Iop_8Uto32, loadLE(Ity_I8, mkexpr(taT))),
@@ -12974,7 +13033,7 @@
             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
                                jk, IRConst_U32(dst) ));
             irsb->next     = mkU32(guest_R15_curr_instr_notENC + 4);
-            irsb->jumpkind = jk;
+            irsb->jumpkind = Ijk_Boring;
             dres.whatNext  = Dis_StopHere;
          }
          DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
@@ -12989,7 +13048,7 @@
        && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
        && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
            || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
-      IRExpr* dst;
+      IRTemp  dst = newTemp(Ity_I32);
       UInt    link = (INSN(11,4) >> 1) & 1;
       UInt    rM   = INSN(3,0);
       // we don't decode the case (link && rM == 15), as that's
@@ -13001,12 +13060,12 @@
          // rM contains an interworking address exactly as we require
          // (with continuation CPSR.T in bit 0), so we can use it
          // as-is, with no masking.
-         dst = getIRegA(rM);
+         assign( dst, getIRegA(rM) );
          if (link) {
             putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
                       IRTemp_INVALID/*because AL*/, Ijk_Boring );
          }
-         irsb->next     = dst;
+         irsb->next     = mkexpr(dst);
          irsb->jumpkind = link ? Ijk_Call
                                : (rM == 14 ? Ijk_Ret : Ijk_Boring);
          dres.whatNext  = Dis_StopHere;
@@ -13370,52 +13429,107 @@
    /* -- ARMv6 instructions                                    -- */
    /* ----------------------------------------------------------- */
 
-   /* --------------------- ldrex, strex --------------------- */
+   /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
 
-   // LDREX
-   if (0x01900F9F == (insn & 0x0FF00FFF)) {
-      UInt rT = INSN(15,12);
-      UInt rN = INSN(19,16);
-      if (rT == 15 || rN == 15) {
-         /* undecodable; fall through */
+   // LDREXD, LDREX, LDREXH, LDREXB
+   if (0x01900F9F == (insn & 0x0F900FFF)) {
+      UInt   rT    = INSN(15,12);
+      UInt   rN    = INSN(19,16);
+      IRType ty    = Ity_INVALID;
+      IROp   widen = Iop_INVALID;
+      HChar* nm    = NULL;
+      Bool   valid = True;
+      switch (INSN(22,21)) {
+         case 0: nm = "";  ty = Ity_I32; break;
+         case 1: nm = "d"; ty = Ity_I64; break;
+         case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
+         case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
+         default: vassert(0);
+      }
+      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
+         if (rT == 15 || rN == 15)
+            valid = False;
       } else {
+         vassert(ty == Ity_I64);
+         if ((rT & 1) == 1 || rT == 14 || rN == 15)
+            valid = False;
+      }
+      if (valid) {
          IRTemp res;
          /* make unconditional */
          if (condT != IRTemp_INVALID) {
-            mk_skip_over_A32_if_cond_is_false( condT );
-            condT = IRTemp_INVALID;
+           mk_skip_over_A32_if_cond_is_false( condT );
+           condT = IRTemp_INVALID;
          }
          /* Ok, now we're unconditional.  Do the load. */
-         res = newTemp(Ity_I32);
+         res = newTemp(ty);
+         // FIXME: assumes little-endian guest
          stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
                            NULL/*this is a load*/) );
-         putIRegA(rT, mkexpr(res), IRTemp_INVALID, Ijk_Boring);
-         DIP("ldrex%s r%u, [r%u]\n", nCC(INSN_COND), rT, rN);
+         if (ty == Ity_I64) {
+            // FIXME: assumes little-endian guest
+            putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
+                           IRTemp_INVALID, Ijk_Boring);
+            putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
+                           IRTemp_INVALID, Ijk_Boring);
+            DIP("ldrex%s%s r%u, r%u, [r%u]\n",
+                nm, nCC(INSN_COND), rT+0, rT+1, rN);
+         } else {
+            putIRegA(rT, widen == Iop_INVALID
+                            ? mkexpr(res) : unop(widen, mkexpr(res)),
+                     IRTemp_INVALID, Ijk_Boring);
+            DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
+         }
          goto decode_success;
       }
-      /* fall through */
+      /* undecodable; fall through */
    }
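
Both the load and store halves of this rewrite share the same size decode on bits 22:21 of the instruction. An illustrative sketch of that mapping (an assumed helper, not VEX code):

   /* insn[22:21]: 0 = word, 1 = doubleword, 2 = byte, 3 = halfword */
   static unsigned ldrex_access_bytes ( unsigned bits_22_21 )
   {
      switch (bits_22_21) {
         case 0: return 4;   /* ldrex  / strex  */
         case 1: return 8;   /* ldrexd / strexd */
         case 2: return 1;   /* ldrexb / strexb */
         case 3: return 2;   /* ldrexh / strexh */
         default: return 0;  /* unreachable for a 2-bit field */
      }
   }
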
 
-   // STREX
-   if (0x01800F90 == (insn & 0x0FF00FF0)) {
-      UInt rT = INSN(3,0);
-      UInt rN = INSN(19,16);
-      UInt rD = INSN(15,12);
-      if (rT == 15 || rN == 15 || rD == 15
-          || rD == rT || rD == rN) {
-         /* undecodable; fall through */
+   // STREXD, STREX, STREXH, STREXB
+   if (0x01800F90 == (insn & 0x0F900FF0)) {
+      UInt   rT     = INSN(3,0);
+      UInt   rN     = INSN(19,16);
+      UInt   rD     = INSN(15,12);
+      IRType ty     = Ity_INVALID;
+      IROp   narrow = Iop_INVALID;
+      HChar* nm     = NULL;
+      Bool   valid  = True;
+      switch (INSN(22,21)) {
+         case 0: nm = "";  ty = Ity_I32; break;
+         case 1: nm = "d"; ty = Ity_I64; break;
+         case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
+         case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
+         default: vassert(0);
+      }
+      if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
+         if (rD == 15 || rN == 15 || rT == 15
+             || rD == rN || rD == rT)
+            valid = False;
       } else {
-         IRTemp resSC1, resSC32;
-
+         vassert(ty == Ity_I64);
+         if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
+             || rD == rN || rD == rT || rD == rT+1)
+            valid = False;
+      }
+      if (valid) {
+         IRTemp resSC1, resSC32, data;
          /* make unconditional */
          if (condT != IRTemp_INVALID) {
             mk_skip_over_A32_if_cond_is_false( condT );
             condT = IRTemp_INVALID;
          }
-
          /* Ok, now we're unconditional.  Do the store. */
+         data = newTemp(ty);
+         assign(data,
+                ty == Ity_I64
+                   // FIXME: assumes little-endian guest
+                   ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
+                   : narrow == Iop_INVALID
+                      ? getIRegA(rT)
+                      : unop(narrow, getIRegA(rT)));
          resSC1 = newTemp(Ity_I1);
-         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), getIRegA(rT)) );
+         // FIXME: assumes little-endian guest
+         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
 
          /* Set rD to 1 on failure, 0 on success.  Currently we have
             resSC1 == 0 on failure, 1 on success. */
@@ -13425,7 +13539,13 @@
 
          putIRegA(rD, mkexpr(resSC32),
                       IRTemp_INVALID, Ijk_Boring);
-         DIP("strex%s r%u, r%u, [r%u]\n", nCC(INSN_COND), rD, rT, rN);
+         if (ty == Ity_I64) {
+            DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
+                nm, nCC(INSN_COND), rD, rT, rT+1, rN);
+         } else {
+            DIP("strex%s%s r%u, r%u, [r%u]\n",
+                nm, nCC(INSN_COND), rD, rT, rN);
+         }
          goto decode_success;
       }
       /* fall through */
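
The Not1 in the status computation exists because of a polarity mismatch: VEX's store-conditional yields 1 on success, while ARM's STREX must write 0 to rD on success and 1 on failure. In plain C terms:

   /* Map VEX LLSC success (1) to the ARM STREX status value (0). */
   static unsigned strex_status ( int llsc_succeeded )
   {
      return llsc_succeeded ? 0 : 1;
   }
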
@@ -13956,69 +14076,6 @@
       goto decode_success;
    }
 
-   // LDREXD
-   if (0x01B00F9F == (insn & 0x0FF00FFF)) {
-      UInt rT = INSN(15,12);
-      UInt rN = INSN(19,16);
-      if ((rT & 1) == 1 || rT == BITS4(1,1,1,0) || rN == 15) {
-         /* undecodable; fall through */
-      } else {
-         IRTemp res;
-         /* make unconditional */
-         if (condT != IRTemp_INVALID) {
-            mk_skip_over_A32_if_cond_is_false( condT );
-            condT = IRTemp_INVALID;
-         }
-         /* Ok, now we're unconditional.  Do the load. */
-         res = newTemp(Ity_I64);
-         stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
-                           NULL/*this is a load*/) );
-         putIRegA(rT+0, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID, Ijk_Boring);
-         putIRegA(rT+1, unop(Iop_64to32, mkexpr(res)), IRTemp_INVALID, Ijk_Boring);
-
-         DIP("ldrexd%s r%u, r%u, [r%u]\n", nCC(INSN_COND), rT+0, rT+1, rN);
-         goto decode_success;
-      }
-      /* fall through */
-   }
-
-   // STREXD
-   if (0x01A00F90 == (insn & 0xFF00FF0)) {
-      UInt rT = INSN(3,0);
-      UInt rD = INSN(15,12);
-      UInt rN = INSN(19,16);
-      if (rD == 15 || (rT & 1) == 1 || rT == BITS4(1,1,1,0) || rN == 15
-          || rD == rN || rD == rT || rD == rT+1) {
-         /* undecodable; fall through */
-      } else {
-         IRTemp data;
-         IRTemp resSC1, resSC32;
-
-         if (condT != IRTemp_INVALID) {
-            mk_skip_over_A32_if_cond_is_false( condT );
-            condT = IRTemp_INVALID;
-         }
-
-         /* Ok, now we're unconditional. Do the store. */
-         data = newTemp(Ity_I64);
-         assign( data, binop(Iop_32HLto64, getIRegA(rT+0), getIRegA(rT+1)) );
-         resSC1 = newTemp(Ity_I1);
-         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
-
-         /* Set rD to 1 on failure, 0 on success.  Currently we have
-            resSC1 == 0 on failure, 1 on success. */
-         resSC32 = newTemp(Ity_I32);
-         assign(resSC32,
-                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
-
-         putIRegA(rD, mkexpr(resSC32),
-                      IRTemp_INVALID, Ijk_Boring);
-         DIP("strexd%s r%u, r%u, r%u, [r%u]\n", nCC(INSN_COND), rD, rT+0, rT+1, rN);
-         goto decode_success;
-      }
-      /* fall through */
-   }
-
    /* ----------------------------------------------------------- */
    /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
    /* ----------------------------------------------------------- */
@@ -14103,7 +14160,7 @@
          assert here. */
       vassert(dres.whatNext == Dis_Continue);
       vassert(irsb->next == NULL);
-      vassert(irsb->jumpkind = Ijk_Boring);
+      vassert(irsb->jumpkind == Ijk_Boring);
       /* If r15 is unconditionally written, terminate the block by
          jumping to it.  If it's conditionally written, still
          terminate the block (a shame, but we can't do side exits to
@@ -14140,6 +14197,8 @@
 /*--- Disassemble a single Thumb2 instruction              ---*/
 /*------------------------------------------------------------*/
 
+static const UChar it_length_table[256]; /* fwds */
+
 /* NB: in Thumb mode we do fetches of regs with getIRegT, which
    automagically adds 4 to fetches of r15.  However, writes to regs
    are done with putIRegT, which disallows writes to r15.  Hence any
@@ -14325,8 +14384,8 @@
       if (pageoff >= 18) {
          /* It's safe to poke about in the 9 halfwords preceding this
             insn.  So, have a look at them. */
-         guaranteedUnconditional = True; /* assume no 'it' insn found, till we do */
-
+         guaranteedUnconditional = True; /* assume no 'it' insn found,
+                                            till we do */
          UShort* hwp = (UShort*)(HWord)pc;
          Int i;
          for (i = -1; i >= -9; i--) {
@@ -14337,10 +14396,25 @@
                       == ( pc & 0xFFFFF000 ) );
             */
             /* All valid IT instructions must have the form 0xBFxy,
-               where x can be anything, but y must be nonzero. */
-            if ((hwp[i] & 0xFF00) == 0xBF00 && (hwp[i] & 0xF) != 0) {
-               /* might be an 'it' insn.  Play safe. */
-               guaranteedUnconditional = False;
+               where x can be anything, but y must be nonzero.  Find
+               the number of insns covered by it (1 .. 4) and check to
+               see if it can possibly reach up to the instruction in
+               question.  Some (x,y) combinations mean UNPREDICTABLE,
+               and the table is constructed to be conservative by
+               returning 4 for those cases, so the analysis is safe
+               even if the code uses unpredictable IT instructions (in
+               which case its authors are nuts, but hey.)  */
+            UShort hwp_i = hwp[i];
+            if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
+               /* might be an 'it' insn. */
+               /* # guarded insns */
+               Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
+               vassert(n_guarded >= 1 && n_guarded <= 4);
+               if (n_guarded * 2 /* # guarded HWs, worst case */
+                   > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
+                   /* -(i+0) also seems to work, even though I think
+                      it's wrong.  I don't understand that. */
+                  guaranteedUnconditional = False;
                break;
             }
          }
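
A minimal standalone sketch of the reach test above (illustration only, not part of the patch), assuming plain C and the same indexing as the scanning loop, where i runs from -1 to -9, counting halfwords back from the current insn:

#include <stdbool.h>

/* True if an IT insn at halfword index i (negative), guarding
   n_guarded insns, could cover the current insn at index 0.  The
   guarded insns occupy at most n_guarded * 2 halfwords, and -(i+1)
   halfwords lie between the IT and the current insn. */
static bool it_may_reach_current_insn(int i, int n_guarded)
{
   return n_guarded * 2 > -(i + 1);
}
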
@@ -14901,7 +14975,7 @@
             assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
          }
          irsb->next     = mkexpr(dst);
-         irsb->jumpkind = Ijk_Boring;
+         irsb->jumpkind = rM == 14 ? Ijk_Ret : Ijk_Boring;
          dres.whatNext  = Dis_StopHere;
          DIP("bx r%u (possibly switch to ARM mode)\n", rM);
          goto decode_success;
@@ -15040,7 +15114,7 @@
             // now uncond
             /* non-interworking branch */
             irsb->next = binop(Iop_Or32, mkexpr(val), mkU32(1));
-            irsb->jumpkind = Ijk_Boring;
+            irsb->jumpkind = rM == 14 ? Ijk_Ret : Ijk_Boring;
             dres.whatNext = Dis_StopHere;
          }
          DIP("mov r%u, r%u\n", rD, rM);
@@ -16003,7 +16077,7 @@
       UInt rD = INSN1(11,8);
       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
       /* but allow "add.w reg, sp, #constT" */ 
-      if (!valid && rN == 13)
+      if (!valid && rN == 13 && rD != 15)
          valid = True;
       if (valid) {
          IRTemp argL  = newTemp(Ity_I32);
@@ -16022,24 +16096,21 @@
       }
    }
 
-   /* ---------------- (T4) ADDW Rd, Rn, #imm12 -------------- */
+   /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
    if (INSN0(15,11) == BITS5(1,1,1,1,0)
        && INSN0(9,4) == BITS6(1,0,0,0,0,0)
        && INSN1(15,15) == 0) {
       UInt rN = INSN0(3,0);
       UInt rD = INSN1(11,8);
       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
-      /* but allow "addw reg, sp, #imm12" */
-      if (!valid && rN == 13)
+      /* but allow "addw sp, sp, #uimm12" */
+      if (!valid && rD == 13 && rN == 13)
          valid = True;
       if (valid) {
          IRTemp argL = newTemp(Ity_I32);
          IRTemp argR = newTemp(Ity_I32);
          IRTemp res  = newTemp(Ity_I32);
-         UInt imm1   = INSN0(10,10);
-         UInt imm3   = INSN1(14,12);
-         UInt imm8   = INSN1(7,0);
-         UInt imm12  = (imm1 << 11) | (imm3 << 8) | imm8;
+         UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
          assign(argL, getIRegT(rN));
          assign(argR, mkU32(imm12));
          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
@@ -16113,8 +16184,9 @@
       UInt rN    = INSN0(3,0);
       UInt rD    = INSN1(11,8);
       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
-      /* but allow "sub.w sp, sp, #constT" */
-      if (!valid && !isRSB && rN == 13 && rD == 13)
+      /* but allow "sub{s}.w reg, sp, #constT 
+         this is (T2) of "SUB (SP minus immediate)" */
+      if (!valid && !isRSB && rN == 13 && rD != 15)
          valid = True;
       if (valid) {
          IRTemp argL  = newTemp(Ity_I32);
@@ -16139,14 +16211,15 @@
       }
    }
 
-   /* -------------- (T4) SUBW Rd, Rn, #imm12 ------------------- */
+   /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
    if (INSN0(15,11) == BITS5(1,1,1,1,0)
        && INSN0(9,4) == BITS6(1,0,1,0,1,0)
        && INSN1(15,15) == 0) {
       UInt rN = INSN0(3,0);
       UInt rD = INSN1(11,8);
       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
-      if (!valid && rN == 13 && rD == 13)
+      /* but allow "subw sp, sp, #uimm12" */
+      if (!valid && rD == 13 && rN == 13)
          valid = True;
       if (valid) {
          IRTemp argL  = newTemp(Ity_I32);
@@ -16162,33 +16235,6 @@
       }
    }
 
-   /* -------------- (T2) SUB{S}.W Rd, SP, #constT ------------------- */
-   if (INSN0(15,11) == BITS5(1,1,1,1,0)
-       && INSN0(9,5) == BITS5(0,1,1,0,1)
-       && INSN0(3,0) == BITS4(1,1,0,1)
-       && INSN1(15,15) == 0) {
-      UInt rN = 13; // SP
-      UInt rD = INSN1(11,8);
-      UInt bS    = INSN0(4,4);
-      Bool valid = !isBadRegT(rD);
-      if (valid) {
-         IRTemp argL  = newTemp(Ity_I32);
-         IRTemp argR  = newTemp(Ity_I32);
-         IRTemp res   = newTemp(Ity_I32);
-         UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
-         assign(argL, getIRegT(rN));
-         assign(argR, mkU32(imm12));
-         assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
-         putIRegT(rD, mkexpr(res), condT);
-         if (bS == 1) {
-               setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
-         }
-         DIP("sub%s.w r%u, sp, #%u\n",
-             bS == 1 ? "s" : "", rD, imm12);
-         goto decode_success;
-      }
-   }
-
    /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
    /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
    if (INSN0(15,11) == BITS5(1,1,1,1,0)
@@ -16311,14 +16357,16 @@
       UInt how  = INSN1(5,4);
 
       Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
-      /* but allow "add.w reg, sp, reg   w/ no shift */
+      /* but allow "add.w reg, sp, reg   w/ no shift
+         (T3) "ADD (SP plus register) */
       if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
-          && rN == 13 && imm5 == 0 && how == 0) {
+          && rD != 15 && rN == 13 && imm5 == 0 && how == 0) {
          valid = True;
       }
-      /* also allow "sub.w sp, sp, reg   w/ no shift */
-      if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // add
-          && rD == 13 && rN == 13 && imm5 == 0 && how == 0) {
+      /* also allow "sub.w reg, sp, reg" w/ no shift;
+         this is (T1) of "SUB (SP minus register)" */
+      if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
+          && rD != 15 && rN == 13 && imm5 == 0 && how == 0) {
          valid = True;
       }
       if (valid) {
@@ -16519,7 +16567,6 @@
       UInt rM  = INSN1(3,0);
       UInt bS  = INSN0(4,4);
       Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
-      if (how == 3) valid = False; //ATC
       if (valid) {
          IRTemp rNt    = newTemp(Ity_I32);
          IRTemp rMt    = newTemp(Ity_I32);
@@ -17842,6 +17889,49 @@
       }
    }
 
+   /* --------------- (T1) LDREX{B,H} --------------- */
+   if (INSN0(15,4) == 0xE8D
+       && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
+      UInt rN  = INSN0(3,0);
+      UInt rT  = INSN1(15,12);
+      Bool isH = INSN1(11,0) == 0xF5F;
+      if (!isBadRegT(rT) && rN != 15) {
+         IRTemp res;
+         // go uncond
+         mk_skip_over_T32_if_cond_is_false( condT );
+         // now uncond
+         res = newTemp(isH ? Ity_I16 : Ity_I8);
+         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
+                           NULL/*this is a load*/ ));
+         putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
+                      IRTemp_INVALID);
+         DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
+         goto decode_success;
+      }
+   }
+
+   /* --------------- (T1) LDREXD --------------- */
+   if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
+      UInt rN  = INSN0(3,0);
+      UInt rT  = INSN1(15,12);
+      UInt rT2 = INSN1(11,8);
+      if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
+         IRTemp res;
+         // go uncond
+         mk_skip_over_T32_if_cond_is_false( condT );
+         // now uncond
+         res = newTemp(Ity_I64);
+         // FIXME: assumes little-endian guest
+         stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
+                           NULL/*this is a load*/ ));
+         // FIXME: assumes little-endian guest
+         putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
+         putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
+         DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
+         goto decode_success;
+      }
+   }
+
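A hypothetical helper (illustration only, using stdint types rather than VEX's) showing the arithmetic that Iop_64to32 and Iop_64HIto32 perform on the doubleword loaded by LDREXD, under the little-endian assumption flagged above:

#include <stdint.h>

/* rT gets the low word, rT2 the high word, matching the putIRegT
   calls in the LDREXD case. */
static void split_ldrexd_result(uint64_t val,
                                uint32_t* rT_val, uint32_t* rT2_val)
{
   *rT_val  = (uint32_t)val;         /* Iop_64to32   */
   *rT2_val = (uint32_t)(val >> 32); /* Iop_64HIto32 */
}
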
    /* ----------------- (T1) STREX ----------------- */
    if (INSN0(15,4) == 0xE84) {
       UInt rN   = INSN0(3,0);
@@ -17851,50 +17941,87 @@
       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15 
           && rD != rN && rD != rT) {
          IRTemp resSC1, resSC32;
-
          // go uncond
          mk_skip_over_T32_if_cond_is_false( condT );
          // now uncond
-
          /* Ok, now we're unconditional.  Do the store. */
          resSC1 = newTemp(Ity_I1);
          stmt( IRStmt_LLSC(Iend_LE,
                            resSC1,
                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
                            getIRegT(rT)) );
-
          /* Set rD to 1 on failure, 0 on success.  Currently we have
             resSC1 == 0 on failure, 1 on success. */
          resSC32 = newTemp(Ity_I32);
          assign(resSC32,
                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
-
          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
          DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
          goto decode_success;
       }
    }
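
The resSC1-to-rD mapping recurs in every STREX variant in this patch; a minimal sketch of the intended semantics (illustration only, stdint types assumed):

#include <stdint.h>

/* IR's store-conditional yields 1 on success, but ARM's STREX writes
   0 to rD on success and 1 on failure -- hence the Iop_Not1 before
   the Iop_1Uto32 widening in the code above. */
static uint32_t strex_status(int resSC1 /* 1 = store succeeded */)
{
   return resSC1 ? 0u : 1u;
}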
 
-   /* -------------- read CP15 TPIDRURO register ------------- */
-   /* mrc     p15, 0, r0, c13, c0, 3  up to
-      mrc     p15, 0, r14, c13, c0, 3
-   */
-   /* I don't know whether this is really v7-only.  But anyway, we
-      have to support it since arm-linux uses TPIDRURO as a thread
-      state register. */
-   if (INSN0(15,0) == 0xEE1D && INSN1(11,0) == 0xF70) {
-      UInt rD = INSN1(15,12);
-      if (rD <= 14) {
-         /* skip r15, that's too stupid to handle */
-         putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
-                      condT);
-         DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
+   /* --------------- (T1) STREX{B,H} --------------- */
+   if (INSN0(15,4) == 0xE8C
+       && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
+      UInt rN  = INSN0(3,0);
+      UInt rT  = INSN1(15,12);
+      UInt rD  = INSN1(3,0);
+      Bool isH = INSN1(11,4) == 0xF5;
+      if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15 
+          && rD != rN && rD != rT) {
+         IRTemp resSC1, resSC32;
+         // go uncond
+         mk_skip_over_T32_if_cond_is_false( condT );
+         // now uncond
+         /* Ok, now we're unconditional.  Do the store. */
+         resSC1 = newTemp(Ity_I1);
+         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
+                           unop(isH ? Iop_32to16 : Iop_32to8,
+                                getIRegT(rT))) );
+         /* Set rD to 1 on failure, 0 on success.  Currently we have
+            resSC1 == 0 on failure, 1 on success. */
+         resSC32 = newTemp(Ity_I32);
+         assign(resSC32,
+                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
+         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
+         DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
          goto decode_success;
       }
    }
 
+   /* ---------------- (T1) STREXD ---------------- */
+   if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
+      UInt rN  = INSN0(3,0);
+      UInt rT  = INSN1(15,12);
+      UInt rT2 = INSN1(11,8);
+      UInt rD  = INSN1(3,0);
+      if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
+          && rN != 15 && rD != rN && rD != rT && rD != rT) {
+         IRTemp resSC1, resSC32, data;
+         // go uncond
+         mk_skip_over_T32_if_cond_is_false( condT );
+         // now uncond
+         /* Ok, now we're unconditional.  Do the store. */
+         resSC1 = newTemp(Ity_I1);
+         data = newTemp(Ity_I64);
+         // FIXME: assumes little-endian guest
+         assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
+         // FIXME: assumes little-endian guest
+         stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
+         /* Set rD to 1 on failure, 0 on success.  Currently we have
+            resSC1 == 0 on failure, 1 on success. */
+         resSC32 = newTemp(Ity_I32);
+         assign(resSC32,
+                unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
+         putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
+         DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
+         goto decode_success;
+      }
+   }
+
    /* -------------- v7 barrier insns -------------- */
    if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
+      /* FIXME: should this be unconditional? */
       /* XXX this isn't really right, is it?  The generated IR does
          them unconditionally.  I guess it doesn't matter since it
          doesn't do any harm to do them even when the guarding
@@ -17931,6 +18058,41 @@
       }
    }
 
+   /* ---------------------- PLD{,W} ---------------------- */
+   if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
+      /* FIXME: should this be unconditional? */
+      /* PLD/PLDW immediate, encoding T1 */
+      UInt rN    = INSN0(3,0);
+      UInt bW    = INSN0(5,5);
+      UInt imm12 = INSN1(11,0);
+      DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
+      goto decode_success;
+   }
+
+   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
+      /* FIXME: should this be unconditional? */
+      /* PLD/PLDW immediate, encoding T2 */
+      UInt rN    = INSN0(3,0);
+      UInt bW    = INSN0(5,5);
+      UInt imm8  = INSN1(7,0);
+      DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
+      goto decode_success;
+   }
+
+   if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
+      /* FIXME: should this be unconditional? */
+      /* PLD/PLDW register, encoding T1 */
+      UInt rN   = INSN0(3,0);
+      UInt rM   = INSN1(3,0);
+      UInt bW   = INSN0(5,5);
+      UInt imm2 = INSN1(5,4);
+      if (!isBadRegT(rM)) {
+         DIP("pld%s [r%u, r%u, lsl %d]\n", bW ? "w" : "", rN, rM, imm2);
+         goto decode_success;
+      }
+      /* fall through */
+   }
+
    /* -------------- read CP15 TPIDRURO register ------------- */
    /* mrc     p15, 0,  r0, c13, c0, 3  up to
       mrc     p15, 0, r14, c13, c0, 3
@@ -17938,8 +18100,8 @@
    /* I don't know whether this is really v7-only.  But anyway, we
       have to support it since arm-linux uses TPIDRURO as a thread
       state register. */
-   
    if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
+      /* FIXME: should this be unconditional? */
       UInt rD = INSN1(15,12);
       if (!isBadRegT(rD)) {
          putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), IRTemp_INVALID);
@@ -17949,6 +18111,17 @@
       /* fall through */
    }
 
+   /* ------------------- CLREX ------------------ */
+   if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
+      /* This cancels all reservations made by any preceding
+         LDREX(es), i.e. it clears the exclusive monitor.  Arrange to
+         hand it through to the back end. */
+      mk_skip_over_T32_if_cond_is_false( condT );
+      stmt( IRStmt_MBE(Imbe_CancelReservation) );
+      DIP("clrex\n");
+      goto decode_success;
+   }
+
    /* ------------------- NOP ------------------ */
    if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
       DIP("nop\n");
@@ -18046,7 +18219,7 @@
          assert here. */
       vassert(dres.whatNext == Dis_Continue);
       vassert(irsb->next == NULL);
-      vassert(irsb->jumpkind = Ijk_Boring);
+      vassert(irsb->jumpkind == Ijk_Boring);
       /* If r15 is unconditionally written, terminate the block by
          jumping to it.  If it's conditionally written, still
          terminate the block (a shame, but we can't do side exits to
@@ -18083,6 +18256,85 @@
 #undef DIS
 
 
+/* Helper table for figuring out how many insns an IT insn
+   conditionalises.
+
+   An ITxyz instruction of the format "1011 1111 firstcond mask"
+   conditionalises some number of instructions, as indicated by the
+   following table.  A value of zero indicates the instruction is
+   invalid in some way.
+
+   mask = 0 means this isn't an IT instruction
+   fc = 15 (NV) means unpredictable
+
+   The line fc = 14 (AL) is different from the others; there are
+   additional constraints in this case.
+
+          mask(0 ..                   15)
+        +--------------------------------
+   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0 
+   15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
+
+   To be conservative with the analysis, let's rule out the mask = 0
+   case, since that isn't an IT insn at all.  But for all the other
+   cases where the table contains zero, that means unpredictable, so
+   let's say 4 to be conservative.  Hence we have a safe value for any
+   IT (mask,fc) pair that the CPU would actually identify as an IT
+   instruction.  The final table is
+
+          mask(0 ..                   15)
+        +--------------------------------
+   fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+   ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 
+        | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4 
+   15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 
+*/
+static const UChar it_length_table[256]
+   = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4, 
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
+       0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
+       0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
+     };
+
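As a cross-check, the whole table can be recomputed from the encoding rules sketched above; a hypothetical reimplementation (not part of the patch, stdint types assumed):

#include <stdint.h>

/* Recompute it_length_table[(fc << 4) | mask] from first principles.
   mask == 0 is not an IT insn at all, so yields 0.  Otherwise the
   block length is 4 minus the bit position of the lowest set mask
   bit.  For fc == 15 (NV) everything is UNPREDICTABLE, and for
   fc == 14 (AL) only power-of-two masks (all-T blocks) are
   predictable; say 4 in those cases to stay conservative. */
static uint8_t it_length_from_scratch(unsigned fc, unsigned mask)
{
   unsigned n = 4;
   if (mask == 0) return 0;
   while ((mask & 1) == 0) { mask >>= 1; n--; }
   if (fc == 15) return 4;
   if (fc == 14 && (mask >> 1) != 0) return 4;
   return (uint8_t)n;
}
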
+
 /*------------------------------------------------------------*/
 /*--- Top-level fn                                         ---*/
 /*------------------------------------------------------------*/
diff --git a/main/VEX/priv/guest_generic_bb_to_IR.c b/main/VEX/priv/guest_generic_bb_to_IR.c
index f7dc020..32dca8c 100644
--- a/main/VEX/priv/guest_generic_bb_to_IR.c
+++ b/main/VEX/priv/guest_generic_bb_to_IR.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -42,33 +42,60 @@
 
 
 /* Forwards .. */
-__attribute__((regparm(2)))
+VEX_REGPARM(2)
 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 );
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 );
 
+VEX_REGPARM(2)
+static ULong genericg_compute_checksum_8al ( HWord first_w64, HWord n_w64s );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 );
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 );
+
 /* Small helpers */
 static Bool const_False ( void* callback_opaque, Addr64 a ) { 
    return False; 
@@ -85,8 +112,16 @@
   dis_instr_fn is the arch-specific fn to disassemble one instruction; it
    is this that does the real work.
 
-   do_self_check indicates that the caller needs a self-checking
-   translation.
+   needs_self_check is a callback used to ask the caller which of the
+   extents, if any, a self check is required for.  The returned value
+   is a bitmask with a 1 in position i indicating that the i'th extent
+   needs a check.  Since there can be at most 3 extents, the returned
+   values must be between 0 and 7.
+
+   The number of extents which did get a self check (0 to 3) is put in
+   n_sc_extents.  The caller already knows this because it told us
+   which extents to add checks for, via the needs_self_check callback,
+   but we ship the number back out here for the caller's convenience.
 
    preamble_function is a callback which allows the caller to add
    its own IR preamble (following the self-check, if any).  May be
@@ -105,27 +140,31 @@
    (In fact it's a VgInstrumentClosure.)
 */
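
As an illustration of the new callback contract (a hypothetical caller, not code from this patch), a needs_self_check implementation that requests a check on every extent could look like this, given the VexGuestExtents fields used elsewhere in this file:

/* Request a self check for each of the (at most 3) used extents.
   Bit i of the result corresponds to vge->base[i] / vge->len[i]. */
static UInt example_needs_self_check ( void* opaque,
                                       VexGuestExtents* vge )
{
   UInt i, bitmask = 0;
   for (i = 0; i < vge->n_used; i++)
      bitmask |= (1U << i);
   return bitmask;   /* always in 0 .. 7 */
}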
 
-IRSB* bb_to_IR ( /*OUT*/VexGuestExtents* vge,
-                 /*IN*/ void*            callback_opaque,
-                 /*IN*/ DisOneInstrFn    dis_instr_fn,
-                 /*IN*/ UChar*           guest_code,
-                 /*IN*/ Addr64           guest_IP_bbstart,
-                 /*IN*/ Bool             (*chase_into_ok)(void*,Addr64),
-                 /*IN*/ Bool             host_bigendian,
-                 /*IN*/ VexArch          arch_guest,
-                 /*IN*/ VexArchInfo*     archinfo_guest,
-                 /*IN*/ VexAbiInfo*      abiinfo_both,
-                 /*IN*/ IRType           guest_word_type,
-                 /*IN*/ Bool             do_self_check,
-                 /*IN*/ Bool             (*preamble_function)(void*,IRSB*),
-                 /*IN*/ Int              offB_TISTART,
-                 /*IN*/ Int              offB_TILEN )
+IRSB* bb_to_IR ( 
+         /*OUT*/VexGuestExtents* vge,
+         /*OUT*/UInt*            n_sc_extents,
+         /*IN*/ void*            callback_opaque,
+         /*IN*/ DisOneInstrFn    dis_instr_fn,
+         /*IN*/ UChar*           guest_code,
+         /*IN*/ Addr64           guest_IP_bbstart,
+         /*IN*/ Bool             (*chase_into_ok)(void*,Addr64),
+         /*IN*/ Bool             host_bigendian,
+         /*IN*/ VexArch          arch_guest,
+         /*IN*/ VexArchInfo*     archinfo_guest,
+         /*IN*/ VexAbiInfo*      abiinfo_both,
+         /*IN*/ IRType           guest_word_type,
+         /*IN*/ UInt             (*needs_self_check)(void*,VexGuestExtents*),
+         /*IN*/ Bool             (*preamble_function)(void*,IRSB*),
+         /*IN*/ Int              offB_TISTART,
+         /*IN*/ Int              offB_TILEN
+      )
 {
    Long       delta;
    Int        i, n_instrs, first_stmt_idx;
    Bool       resteerOK, need_to_put_IP, debug_print;
    DisResult  dres;
    IRStmt*    imark;
+   IRStmt*    nop;
    static Int n_resteers = 0;
    Int        d_resteers = 0;
    Int        selfcheck_idx = 0;
@@ -138,11 +177,6 @@
 
    debug_print = toBool(vex_traceflags & VEX_TRACE_FE);
 
-   /* Note: for adler32 to work without % operation for the self
-      check, need to limit length of stuff it scans to 5552 bytes.
-      Therefore limiting the max bb len to 100 insns seems generously
-      conservative. */
-
    /* check sanity .. */
    vassert(sizeof(HWord) == sizeof(void*));
    vassert(vex_control.guest_max_insns >= 1);
@@ -155,6 +189,7 @@
    vge->n_used  = 1;
    vge->base[0] = guest_IP_bbstart;
    vge->len[0]  = 0;
+   *n_sc_extents = 0;
 
    /* And a new IR superblock to dump the result into. */
    irsb = emptyIRSB();
@@ -164,26 +199,21 @@
    delta    = 0;
    n_instrs = 0;
 
-   /* Guest addresses as IRConsts.  Used in the two self-checks
-      generated. */
-   if (do_self_check) {
-      guest_IP_bbstart_IRConst
-         = guest_word_type==Ity_I32 
-              ? IRConst_U32(toUInt(guest_IP_bbstart))
-              : IRConst_U64(guest_IP_bbstart);
-   }
+   /* Guest addresses as IRConsts.  Used in self-checks to specify the
+      restart-after-discard point. */
+   guest_IP_bbstart_IRConst
+      = guest_word_type==Ity_I32 
+           ? IRConst_U32(toUInt(guest_IP_bbstart))
+           : IRConst_U64(guest_IP_bbstart);
 
-   /* If asked to make a self-checking translation, leave 5 spaces
-      in which to put the check statements.  We'll fill them in later
-      when we know the length and adler32 of the area to check. */
-   if (do_self_check) {
-      selfcheck_idx = irsb->stmts_used;
-      addStmtToIRSB( irsb, IRStmt_NoOp() );
-      addStmtToIRSB( irsb, IRStmt_NoOp() );
-      addStmtToIRSB( irsb, IRStmt_NoOp() );
-      addStmtToIRSB( irsb, IRStmt_NoOp() );
-      addStmtToIRSB( irsb, IRStmt_NoOp() );
-   }
+   /* Leave 15 spaces in which to put the check statements for a self
+      checking translation (up to 3 extents, and 5 stmts required for
+      each).  We won't know until later the extents and checksums of
+      the areas, if any, that need to be checked. */
+   nop = IRStmt_NoOp();
+   selfcheck_idx = irsb->stmts_used;
+   for (i = 0; i < 3 * 5; i++)
+      addStmtToIRSB( irsb, nop );
 
    /* If the caller supplied a function to add its own preamble, use
       it now. */
@@ -193,7 +223,7 @@
          /* The callback has completed the IR block without any guest
             insns being disassembled into it, so just return it at
             this point, even if a self-check was requested - as there
-            is nothing to self-check.  The five self-check no-ops will
+            is nothing to self-check.  The 15 self-check no-ops will
             still be in place, but they are harmless. */
          return irsb;
       }
@@ -208,11 +238,6 @@
       resteerOK 
          = toBool(
               n_instrs < vex_control.guest_chase_thresh
-              /* If making self-checking translations, don't chase
-                 .. it makes the checks too complicated.  We only want
-                 to scan just one sequence of bytes in the check, not
-                 a whole bunch. */
-              && !do_self_check
               /* we can't afford to have a resteer once we're on the
                  last extent slot. */
               && vge->n_used < 3
@@ -241,8 +266,36 @@
 
       /* Add an instruction-mark statement.  We won't know until after
         disassembling the instruction how long the instruction is, so
-         just put in a zero length and we'll fix it up later. */
-      addStmtToIRSB( irsb, IRStmt_IMark( guest_IP_curr_instr, 0 ));
+         just put in a zero length and we'll fix it up later.
+
+         On ARM, the least significant bit of the instr address
+         distinguishes ARM vs Thumb instructions.  All instructions
+         actually start on at least 2-aligned addresses.  So we need
+         to ignore the bottom bit of the insn address when forming the
+         IMark's address field, but put that bottom bit in the delta
+         field, so that comparisons against guest_R15T for Thumb can
+         be done correctly.  By inspecting the delta field,
+         instruction processors can determine whether the instruction
+         was originally Thumb or ARM.  For more details of this
+         convention, see comments on definition of guest_R15T in
+         libvex_guest_arm.h. */
+      if (arch_guest == VexArchARM && (guest_IP_curr_instr & (Addr64)1)) {
+         /* Thumb insn => mask out the T bit, but put it in delta */
+         addStmtToIRSB( irsb,
+                        IRStmt_IMark(guest_IP_curr_instr & ~(Addr64)1,
+                                     0, /* len */
+                                     1  /* delta */
+                        )
+         );
+      } else {
+         /* All other targets: store IP as-is, and set delta to zero. */
+         addStmtToIRSB( irsb,
+                        IRStmt_IMark(guest_IP_curr_instr,
+                                     0, /* len */
+                                     0  /* delta */
+                        )
+         );
+      }
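
A compact restatement of the IMark convention just added (illustration only; stdint types stand in for Addr64/UChar): the T bit moves from the address into the delta field, so address plus delta reconstructs the original guest_R15T-style value.

#include <stdint.h>

static void imark_fields_for_arm(uint64_t guest_IP,
                                 uint64_t* imark_addr,
                                 uint8_t*  imark_delta)
{
   *imark_delta = (uint8_t)(guest_IP & 1);  /* 1 => Thumb, 0 => ARM  */
   *imark_addr  = guest_IP & ~(uint64_t)1;  /* 2-aligned insn address */
}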
 
       /* for the first insn, the dispatch loop will have set
          %IP, but for all the others we have to do it ourselves. */
@@ -375,7 +428,8 @@
 
   done:
    /* We're done.  The only thing that might need attending to is that
-      a self-checking preamble may need to be created.
+      a self-checking preamble may need to be created.  If so it gets
+      placed in the 15 slots reserved above.
 
       The scheme is to compute a rather crude checksum of the code
       we're making a translation of, and add to the IR a call to a
@@ -384,89 +438,171 @@
       match.  This is obviously very expensive and considerable
       efforts are made to speed it up:
 
-      * the checksum is computed from all the 32-bit words that
-        overlap the translated code.  That means it could depend on up
-        to 3 bytes before and 3 bytes after which aren't part of the
-        translated area, and so if those change then we'll
-        unnecessarily have to discard and retranslate.  This seems
-        like a pretty remote possibility and it seems as if the
-        benefit of not having to deal with the ends of the range at
-        byte precision far outweigh any possible extra translations
-        needed.
+      * the checksum is computed from all the naturally aligned
+        host-sized words that overlap the translated code.  That means
+        it could depend on up to 7 bytes before and 7 bytes after
+        which aren't part of the translated area, and so if those
+        change then we'll unnecessarily have to discard and
+        retranslate.  This seems like a pretty remote possibility and
+        it seems as if the benefit of not having to deal with the ends
+        of the range at byte precision far outweigh any possible extra
+        translations needed.
 
       * there's a generic routine and 12 specialised cases, which
         handle the cases of 1 through 12-word lengths respectively.
         They seem to cover about 90% of the cases that occur in
         practice.
-   */
-   if (do_self_check) {
 
-      UInt     len2check, expected32;
+      We ask the caller, via needs_self_check, which of the 3 vge
+      extents needs a check, and only generate check code for those
+      that do.
+   */
+   {
+      Addr64   base2check;
+      UInt     len2check;
+      HWord    expectedhW;
       IRTemp   tistart_tmp, tilen_tmp;
-      UInt     (*fn_generic)(HWord, HWord) __attribute__((regparm(2)));
-      UInt     (*fn_spec)(HWord) __attribute__((regparm(1)));
+      HWord    VEX_REGPARM(2) (*fn_generic)(HWord, HWord);
+      HWord    VEX_REGPARM(1) (*fn_spec)(HWord);
       HChar*   nm_generic;
       HChar*   nm_spec;
       HWord    fn_generic_entry = 0;
       HWord    fn_spec_entry = 0;
+      UInt     host_word_szB = sizeof(HWord);
+      IRType   host_word_type = Ity_INVALID;
 
-      vassert(vge->n_used == 1);
-      len2check = vge->len[0];
+      VexGuestExtents vge_tmp = *vge;
+      UInt extents_needing_check
+         = needs_self_check(callback_opaque, &vge_tmp);
 
-      /* stay sane */
-      vassert(len2check >= 0 && len2check < 1000/*arbitrary*/);
+      if (host_word_szB == 4) host_word_type = Ity_I32;
+      if (host_word_szB == 8) host_word_type = Ity_I64;
+      vassert(host_word_type != Ity_INVALID);
 
-      /* Skip the check if the translation involved zero bytes */
-      if (len2check > 0) {
-         HWord first_w32 = ((HWord)guest_code) & ~(HWord)3;
-         HWord last_w32  = (((HWord)guest_code) + len2check - 1) & ~(HWord)3;
-         vassert(first_w32 <= last_w32);
-         HWord w32_diff = last_w32 - first_w32;
-         vassert(0 == (w32_diff & 3));
-         HWord w32s_to_check = (w32_diff + 4) / 4;
-         vassert(w32s_to_check > 0 && w32s_to_check < 1004/*arbitrary*//4);
+      vassert(vge->n_used >= 1 && vge->n_used <= 3);
 
-         /* vex_printf("%lx %lx  %ld\n", first_w32, last_w32, w32s_to_check); */
+      /* Caller shouldn't claim that nonexistent extents need a
+         check. */
+      vassert((extents_needing_check >> vge->n_used) == 0);
 
-         fn_generic =  genericg_compute_checksum_4al;
-         nm_generic = "genericg_compute_checksum_4al";
+      for (i = 0; i < vge->n_used; i++) {
+
+         /* Do we need to generate a check for this extent? */
+         if ((extents_needing_check & (1 << i)) == 0)
+            continue;
+
+         /* Tell the caller */
+         (*n_sc_extents)++;
+
+         /* the extent we're generating a check for */
+         base2check = vge->base[i];
+         len2check  = vge->len[i];
+
+         /* stay sane */
+         vassert(len2check >= 0 && len2check < 1000/*arbitrary*/);
+
+         /* Skip the check if the translation involved zero bytes */
+         if (len2check == 0)
+            continue;
+
+         HWord first_hW = ((HWord)base2check)
+                          & ~(HWord)(host_word_szB-1);
+         HWord last_hW  = (((HWord)base2check) + len2check - 1)
+                          & ~(HWord)(host_word_szB-1);
+         vassert(first_hW <= last_hW);
+         HWord hW_diff = last_hW - first_hW;
+         vassert(0 == (hW_diff & (host_word_szB-1)));
+         HWord hWs_to_check = (hW_diff + host_word_szB) / host_word_szB;
+         vassert(hWs_to_check > 0
+                 && hWs_to_check < 1004/*arbitrary*/ / host_word_szB);
+
+         /* vex_printf("%lx %lx  %ld\n", first_hW, last_hW, hWs_to_check); */
+
+         if (host_word_szB == 8) {
+            fn_generic =  (VEX_REGPARM(2) HWord(*)(HWord, HWord))
+                          genericg_compute_checksum_8al;
+            nm_generic = "genericg_compute_checksum_8al";
+         } else {
+            fn_generic =  (VEX_REGPARM(2) HWord(*)(HWord, HWord))
+                          genericg_compute_checksum_4al;
+            nm_generic = "genericg_compute_checksum_4al";
+         }
+
          fn_spec = NULL;
          nm_spec = NULL;
 
-         switch (w32s_to_check) {
-             case 1:  fn_spec =  genericg_compute_checksum_4al_1;
-                      nm_spec = "genericg_compute_checksum_4al_1"; break;
-             case 2:  fn_spec =  genericg_compute_checksum_4al_2;
-                      nm_spec = "genericg_compute_checksum_4al_2"; break;
-             case 3:  fn_spec =  genericg_compute_checksum_4al_3;
-                      nm_spec = "genericg_compute_checksum_4al_3"; break;
-             case 4:  fn_spec =  genericg_compute_checksum_4al_4;
-                      nm_spec = "genericg_compute_checksum_4al_4"; break;
-             case 5:  fn_spec =  genericg_compute_checksum_4al_5;
-                      nm_spec = "genericg_compute_checksum_4al_5"; break;
-             case 6:  fn_spec =  genericg_compute_checksum_4al_6;
-                      nm_spec = "genericg_compute_checksum_4al_6"; break;
-             case 7:  fn_spec =  genericg_compute_checksum_4al_7;
-                      nm_spec = "genericg_compute_checksum_4al_7"; break;
-             case 8:  fn_spec =  genericg_compute_checksum_4al_8;
-                      nm_spec = "genericg_compute_checksum_4al_8"; break;
-             case 9:  fn_spec =  genericg_compute_checksum_4al_9;
-                      nm_spec = "genericg_compute_checksum_4al_9"; break;
-             case 10: fn_spec =  genericg_compute_checksum_4al_10;
-                      nm_spec = "genericg_compute_checksum_4al_10"; break;
-             case 11: fn_spec =  genericg_compute_checksum_4al_11;
-                      nm_spec = "genericg_compute_checksum_4al_11"; break;
-             case 12: fn_spec =  genericg_compute_checksum_4al_12;
-                      nm_spec = "genericg_compute_checksum_4al_12"; break;
-             default: break;
+         if (host_word_szB == 8) {
+            HChar* nm = NULL;
+            ULong  VEX_REGPARM(1) (*fn)(HWord)  = NULL;
+            switch (hWs_to_check) {
+               case 1:  fn =  genericg_compute_checksum_8al_1;
+                        nm = "genericg_compute_checksum_8al_1"; break;
+               case 2:  fn =  genericg_compute_checksum_8al_2;
+                        nm = "genericg_compute_checksum_8al_2"; break;
+               case 3:  fn =  genericg_compute_checksum_8al_3;
+                        nm = "genericg_compute_checksum_8al_3"; break;
+               case 4:  fn =  genericg_compute_checksum_8al_4;
+                        nm = "genericg_compute_checksum_8al_4"; break;
+               case 5:  fn =  genericg_compute_checksum_8al_5;
+                        nm = "genericg_compute_checksum_8al_5"; break;
+               case 6:  fn =  genericg_compute_checksum_8al_6;
+                        nm = "genericg_compute_checksum_8al_6"; break;
+               case 7:  fn =  genericg_compute_checksum_8al_7;
+                        nm = "genericg_compute_checksum_8al_7"; break;
+               case 8:  fn =  genericg_compute_checksum_8al_8;
+                        nm = "genericg_compute_checksum_8al_8"; break;
+               case 9:  fn =  genericg_compute_checksum_8al_9;
+                        nm = "genericg_compute_checksum_8al_9"; break;
+               case 10: fn =  genericg_compute_checksum_8al_10;
+                        nm = "genericg_compute_checksum_8al_10"; break;
+               case 11: fn =  genericg_compute_checksum_8al_11;
+                        nm = "genericg_compute_checksum_8al_11"; break;
+               case 12: fn =  genericg_compute_checksum_8al_12;
+                        nm = "genericg_compute_checksum_8al_12"; break;
+               default: break;
+            }
+            fn_spec = (VEX_REGPARM(1) HWord(*)(HWord)) fn;
+            nm_spec = nm;
+         } else {
+            HChar* nm = NULL;
+            UInt   VEX_REGPARM(1) (*fn)(HWord) = NULL;
+            switch (hWs_to_check) {
+               case 1:  fn =  genericg_compute_checksum_4al_1;
+                        nm = "genericg_compute_checksum_4al_1"; break;
+               case 2:  fn =  genericg_compute_checksum_4al_2;
+                        nm = "genericg_compute_checksum_4al_2"; break;
+               case 3:  fn =  genericg_compute_checksum_4al_3;
+                        nm = "genericg_compute_checksum_4al_3"; break;
+               case 4:  fn =  genericg_compute_checksum_4al_4;
+                        nm = "genericg_compute_checksum_4al_4"; break;
+               case 5:  fn =  genericg_compute_checksum_4al_5;
+                        nm = "genericg_compute_checksum_4al_5"; break;
+               case 6:  fn =  genericg_compute_checksum_4al_6;
+                        nm = "genericg_compute_checksum_4al_6"; break;
+               case 7:  fn =  genericg_compute_checksum_4al_7;
+                        nm = "genericg_compute_checksum_4al_7"; break;
+               case 8:  fn =  genericg_compute_checksum_4al_8;
+                        nm = "genericg_compute_checksum_4al_8"; break;
+               case 9:  fn =  genericg_compute_checksum_4al_9;
+                        nm = "genericg_compute_checksum_4al_9"; break;
+               case 10: fn =  genericg_compute_checksum_4al_10;
+                        nm = "genericg_compute_checksum_4al_10"; break;
+               case 11: fn =  genericg_compute_checksum_4al_11;
+                        nm = "genericg_compute_checksum_4al_11"; break;
+               case 12: fn =  genericg_compute_checksum_4al_12;
+                        nm = "genericg_compute_checksum_4al_12"; break;
+               default: break;
+            }
+            fn_spec = (VEX_REGPARM(1) HWord(*)(HWord))fn;
+            nm_spec = nm;
          }
 
-         expected32 = fn_generic( first_w32, w32s_to_check );
+         expectedhW = fn_generic( first_hW, hWs_to_check );
          /* If we got a specialised version, check it produces the same
             result as the generic version! */
          if (fn_spec) {
             vassert(nm_spec);
-            vassert(expected32 == fn_spec( first_w32 ));
+            vassert(expectedhW == fn_spec( first_hW ));
          } else {
             vassert(!nm_spec);
          }
@@ -478,20 +614,23 @@
          tistart_tmp = newIRTemp(irsb->tyenv, guest_word_type);
          tilen_tmp   = newIRTemp(irsb->tyenv, guest_word_type);
 
-         irsb->stmts[selfcheck_idx+0]
-            = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(guest_IP_bbstart_IRConst) );
+         IRConst* base2check_IRConst
+            = guest_word_type==Ity_I32 ? IRConst_U32(toUInt(base2check))
+                                       : IRConst_U64(base2check);
+         IRConst* len2check_IRConst
+            = guest_word_type==Ity_I32 ? IRConst_U32(len2check)
+                                       : IRConst_U64(len2check);
 
-         irsb->stmts[selfcheck_idx+1]
-            = IRStmt_WrTmp(tilen_tmp,
-                           guest_word_type==Ity_I32 
-                              ? IRExpr_Const(IRConst_U32(len2check)) 
-                              : IRExpr_Const(IRConst_U64(len2check))
-              );
+         irsb->stmts[selfcheck_idx + i * 5 + 0]
+            = IRStmt_WrTmp(tistart_tmp, IRExpr_Const(base2check_IRConst) );
 
-         irsb->stmts[selfcheck_idx+2]
+         irsb->stmts[selfcheck_idx + i * 5 + 1]
+            = IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) );
+
+         irsb->stmts[selfcheck_idx + i * 5 + 2]
             = IRStmt_Put( offB_TISTART, IRExpr_RdTmp(tistart_tmp) );
 
-         irsb->stmts[selfcheck_idx+3]
+         irsb->stmts[selfcheck_idx + i * 5 + 3]
             = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) );
 
          /* Generate the entry point descriptors */
@@ -516,34 +655,39 @@
          IRExpr* callexpr = NULL;
          if (fn_spec) {
             callexpr = mkIRExprCCall( 
-                          Ity_I32, 1/*regparms*/, 
+                          host_word_type, 1/*regparms*/, 
                           nm_spec, (void*)fn_spec_entry,
                           mkIRExprVec_1(
-                             mkIRExpr_HWord( (HWord)first_w32 )
+                             mkIRExpr_HWord( (HWord)first_hW )
                           )
                        );
          } else {
             callexpr = mkIRExprCCall( 
-                          Ity_I32, 2/*regparms*/, 
+                          host_word_type, 2/*regparms*/, 
                           nm_generic, (void*)fn_generic_entry,
                           mkIRExprVec_2(
-                             mkIRExpr_HWord( (HWord)first_w32 ),
-                             mkIRExpr_HWord( (HWord)w32s_to_check )
+                             mkIRExpr_HWord( (HWord)first_hW ),
+                             mkIRExpr_HWord( (HWord)hWs_to_check )
                           )
                        );
          }
 
-         irsb->stmts[selfcheck_idx+4]
+         irsb->stmts[selfcheck_idx + i * 5 + 4]
             = IRStmt_Exit( 
                  IRExpr_Binop( 
-                    Iop_CmpNE32,
+                    host_word_type==Ity_I64 ? Iop_CmpNE64 : Iop_CmpNE32,
                     callexpr,
-                    IRExpr_Const(IRConst_U32(expected32))
+                       host_word_type==Ity_I64
+                          ? IRExpr_Const(IRConst_U64(expectedhW))
+                          : IRExpr_Const(IRConst_U32(expectedhW))
                  ),
                  Ijk_TInval,
+                 /* Where we must restart if there's a failure: at the
+                    first extent, regardless of which extent the
+                    failure actually happened in. */
                  guest_IP_bbstart_IRConst
               );
-      }
+      } /* for (i = 0; i < vge->n_used; i++) */
    }
 
    return irsb;
@@ -558,19 +702,19 @@
 /* CALLED FROM GENERATED CODE */
 
 /* Compute a checksum of host memory at [addr .. addr+len-1], as fast
-   as possible.  The _4al_4plus version is assured that the request is
-   for 4-aligned memory and for a block of 4 or more long, whilst the
-   _generic version must be able to handle any alignment, and lengths
-   down to zero too.  This fn is called once for every use of a
-   self-checking translation, so it needs to be as fast as
-   possible. */
+   as possible.  All _4al versions assume that the supplied address is
+   4 aligned.  All length values are in 4-byte chunks.  These fns are
+   called once for every use of a self-checking translation, so they
+   need to be as fast as possible. */
+
+/* --- 32-bit versions, used only on 32-bit hosts --- */
 
 static inline UInt ROL32 ( UInt w, Int n ) {
    w = (w << n) | (w >> (32-n));
    return w;
 }
 
-__attribute((regparm(2)))
+VEX_REGPARM(2)
 static UInt genericg_compute_checksum_4al ( HWord first_w32, HWord n_w32s )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -598,7 +742,7 @@
 
 /* Specialised versions of the above function */
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_1 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -609,7 +753,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_2 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -622,7 +766,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_3 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -637,7 +781,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_4 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -651,7 +795,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_5 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -667,7 +811,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_6 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -685,7 +829,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_7 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -705,7 +849,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_8 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -724,7 +868,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_9 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -745,7 +889,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_10 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -768,7 +912,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_11 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -793,7 +937,7 @@
    return sum1 + sum2;
 }
 
-__attribute__((regparm(1)))
+VEX_REGPARM(1)
 static UInt genericg_compute_checksum_4al_12 ( HWord first_w32 )
 {
    UInt  sum1 = 0, sum2 = 0;
@@ -817,6 +961,261 @@
    return sum1 + sum2;
 }
 
+
+/* --- 64-bit versions, used only on 64-bit hosts --- */
+
+static inline ULong ROL64 ( ULong w, Int n ) {
+   w = (w << n) | (w >> (64-n));
+   return w;
+}
+
+VEX_REGPARM(2)
+static ULong genericg_compute_checksum_8al ( HWord first_w64, HWord n_w64s )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   /* unrolled */
+   while (n_w64s >= 4) {
+      ULong  w;
+      w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+      w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+      w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+      w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+      p += 4;
+      n_w64s -= 4;
+      sum1 ^= sum2;
+   }
+   while (n_w64s >= 1) {
+      ULong  w;
+      w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+      p += 1;
+      n_w64s -= 1;
+      sum1 ^= sum2;
+   }
+   return sum1 + sum2;
+}
+
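Each specialised _N variant below must agree with the generic routine; the vassert in bb_to_IR enforces this at translation time, along the lines of this sketch for the 2-doubleword case (names taken from the declarations above):

static void checksum_8al_crosscheck ( HWord first_w64 )
{
   ULong expected = genericg_compute_checksum_8al(first_w64, 2);
   vassert(expected == genericg_compute_checksum_8al_2(first_w64));
}
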
+/* Specialised versions of the above function */
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_1 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_2 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_3 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_4 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_5 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_6 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_7 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_8 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[7];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_9 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[7];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[8];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_10 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[7];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[8];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[9];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_11 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[7];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[8];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[9];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[10]; sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
+VEX_REGPARM(1)
+static ULong genericg_compute_checksum_8al_12 ( HWord first_w64 )
+{
+   ULong  sum1 = 0, sum2 = 0;
+   ULong* p = (ULong*)first_w64;
+   ULong  w;
+   w = p[0];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[1];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[2];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[3];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[4];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[5];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[6];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[7];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   w = p[8];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[9];  sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[10]; sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   w = p[11]; sum1 = ROL64(sum1 ^ w, 63);  sum2 += w;
+   sum1 ^= sum2;
+   return sum1 + sum2;
+}
+
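The unrolled genericg_compute_checksum_8al_N variants above all specialise one
rotate-xor/add scheme.  As an illustrative sketch only (not part of the patch,
and assuming the same ROL64 macro and VEX types), the generic loop they unroll
is:

static ULong genericg_compute_checksum_8al_generic ( HWord first_w64, UWord n )
{
   ULong  sum1 = 0, sum2 = 0;
   ULong* p = (ULong*)first_w64;
   UWord  i, full = n & ~(UWord)3;  /* words in complete 4-word blocks */
   for (i = 0; i < n; i++) {
      ULong w = p[i];
      sum1 = ROL64(sum1 ^ w, 63);
      sum2 += w;
      /* fold sum2 into sum1 after each complete 4-word block, and after
         every word of a trailing partial block */
      if ((i < full && (i & 3) == 3) || i >= full)
         sum1 ^= sum2;
   }
   return sum1 + sum2;
}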
 /*--------------------------------------------------------------------*/
 /*--- end                                 guest_generic_bb_to_IR.c ---*/
 /*--------------------------------------------------------------------*/
diff --git a/main/VEX/priv/guest_generic_bb_to_IR.h b/main/VEX/priv/guest_generic_bb_to_IR.h
index 9ea10cb..f623443 100644
--- a/main/VEX/priv/guest_generic_bb_to_IR.h
+++ b/main/VEX/priv/guest_generic_bb_to_IR.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -158,21 +158,24 @@
 
 /* See detailed comment in bb_to_IR.c. */
 extern
-IRSB* bb_to_IR ( /*OUT*/VexGuestExtents* vge,
-                 /*IN*/ void*            closure_opaque,
-                 /*IN*/ DisOneInstrFn    dis_instr_fn,
-                 /*IN*/ UChar*           guest_code,
-                 /*IN*/ Addr64           guest_IP_bbstart,
-                 /*IN*/ Bool             (*chase_into_ok)(void*,Addr64),
-                 /*IN*/ Bool             host_bigendian,
-                 /*IN*/ VexArch          arch_guest,
-                 /*IN*/ VexArchInfo*     archinfo_guest,
-                 /*IN*/ VexAbiInfo*      abiinfo_both,
-                 /*IN*/ IRType           guest_word_type,
-                 /*IN*/ Bool             do_self_check,
-                 /*IN*/ Bool             (*preamble_function)(void*,IRSB*),
-                 /*IN*/ Int              offB_TISTART,
-                 /*IN*/ Int              offB_TILEN );
+IRSB* bb_to_IR ( 
+         /*OUT*/VexGuestExtents* vge,
+         /*OUT*/UInt*            n_sc_extents,
+         /*IN*/ void*            callback_opaque,
+         /*IN*/ DisOneInstrFn    dis_instr_fn,
+         /*IN*/ UChar*           guest_code,
+         /*IN*/ Addr64           guest_IP_bbstart,
+         /*IN*/ Bool             (*chase_into_ok)(void*,Addr64),
+         /*IN*/ Bool             host_bigendian,
+         /*IN*/ VexArch          arch_guest,
+         /*IN*/ VexArchInfo*     archinfo_guest,
+         /*IN*/ VexAbiInfo*      abiinfo_both,
+         /*IN*/ IRType           guest_word_type,
+         /*IN*/ UInt             (*needs_self_check)(void*,VexGuestExtents*),
+         /*IN*/ Bool             (*preamble_function)(void*,IRSB*),
+         /*IN*/ Int              offB_TISTART,
+         /*IN*/ Int              offB_TILEN
+      );
 
 
 #endif /* ndef __VEX_GUEST_GENERIC_BB_TO_IR_H */
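With this change the old do_self_check Bool becomes a per-extent
needs_self_check callback, and the number of extents that actually received a
self-check is reported back through n_sc_extents.  A hypothetical tool-side
callback, shown only to illustrate the shape of the new parameter (the name
and the check-everything policy are invented for this sketch):

static UInt toy_needs_self_check ( void* opaque, VexGuestExtents* vge )
{
   /* Return a bitmask in which bit i set means extent i of 'vge' should
      be guarded by a self-check. */
   UInt i, bitset = 0;
   for (i = 0; i < vge->n_used; i++)
      bitset |= (1U << i);   /* conservatively check every extent */
   return bitset;
}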
diff --git a/main/VEX/priv/guest_generic_x87.c b/main/VEX/priv/guest_generic_x87.c
index 306f660..9c683ab 100644
--- a/main/VEX/priv/guest_generic_x87.c
+++ b/main/VEX/priv/guest_generic_x87.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -717,7 +717,7 @@
    switch (imm8) {
       case 0x00:
       case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12:
-      case 0x1A: case 0x3A: case 0x44: case 0x4A:
+      case 0x1A: case 0x38: case 0x3A: case 0x44: case 0x4A:
          break;
       default:
          return False;
diff --git a/main/VEX/priv/guest_generic_x87.h b/main/VEX/priv/guest_generic_x87.h
index 9cbe23b..997c2c2 100644
--- a/main/VEX/priv/guest_generic_x87.h
+++ b/main/VEX/priv/guest_generic_x87.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
diff --git a/main/VEX/priv/guest_ppc_defs.h b/main/VEX/priv/guest_ppc_defs.h
index dd3c62e..7c8dc8e 100644
--- a/main/VEX/priv/guest_ppc_defs.h
+++ b/main/VEX/priv/guest_ppc_defs.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -126,6 +126,10 @@
    /* 11 */ PPCG_FLAG_OP_SRAWI,   // srawi
    /* 12 */ PPCG_FLAG_OP_SRAD,    // srad
    /* 13 */ PPCG_FLAG_OP_SRADI,   // sradi
+   /* 14 */ PPCG_FLAG_OP_DIVDE,   // divdeo
+   /* 15 */ PPCG_FLAG_OP_DIVWEU,  // divweuo
+   /* 16 */ PPCG_FLAG_OP_DIVWE,   // divweo
+   /* 17 */ PPCG_FLAG_OP_DIVDEU,  // divdeuo
    PPCG_FLAG_OP_NUMBER
 };
 
diff --git a/main/VEX/priv/guest_ppc_helpers.c b/main/VEX/priv/guest_ppc_helpers.c
index 11aa428..b8a3cd0 100644
--- a/main/VEX/priv/guest_ppc_helpers.c
+++ b/main/VEX/priv/guest_ppc_helpers.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -385,74 +385,74 @@
    vex_state->guest_GPR30 = 0;
    vex_state->guest_GPR31 = 0;
 
-   vex_state->guest_FPR0  = 0;
-   vex_state->guest_FPR1  = 0;
-   vex_state->guest_FPR2  = 0;
-   vex_state->guest_FPR3  = 0;
-   vex_state->guest_FPR4  = 0;
-   vex_state->guest_FPR5  = 0;
-   vex_state->guest_FPR6  = 0;
-   vex_state->guest_FPR7  = 0;
-   vex_state->guest_FPR8  = 0;
-   vex_state->guest_FPR9  = 0;
-   vex_state->guest_FPR10 = 0;
-   vex_state->guest_FPR11 = 0;
-   vex_state->guest_FPR12 = 0;
-   vex_state->guest_FPR13 = 0;
-   vex_state->guest_FPR14 = 0;
-   vex_state->guest_FPR15 = 0;
-   vex_state->guest_FPR16 = 0;
-   vex_state->guest_FPR17 = 0;
-   vex_state->guest_FPR18 = 0;
-   vex_state->guest_FPR19 = 0;
-   vex_state->guest_FPR20 = 0;
-   vex_state->guest_FPR21 = 0;
-   vex_state->guest_FPR22 = 0;
-   vex_state->guest_FPR23 = 0;
-   vex_state->guest_FPR24 = 0;
-   vex_state->guest_FPR25 = 0;
-   vex_state->guest_FPR26 = 0;
-   vex_state->guest_FPR27 = 0;
-   vex_state->guest_FPR28 = 0;
-   vex_state->guest_FPR29 = 0;
-   vex_state->guest_FPR30 = 0;
-   vex_state->guest_FPR31 = 0;
 
    /* Initialise the vector state. */
 #  define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0;
 
-   VECZERO(vex_state->guest_VR0 );
-   VECZERO(vex_state->guest_VR1 );
-   VECZERO(vex_state->guest_VR2 );
-   VECZERO(vex_state->guest_VR3 );
-   VECZERO(vex_state->guest_VR4 );
-   VECZERO(vex_state->guest_VR5 );
-   VECZERO(vex_state->guest_VR6 );
-   VECZERO(vex_state->guest_VR7 );
-   VECZERO(vex_state->guest_VR8 );
-   VECZERO(vex_state->guest_VR9 );
-   VECZERO(vex_state->guest_VR10);
-   VECZERO(vex_state->guest_VR11);
-   VECZERO(vex_state->guest_VR12);
-   VECZERO(vex_state->guest_VR13);
-   VECZERO(vex_state->guest_VR14);
-   VECZERO(vex_state->guest_VR15);
-   VECZERO(vex_state->guest_VR16);
-   VECZERO(vex_state->guest_VR17);
-   VECZERO(vex_state->guest_VR18);
-   VECZERO(vex_state->guest_VR19);
-   VECZERO(vex_state->guest_VR20);
-   VECZERO(vex_state->guest_VR21);
-   VECZERO(vex_state->guest_VR22);
-   VECZERO(vex_state->guest_VR23);
-   VECZERO(vex_state->guest_VR24);
-   VECZERO(vex_state->guest_VR25);
-   VECZERO(vex_state->guest_VR26);
-   VECZERO(vex_state->guest_VR27);
-   VECZERO(vex_state->guest_VR28);
-   VECZERO(vex_state->guest_VR29);
-   VECZERO(vex_state->guest_VR30);
-   VECZERO(vex_state->guest_VR31);
+   VECZERO(vex_state->guest_VSR0 );
+   VECZERO(vex_state->guest_VSR1 );
+   VECZERO(vex_state->guest_VSR2 );
+   VECZERO(vex_state->guest_VSR3 );
+   VECZERO(vex_state->guest_VSR4 );
+   VECZERO(vex_state->guest_VSR5 );
+   VECZERO(vex_state->guest_VSR6 );
+   VECZERO(vex_state->guest_VSR7 );
+   VECZERO(vex_state->guest_VSR8 );
+   VECZERO(vex_state->guest_VSR9 );
+   VECZERO(vex_state->guest_VSR10);
+   VECZERO(vex_state->guest_VSR11);
+   VECZERO(vex_state->guest_VSR12);
+   VECZERO(vex_state->guest_VSR13);
+   VECZERO(vex_state->guest_VSR14);
+   VECZERO(vex_state->guest_VSR15);
+   VECZERO(vex_state->guest_VSR16);
+   VECZERO(vex_state->guest_VSR17);
+   VECZERO(vex_state->guest_VSR18);
+   VECZERO(vex_state->guest_VSR19);
+   VECZERO(vex_state->guest_VSR20);
+   VECZERO(vex_state->guest_VSR21);
+   VECZERO(vex_state->guest_VSR22);
+   VECZERO(vex_state->guest_VSR23);
+   VECZERO(vex_state->guest_VSR24);
+   VECZERO(vex_state->guest_VSR25);
+   VECZERO(vex_state->guest_VSR26);
+   VECZERO(vex_state->guest_VSR27);
+   VECZERO(vex_state->guest_VSR28);
+   VECZERO(vex_state->guest_VSR29);
+   VECZERO(vex_state->guest_VSR30);
+   VECZERO(vex_state->guest_VSR31);
+   VECZERO(vex_state->guest_VSR32);
+   VECZERO(vex_state->guest_VSR33);
+   VECZERO(vex_state->guest_VSR34);
+   VECZERO(vex_state->guest_VSR35);
+   VECZERO(vex_state->guest_VSR36);
+   VECZERO(vex_state->guest_VSR37);
+   VECZERO(vex_state->guest_VSR38);
+   VECZERO(vex_state->guest_VSR39);
+   VECZERO(vex_state->guest_VSR40);
+   VECZERO(vex_state->guest_VSR41);
+   VECZERO(vex_state->guest_VSR42);
+   VECZERO(vex_state->guest_VSR43);
+   VECZERO(vex_state->guest_VSR44);
+   VECZERO(vex_state->guest_VSR45);
+   VECZERO(vex_state->guest_VSR46);
+   VECZERO(vex_state->guest_VSR47);
+   VECZERO(vex_state->guest_VSR48);
+   VECZERO(vex_state->guest_VSR49);
+   VECZERO(vex_state->guest_VSR50);
+   VECZERO(vex_state->guest_VSR51);
+   VECZERO(vex_state->guest_VSR52);
+   VECZERO(vex_state->guest_VSR53);
+   VECZERO(vex_state->guest_VSR54);
+   VECZERO(vex_state->guest_VSR55);
+   VECZERO(vex_state->guest_VSR56);
+   VECZERO(vex_state->guest_VSR57);
+   VECZERO(vex_state->guest_VSR58);
+   VECZERO(vex_state->guest_VSR59);
+   VECZERO(vex_state->guest_VSR60);
+   VECZERO(vex_state->guest_VSR61);
+   VECZERO(vex_state->guest_VSR62);
+   VECZERO(vex_state->guest_VSR63);
 
 #  undef VECZERO
 
@@ -542,74 +542,73 @@
    vex_state->guest_GPR30 = 0;
    vex_state->guest_GPR31 = 0;
 
-   vex_state->guest_FPR0  = 0;
-   vex_state->guest_FPR1  = 0;
-   vex_state->guest_FPR2  = 0;
-   vex_state->guest_FPR3  = 0;
-   vex_state->guest_FPR4  = 0;
-   vex_state->guest_FPR5  = 0;
-   vex_state->guest_FPR6  = 0;
-   vex_state->guest_FPR7  = 0;
-   vex_state->guest_FPR8  = 0;
-   vex_state->guest_FPR9  = 0;
-   vex_state->guest_FPR10 = 0;
-   vex_state->guest_FPR11 = 0;
-   vex_state->guest_FPR12 = 0;
-   vex_state->guest_FPR13 = 0;
-   vex_state->guest_FPR14 = 0;
-   vex_state->guest_FPR15 = 0;
-   vex_state->guest_FPR16 = 0;
-   vex_state->guest_FPR17 = 0;
-   vex_state->guest_FPR18 = 0;
-   vex_state->guest_FPR19 = 0;
-   vex_state->guest_FPR20 = 0;
-   vex_state->guest_FPR21 = 0;
-   vex_state->guest_FPR22 = 0;
-   vex_state->guest_FPR23 = 0;
-   vex_state->guest_FPR24 = 0;
-   vex_state->guest_FPR25 = 0;
-   vex_state->guest_FPR26 = 0;
-   vex_state->guest_FPR27 = 0;
-   vex_state->guest_FPR28 = 0;
-   vex_state->guest_FPR29 = 0;
-   vex_state->guest_FPR30 = 0;
-   vex_state->guest_FPR31 = 0;
-
    /* Initialise the vector state. */
 #  define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0;
 
-   VECZERO(vex_state->guest_VR0 );
-   VECZERO(vex_state->guest_VR1 );
-   VECZERO(vex_state->guest_VR2 );
-   VECZERO(vex_state->guest_VR3 );
-   VECZERO(vex_state->guest_VR4 );
-   VECZERO(vex_state->guest_VR5 );
-   VECZERO(vex_state->guest_VR6 );
-   VECZERO(vex_state->guest_VR7 );
-   VECZERO(vex_state->guest_VR8 );
-   VECZERO(vex_state->guest_VR9 );
-   VECZERO(vex_state->guest_VR10);
-   VECZERO(vex_state->guest_VR11);
-   VECZERO(vex_state->guest_VR12);
-   VECZERO(vex_state->guest_VR13);
-   VECZERO(vex_state->guest_VR14);
-   VECZERO(vex_state->guest_VR15);
-   VECZERO(vex_state->guest_VR16);
-   VECZERO(vex_state->guest_VR17);
-   VECZERO(vex_state->guest_VR18);
-   VECZERO(vex_state->guest_VR19);
-   VECZERO(vex_state->guest_VR20);
-   VECZERO(vex_state->guest_VR21);
-   VECZERO(vex_state->guest_VR22);
-   VECZERO(vex_state->guest_VR23);
-   VECZERO(vex_state->guest_VR24);
-   VECZERO(vex_state->guest_VR25);
-   VECZERO(vex_state->guest_VR26);
-   VECZERO(vex_state->guest_VR27);
-   VECZERO(vex_state->guest_VR28);
-   VECZERO(vex_state->guest_VR29);
-   VECZERO(vex_state->guest_VR30);
-   VECZERO(vex_state->guest_VR31);
+   VECZERO(vex_state->guest_VSR0 );
+   VECZERO(vex_state->guest_VSR1 );
+   VECZERO(vex_state->guest_VSR2 );
+   VECZERO(vex_state->guest_VSR3 );
+   VECZERO(vex_state->guest_VSR4 );
+   VECZERO(vex_state->guest_VSR5 );
+   VECZERO(vex_state->guest_VSR6 );
+   VECZERO(vex_state->guest_VSR7 );
+   VECZERO(vex_state->guest_VSR8 );
+   VECZERO(vex_state->guest_VSR9 );
+   VECZERO(vex_state->guest_VSR10);
+   VECZERO(vex_state->guest_VSR11);
+   VECZERO(vex_state->guest_VSR12);
+   VECZERO(vex_state->guest_VSR13);
+   VECZERO(vex_state->guest_VSR14);
+   VECZERO(vex_state->guest_VSR15);
+   VECZERO(vex_state->guest_VSR16);
+   VECZERO(vex_state->guest_VSR17);
+   VECZERO(vex_state->guest_VSR18);
+   VECZERO(vex_state->guest_VSR19);
+   VECZERO(vex_state->guest_VSR20);
+   VECZERO(vex_state->guest_VSR21);
+   VECZERO(vex_state->guest_VSR22);
+   VECZERO(vex_state->guest_VSR23);
+   VECZERO(vex_state->guest_VSR24);
+   VECZERO(vex_state->guest_VSR25);
+   VECZERO(vex_state->guest_VSR26);
+   VECZERO(vex_state->guest_VSR27);
+   VECZERO(vex_state->guest_VSR28);
+   VECZERO(vex_state->guest_VSR29);
+   VECZERO(vex_state->guest_VSR30);
+   VECZERO(vex_state->guest_VSR31);
+   VECZERO(vex_state->guest_VSR32);
+   VECZERO(vex_state->guest_VSR33);
+   VECZERO(vex_state->guest_VSR34);
+   VECZERO(vex_state->guest_VSR35);
+   VECZERO(vex_state->guest_VSR36);
+   VECZERO(vex_state->guest_VSR37);
+   VECZERO(vex_state->guest_VSR38);
+   VECZERO(vex_state->guest_VSR39);
+   VECZERO(vex_state->guest_VSR40);
+   VECZERO(vex_state->guest_VSR41);
+   VECZERO(vex_state->guest_VSR42);
+   VECZERO(vex_state->guest_VSR43);
+   VECZERO(vex_state->guest_VSR44);
+   VECZERO(vex_state->guest_VSR45);
+   VECZERO(vex_state->guest_VSR46);
+   VECZERO(vex_state->guest_VSR47);
+   VECZERO(vex_state->guest_VSR48);
+   VECZERO(vex_state->guest_VSR49);
+   VECZERO(vex_state->guest_VSR50);
+   VECZERO(vex_state->guest_VSR51);
+   VECZERO(vex_state->guest_VSR52);
+   VECZERO(vex_state->guest_VSR53);
+   VECZERO(vex_state->guest_VSR54);
+   VECZERO(vex_state->guest_VSR55);
+   VECZERO(vex_state->guest_VSR56);
+   VECZERO(vex_state->guest_VSR57);
+   VECZERO(vex_state->guest_VSR58);
+   VECZERO(vex_state->guest_VSR59);
+   VECZERO(vex_state->guest_VSR60);
+   VECZERO(vex_state->guest_VSR61);
+   VECZERO(vex_state->guest_VSR62);
+   VECZERO(vex_state->guest_VSR63);
 
 #  undef VECZERO
 
diff --git a/main/VEX/priv/guest_ppc_toIR.c b/main/VEX/priv/guest_ppc_toIR.c
index f8d220d..8789c12 100644
--- a/main/VEX/priv/guest_ppc_toIR.c
+++ b/main/VEX/priv/guest_ppc_toIR.c
@@ -1,4 +1,5 @@
 
+
 /*--------------------------------------------------------------------*/
 /*--- begin                                       guest_ppc_toIR.c ---*/
 /*--------------------------------------------------------------------*/
@@ -7,7 +8,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -184,6 +185,11 @@
    }
 }
 
+#define SIGN_BIT  0x8000000000000000ULL
+#define SIGN_MASK 0x7fffffffffffffffULL
+#define SIGN_BIT32  0x80000000
+#define SIGN_MASK32 0x7fffffff
+
 
 /*------------------------------------------------------------*/
 /*--- Debugging output                                     ---*/
@@ -257,22 +263,60 @@
    return toUChar( IFIELD( instr, 21, 5 ) );
 }
 
+/* Extract XT (destination register) field, instr[0,25:21] */
+static UChar ifieldRegXT ( UInt instr )
+{
+  UChar upper_bit = toUChar (IFIELD (instr, 0, 1));
+  UChar lower_bits = toUChar (IFIELD (instr, 21, 5));
+  return (upper_bit << 5) | lower_bits;
+}
+
+/* Extract XS (store source register) field, instr[0,25:21] */
+static inline UChar ifieldRegXS ( UInt instr )
+{
+  return ifieldRegXT ( instr );
+}
+
 /* Extract RA (1st source register) field, instr[20:16] */
 static UChar ifieldRegA ( UInt instr ) {
    return toUChar( IFIELD( instr, 16, 5 ) );
 }
 
+/* Extract XA (1st source register) field, instr[2,20:16] */
+static UChar ifieldRegXA ( UInt instr )
+{
+  UChar upper_bit = toUChar (IFIELD (instr, 2, 1));
+  UChar lower_bits = toUChar (IFIELD (instr, 16, 5));
+  return (upper_bit << 5) | lower_bits;
+}
+
 /* Extract RB (2nd source register) field, instr[15:11] */
 static UChar ifieldRegB ( UInt instr ) {
    return toUChar( IFIELD( instr, 11, 5 ) );
 }
 
+/* Extract XB (2nd source register) field, instr[1,15:11] */
+static UChar ifieldRegXB ( UInt instr )
+{
+  UChar upper_bit = toUChar (IFIELD (instr, 1, 1));
+  UChar lower_bits = toUChar (IFIELD (instr, 11, 5));
+  return (upper_bit << 5) | lower_bits;
+}
+
 /* Extract RC (3rd source register) field, instr[10:6] */
 static UChar ifieldRegC ( UInt instr ) {
    return toUChar( IFIELD( instr, 6, 5 ) );
 }
 
-/* Extract 2nd lowest bit, instr[1] */
+/* Extract XC (3rd source register) field, instr[3,10:6] */
+static UChar ifieldRegXC ( UInt instr )
+{
+  UChar upper_bit = toUChar (IFIELD (instr, 3, 1));
+  UChar lower_bits = toUChar (IFIELD (instr, 6, 5));
+  return (upper_bit << 5) | lower_bits;
+}
+
+/* Extract bit 10, instr[10] */
 static UChar ifieldBIT10 ( UInt instr ) {
    return toUChar( IFIELD( instr, 10, 1 ) );
 }
@@ -297,6 +341,16 @@
    return instr & 0x3FFFFFF;
 }
 
+/* Extract DM field, instr[9:8] */
+static UChar ifieldDM ( UInt instr ) {
+   return toUChar( IFIELD( instr, 8, 2 ) );
+}
+
+/* Extract SHW field, instr[9:8] */
+static inline UChar ifieldSHW ( UInt instr )
+{
+  return ifieldDM ( instr );
+}
 
 /*------------------------------------------------------------*/
 /*--- Guest-state identifiers                              ---*/
@@ -506,6 +560,12 @@
    return IRExpr_Const(IRConst_U64(i));
 }
 
+static IRExpr* mkV128 ( UShort i )
+{
+   vassert(i == 0 || i == 0xffff);
+   return IRExpr_Const(IRConst_V128(i));
+}
+
 /* This generates a normal (non load-linked) load. */
 static IRExpr* loadBE ( IRType ty, IRExpr* addr )
 {
@@ -600,6 +660,44 @@
                        binop(Iop_ShrV128, vIn, mkU8(16))) );
 }
 
+/* Break V128 into 4 x F64 (widening each F32 lane to F64). */
+static void breakV128to4xF64( IRExpr* t128,
+                              /*OUTs*/
+                              IRTemp* t3, IRTemp* t2,
+                              IRTemp* t1, IRTemp* t0 )
+{
+   IRTemp hi64 = newTemp(Ity_I64);
+   IRTemp lo64 = newTemp(Ity_I64);
+
+   vassert(typeOfIRExpr(irsb->tyenv, t128) == Ity_V128);
+   vassert(t0 && *t0 == IRTemp_INVALID);
+   vassert(t1 && *t1 == IRTemp_INVALID);
+   vassert(t2 && *t2 == IRTemp_INVALID);
+   vassert(t3 && *t3 == IRTemp_INVALID);
+   *t0 = newTemp(Ity_F64);
+   *t1 = newTemp(Ity_F64);
+   *t2 = newTemp(Ity_F64);
+   *t3 = newTemp(Ity_F64);
+
+   assign( hi64, unop(Iop_V128HIto64, t128) );
+   assign( lo64, unop(Iop_V128to64,   t128) );
+   assign( *t3,
+           unop( Iop_F32toF64,
+                 unop( Iop_ReinterpI32asF32,
+                       unop( Iop_64HIto32, mkexpr( hi64 ) ) ) ) );
+   assign( *t2,
+           unop( Iop_F32toF64,
+                 unop( Iop_ReinterpI32asF32, unop( Iop_64to32, mkexpr( hi64 ) ) ) ) );
+   assign( *t1,
+           unop( Iop_F32toF64,
+                 unop( Iop_ReinterpI32asF32,
+                       unop( Iop_64HIto32, mkexpr( lo64 ) ) ) ) );
+   assign( *t0,
+           unop( Iop_F32toF64,
+                 unop( Iop_ReinterpI32asF32, unop( Iop_64to32, mkexpr( lo64 ) ) ) ) );
+}
+
+
 /* break V128 to 4xI32's, then sign-extend to I64's */
 static void breakV128to4x64S( IRExpr* t128,
                               /*OUTs*/
@@ -654,6 +752,33 @@
    assign( *t0, unop(Iop_32Uto64, unop(Iop_64to32,   mkexpr(lo64))) );
 }
 
+static void breakV128to4x32( IRExpr* t128,
+                              /*OUTs*/
+                              IRTemp* t3, IRTemp* t2,
+                              IRTemp* t1, IRTemp* t0 )
+{
+   IRTemp hi64 = newTemp(Ity_I64);
+   IRTemp lo64 = newTemp(Ity_I64);
+
+   vassert(typeOfIRExpr(irsb->tyenv, t128) == Ity_V128);
+   vassert(t0 && *t0 == IRTemp_INVALID);
+   vassert(t1 && *t1 == IRTemp_INVALID);
+   vassert(t2 && *t2 == IRTemp_INVALID);
+   vassert(t3 && *t3 == IRTemp_INVALID);
+   *t0 = newTemp(Ity_I32);
+   *t1 = newTemp(Ity_I32);
+   *t2 = newTemp(Ity_I32);
+   *t3 = newTemp(Ity_I32);
+
+   assign( hi64, unop(Iop_V128HIto64, t128) );
+   assign( lo64, unop(Iop_V128to64,   t128) );
+   assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
+   assign( *t2, unop(Iop_64to32,   mkexpr(hi64)) );
+   assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
+   assign( *t0, unop(Iop_64to32,   mkexpr(lo64)) );
+}
+
+
 /* Signed saturating narrow 64S to 32 */
 static IRExpr* mkQNarrow64Sto32 ( IRExpr* t64 )
 {
@@ -932,43 +1057,44 @@
 }
 
 
+/* Floating-point registers are mapped to VSX registers[0..31]. */
 static Int floatGuestRegOffset ( UInt archreg )
 {
    vassert(archreg < 32);
    
    switch (archreg) {
-   case  0: return offsetofPPCGuestState(guest_FPR0);
-   case  1: return offsetofPPCGuestState(guest_FPR1);
-   case  2: return offsetofPPCGuestState(guest_FPR2);
-   case  3: return offsetofPPCGuestState(guest_FPR3);
-   case  4: return offsetofPPCGuestState(guest_FPR4);
-   case  5: return offsetofPPCGuestState(guest_FPR5);
-   case  6: return offsetofPPCGuestState(guest_FPR6);
-   case  7: return offsetofPPCGuestState(guest_FPR7);
-   case  8: return offsetofPPCGuestState(guest_FPR8);
-   case  9: return offsetofPPCGuestState(guest_FPR9);
-   case 10: return offsetofPPCGuestState(guest_FPR10);
-   case 11: return offsetofPPCGuestState(guest_FPR11);
-   case 12: return offsetofPPCGuestState(guest_FPR12);
-   case 13: return offsetofPPCGuestState(guest_FPR13);
-   case 14: return offsetofPPCGuestState(guest_FPR14);
-   case 15: return offsetofPPCGuestState(guest_FPR15);
-   case 16: return offsetofPPCGuestState(guest_FPR16);
-   case 17: return offsetofPPCGuestState(guest_FPR17);
-   case 18: return offsetofPPCGuestState(guest_FPR18);
-   case 19: return offsetofPPCGuestState(guest_FPR19);
-   case 20: return offsetofPPCGuestState(guest_FPR20);
-   case 21: return offsetofPPCGuestState(guest_FPR21);
-   case 22: return offsetofPPCGuestState(guest_FPR22);
-   case 23: return offsetofPPCGuestState(guest_FPR23);
-   case 24: return offsetofPPCGuestState(guest_FPR24);
-   case 25: return offsetofPPCGuestState(guest_FPR25);
-   case 26: return offsetofPPCGuestState(guest_FPR26);
-   case 27: return offsetofPPCGuestState(guest_FPR27);
-   case 28: return offsetofPPCGuestState(guest_FPR28);
-   case 29: return offsetofPPCGuestState(guest_FPR29);
-   case 30: return offsetofPPCGuestState(guest_FPR30);
-   case 31: return offsetofPPCGuestState(guest_FPR31);
+   case  0: return offsetofPPCGuestState(guest_VSR0);
+   case  1: return offsetofPPCGuestState(guest_VSR1);
+   case  2: return offsetofPPCGuestState(guest_VSR2);
+   case  3: return offsetofPPCGuestState(guest_VSR3);
+   case  4: return offsetofPPCGuestState(guest_VSR4);
+   case  5: return offsetofPPCGuestState(guest_VSR5);
+   case  6: return offsetofPPCGuestState(guest_VSR6);
+   case  7: return offsetofPPCGuestState(guest_VSR7);
+   case  8: return offsetofPPCGuestState(guest_VSR8);
+   case  9: return offsetofPPCGuestState(guest_VSR9);
+   case 10: return offsetofPPCGuestState(guest_VSR10);
+   case 11: return offsetofPPCGuestState(guest_VSR11);
+   case 12: return offsetofPPCGuestState(guest_VSR12);
+   case 13: return offsetofPPCGuestState(guest_VSR13);
+   case 14: return offsetofPPCGuestState(guest_VSR14);
+   case 15: return offsetofPPCGuestState(guest_VSR15);
+   case 16: return offsetofPPCGuestState(guest_VSR16);
+   case 17: return offsetofPPCGuestState(guest_VSR17);
+   case 18: return offsetofPPCGuestState(guest_VSR18);
+   case 19: return offsetofPPCGuestState(guest_VSR19);
+   case 20: return offsetofPPCGuestState(guest_VSR20);
+   case 21: return offsetofPPCGuestState(guest_VSR21);
+   case 22: return offsetofPPCGuestState(guest_VSR22);
+   case 23: return offsetofPPCGuestState(guest_VSR23);
+   case 24: return offsetofPPCGuestState(guest_VSR24);
+   case 25: return offsetofPPCGuestState(guest_VSR25);
+   case 26: return offsetofPPCGuestState(guest_VSR26);
+   case 27: return offsetofPPCGuestState(guest_VSR27);
+   case 28: return offsetofPPCGuestState(guest_VSR28);
+   case 29: return offsetofPPCGuestState(guest_VSR29);
+   case 30: return offsetofPPCGuestState(guest_VSR30);
+   case 31: return offsetofPPCGuestState(guest_VSR31);
    default: break;
    }
    vpanic("floatGuestRegOffset(ppc)"); /*notreached*/
@@ -988,44 +1114,117 @@
    stmt( IRStmt_Put(floatGuestRegOffset(archreg), e) );
 }
 
+static Int vsxGuestRegOffset ( UInt archreg )
+{
+   vassert(archreg < 64);
+   switch (archreg) {
+   case  0: return offsetofPPCGuestState(guest_VSR0);
+   case  1: return offsetofPPCGuestState(guest_VSR1);
+   case  2: return offsetofPPCGuestState(guest_VSR2);
+   case  3: return offsetofPPCGuestState(guest_VSR3);
+   case  4: return offsetofPPCGuestState(guest_VSR4);
+   case  5: return offsetofPPCGuestState(guest_VSR5);
+   case  6: return offsetofPPCGuestState(guest_VSR6);
+   case  7: return offsetofPPCGuestState(guest_VSR7);
+   case  8: return offsetofPPCGuestState(guest_VSR8);
+   case  9: return offsetofPPCGuestState(guest_VSR9);
+   case 10: return offsetofPPCGuestState(guest_VSR10);
+   case 11: return offsetofPPCGuestState(guest_VSR11);
+   case 12: return offsetofPPCGuestState(guest_VSR12);
+   case 13: return offsetofPPCGuestState(guest_VSR13);
+   case 14: return offsetofPPCGuestState(guest_VSR14);
+   case 15: return offsetofPPCGuestState(guest_VSR15);
+   case 16: return offsetofPPCGuestState(guest_VSR16);
+   case 17: return offsetofPPCGuestState(guest_VSR17);
+   case 18: return offsetofPPCGuestState(guest_VSR18);
+   case 19: return offsetofPPCGuestState(guest_VSR19);
+   case 20: return offsetofPPCGuestState(guest_VSR20);
+   case 21: return offsetofPPCGuestState(guest_VSR21);
+   case 22: return offsetofPPCGuestState(guest_VSR22);
+   case 23: return offsetofPPCGuestState(guest_VSR23);
+   case 24: return offsetofPPCGuestState(guest_VSR24);
+   case 25: return offsetofPPCGuestState(guest_VSR25);
+   case 26: return offsetofPPCGuestState(guest_VSR26);
+   case 27: return offsetofPPCGuestState(guest_VSR27);
+   case 28: return offsetofPPCGuestState(guest_VSR28);
+   case 29: return offsetofPPCGuestState(guest_VSR29);
+   case 30: return offsetofPPCGuestState(guest_VSR30);
+   case 31: return offsetofPPCGuestState(guest_VSR31);
+   case 32: return offsetofPPCGuestState(guest_VSR32);
+   case 33: return offsetofPPCGuestState(guest_VSR33);
+   case 34: return offsetofPPCGuestState(guest_VSR34);
+   case 35: return offsetofPPCGuestState(guest_VSR35);
+   case 36: return offsetofPPCGuestState(guest_VSR36);
+   case 37: return offsetofPPCGuestState(guest_VSR37);
+   case 38: return offsetofPPCGuestState(guest_VSR38);
+   case 39: return offsetofPPCGuestState(guest_VSR39);
+   case 40: return offsetofPPCGuestState(guest_VSR40);
+   case 41: return offsetofPPCGuestState(guest_VSR41);
+   case 42: return offsetofPPCGuestState(guest_VSR42);
+   case 43: return offsetofPPCGuestState(guest_VSR43);
+   case 44: return offsetofPPCGuestState(guest_VSR44);
+   case 45: return offsetofPPCGuestState(guest_VSR45);
+   case 46: return offsetofPPCGuestState(guest_VSR46);
+   case 47: return offsetofPPCGuestState(guest_VSR47);
+   case 48: return offsetofPPCGuestState(guest_VSR48);
+   case 49: return offsetofPPCGuestState(guest_VSR49);
+   case 50: return offsetofPPCGuestState(guest_VSR50);
+   case 51: return offsetofPPCGuestState(guest_VSR51);
+   case 52: return offsetofPPCGuestState(guest_VSR52);
+   case 53: return offsetofPPCGuestState(guest_VSR53);
+   case 54: return offsetofPPCGuestState(guest_VSR54);
+   case 55: return offsetofPPCGuestState(guest_VSR55);
+   case 56: return offsetofPPCGuestState(guest_VSR56);
+   case 57: return offsetofPPCGuestState(guest_VSR57);
+   case 58: return offsetofPPCGuestState(guest_VSR58);
+   case 59: return offsetofPPCGuestState(guest_VSR59);
+   case 60: return offsetofPPCGuestState(guest_VSR60);
+   case 61: return offsetofPPCGuestState(guest_VSR61);
+   case 62: return offsetofPPCGuestState(guest_VSR62);
+   case 63: return offsetofPPCGuestState(guest_VSR63);
+   default: break;
+   }
+   vpanic("vsxGuestRegOffset(ppc)"); /*notreached*/
+}
 
+/* Vector registers are mapped to VSX registers[32..63]. */
 static Int vectorGuestRegOffset ( UInt archreg )
 {
    vassert(archreg < 32);
    
    switch (archreg) {
-   case  0: return offsetofPPCGuestState(guest_VR0);
-   case  1: return offsetofPPCGuestState(guest_VR1);
-   case  2: return offsetofPPCGuestState(guest_VR2);
-   case  3: return offsetofPPCGuestState(guest_VR3);
-   case  4: return offsetofPPCGuestState(guest_VR4);
-   case  5: return offsetofPPCGuestState(guest_VR5);
-   case  6: return offsetofPPCGuestState(guest_VR6);
-   case  7: return offsetofPPCGuestState(guest_VR7);
-   case  8: return offsetofPPCGuestState(guest_VR8);
-   case  9: return offsetofPPCGuestState(guest_VR9);
-   case 10: return offsetofPPCGuestState(guest_VR10);
-   case 11: return offsetofPPCGuestState(guest_VR11);
-   case 12: return offsetofPPCGuestState(guest_VR12);
-   case 13: return offsetofPPCGuestState(guest_VR13);
-   case 14: return offsetofPPCGuestState(guest_VR14);
-   case 15: return offsetofPPCGuestState(guest_VR15);
-   case 16: return offsetofPPCGuestState(guest_VR16);
-   case 17: return offsetofPPCGuestState(guest_VR17);
-   case 18: return offsetofPPCGuestState(guest_VR18);
-   case 19: return offsetofPPCGuestState(guest_VR19);
-   case 20: return offsetofPPCGuestState(guest_VR20);
-   case 21: return offsetofPPCGuestState(guest_VR21);
-   case 22: return offsetofPPCGuestState(guest_VR22);
-   case 23: return offsetofPPCGuestState(guest_VR23);
-   case 24: return offsetofPPCGuestState(guest_VR24);
-   case 25: return offsetofPPCGuestState(guest_VR25);
-   case 26: return offsetofPPCGuestState(guest_VR26);
-   case 27: return offsetofPPCGuestState(guest_VR27);
-   case 28: return offsetofPPCGuestState(guest_VR28);
-   case 29: return offsetofPPCGuestState(guest_VR29);
-   case 30: return offsetofPPCGuestState(guest_VR30);
-   case 31: return offsetofPPCGuestState(guest_VR31);
+   case  0: return offsetofPPCGuestState(guest_VSR32);
+   case  1: return offsetofPPCGuestState(guest_VSR33);
+   case  2: return offsetofPPCGuestState(guest_VSR34);
+   case  3: return offsetofPPCGuestState(guest_VSR35);
+   case  4: return offsetofPPCGuestState(guest_VSR36);
+   case  5: return offsetofPPCGuestState(guest_VSR37);
+   case  6: return offsetofPPCGuestState(guest_VSR38);
+   case  7: return offsetofPPCGuestState(guest_VSR39);
+   case  8: return offsetofPPCGuestState(guest_VSR40);
+   case  9: return offsetofPPCGuestState(guest_VSR41);
+   case 10: return offsetofPPCGuestState(guest_VSR42);
+   case 11: return offsetofPPCGuestState(guest_VSR43);
+   case 12: return offsetofPPCGuestState(guest_VSR44);
+   case 13: return offsetofPPCGuestState(guest_VSR45);
+   case 14: return offsetofPPCGuestState(guest_VSR46);
+   case 15: return offsetofPPCGuestState(guest_VSR47);
+   case 16: return offsetofPPCGuestState(guest_VSR48);
+   case 17: return offsetofPPCGuestState(guest_VSR49);
+   case 18: return offsetofPPCGuestState(guest_VSR50);
+   case 19: return offsetofPPCGuestState(guest_VSR51);
+   case 20: return offsetofPPCGuestState(guest_VSR52);
+   case 21: return offsetofPPCGuestState(guest_VSR53);
+   case 22: return offsetofPPCGuestState(guest_VSR54);
+   case 23: return offsetofPPCGuestState(guest_VSR55);
+   case 24: return offsetofPPCGuestState(guest_VSR56);
+   case 25: return offsetofPPCGuestState(guest_VSR57);
+   case 26: return offsetofPPCGuestState(guest_VSR58);
+   case 27: return offsetofPPCGuestState(guest_VSR59);
+   case 28: return offsetofPPCGuestState(guest_VSR60);
+   case 29: return offsetofPPCGuestState(guest_VSR61);
+   case 30: return offsetofPPCGuestState(guest_VSR62);
+   case 31: return offsetofPPCGuestState(guest_VSR63);
    default: break;
    }
    vpanic("vextorGuestRegOffset(ppc)"); /*notreached*/
@@ -1045,6 +1244,22 @@
    stmt( IRStmt_Put(vectorGuestRegOffset(archreg), e) );
 }
 
+/* Get contents of VSX guest register */
+static IRExpr* getVSReg ( UInt archreg )
+{
+   vassert(archreg < 64);
+   return IRExpr_Get( vsxGuestRegOffset(archreg), Ity_V128 );
+}
+
+/* Ditto, but write to a VSX reg instead. */
+static void putVSReg ( UInt archreg, IRExpr* e )
+{
+   vassert(archreg < 64);
+   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
+   stmt( IRStmt_Put(vsxGuestRegOffset(archreg), e) );
+}
+
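floatGuestRegOffset, vsxGuestRegOffset and vectorGuestRegOffset above all
encode one mechanical mapping into the unified 64-entry VSX register file.
Stated directly, as a sketch (not part of the patch):

/* FPR n aliases VSR n (its upper doubleword); VR n aliases VSR n+32. */
static UInt fpr_to_vsr ( UInt fpr ) { vassert(fpr < 32); return fpr;     }
static UInt vr_to_vsr  ( UInt vr  ) { vassert(vr  < 32); return vr + 32; }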
+
 static Int guestCR321offset ( UInt cr )
 {
    switch (cr) {
@@ -1075,6 +1290,69 @@
    }
 }
 
+/* Generate an IR sequence to do a popcount operation on the supplied
+   IRTemp, and return a new IRTemp holding the result.  'ty' may be
+   Ity_I32 or Ity_I64 only. */
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
+{
+   Int i, shift[6];
+   IRTemp mask[6];
+   IRTemp old = IRTemp_INVALID;
+   IRTemp nyu = IRTemp_INVALID;
+
+   vassert(ty == Ity_I64 || ty == Ity_I32);
+
+   if (ty == Ity_I32) {
+      for (i = 0; i < 5; i++) {
+         mask[i]  = newTemp(ty);
+         shift[i] = 1 << i;
+      }
+      assign(mask[0], mkU32(0x55555555));
+      assign(mask[1], mkU32(0x33333333));
+      assign(mask[2], mkU32(0x0F0F0F0F));
+      assign(mask[3], mkU32(0x00FF00FF));
+      assign(mask[4], mkU32(0x0000FFFF));
+      old = src;
+      for (i = 0; i < 5; i++) {
+         nyu = newTemp(ty);
+         assign(nyu,
+                binop(Iop_Add32,
+                      binop(Iop_And32,
+                            mkexpr(old),
+                            mkexpr(mask[i])),
+                      binop(Iop_And32,
+                            binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
+                            mkexpr(mask[i]))));
+         old = nyu;
+      }
+      return nyu;
+   }
+   // else, ty == Ity_I64
+   for (i = 0; i < 6; i++) {
+      mask[i] = newTemp( Ity_I64 );
+      shift[i] = 1 << i;
+   }
+   assign( mask[0], mkU64( 0x5555555555555555ULL ) );
+   assign( mask[1], mkU64( 0x3333333333333333ULL ) );
+   assign( mask[2], mkU64( 0x0F0F0F0F0F0F0F0FULL ) );
+   assign( mask[3], mkU64( 0x00FF00FF00FF00FFULL ) );
+   assign( mask[4], mkU64( 0x0000FFFF0000FFFFULL ) );
+   assign( mask[5], mkU64( 0x00000000FFFFFFFFULL ) );
+   old = src;
+   for (i = 0; i < 6; i++) {
+      nyu = newTemp( Ity_I64 );
+      assign( nyu,
+              binop( Iop_Add64,
+                     binop( Iop_And64, mkexpr( old ), mkexpr( mask[i] ) ),
+                     binop( Iop_And64,
+                            binop( Iop_Shr64, mkexpr( old ), mkU8( shift[i] ) ),
+                            mkexpr( mask[i] ) ) ) );
+      old = nyu;
+   }
+   return nyu;
+}
+
+
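gen_POPCOUNT emits the classic divide-and-conquer bit-count ladder as IR.  The
same computation in plain C, for reference (illustrative only, not part of the
patch):

static UInt popcount64_ref ( ULong x )
{
   x = (x & 0x5555555555555555ULL) + ((x >>  1) & 0x5555555555555555ULL);
   x = (x & 0x3333333333333333ULL) + ((x >>  2) & 0x3333333333333333ULL);
   x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >>  4) & 0x0F0F0F0F0F0F0F0FULL);
   x = (x & 0x00FF00FF00FF00FFULL) + ((x >>  8) & 0x00FF00FF00FF00FFULL);
   x = (x & 0x0000FFFF0000FFFFULL) + ((x >> 16) & 0x0000FFFF0000FFFFULL);
   x = (x & 0x00000000FFFFFFFFULL) + ((x >> 32) & 0x00000000FFFFFFFFULL);
   return (UInt)x;
}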
 // ROTL(src32/64, rot_amt5/6)
 static IRExpr* /* :: Ity_I32/64 */ ROTL ( IRExpr* src,
                                           IRExpr* rot_amt )
@@ -1400,7 +1678,9 @@
 }
 
 
-/* Set the CR6 flags following an AltiVec compare operation. */
+/* Set the CR6 flags following an AltiVec compare operation.
+ * NOTE: This also works for VSX single-precision compares.
+ */
 static void set_AV_CR6 ( IRExpr* result, Bool test_all_ones )
 {
    /* CR6[0:3] = {all_ones, 0, all_zeros, 0}
@@ -1620,6 +1900,28 @@
          = binop(Iop_Shr32, xer_ov, mkU8(31) );
       break;
       
+   case PPCG_FLAG_OP_DIVWEU:
+      xer_ov
+               = binop( Iop_Or32,
+                        unop( Iop_1Uto32, binop( Iop_CmpEQ32, argR, mkU32( 0 ) ) ),
+                        unop( Iop_1Uto32, binop( Iop_CmpLT32U, argR, argL ) ) );
+      break;
+
+   case PPCG_FLAG_OP_DIVWE:
+
+      /* If argR == 0 or if the result cannot fit in the 32-bit destination register,
+       * then OV <- 1.   If dest reg is 0 AND both dividend and divisor are non-zero,
+       * an overflow is implied.
+       */
+      xer_ov = binop( Iop_Or32,
+                      unop( Iop_1Uto32, binop( Iop_CmpEQ32, argR, mkU32( 0 ) ) ),
+                      unop( Iop_1Uto32, mkAND1( binop( Iop_CmpEQ32, res, mkU32( 0 ) ),
+                              mkAND1( binop( Iop_CmpNE32, argL, mkU32( 0 ) ),
+                                      binop( Iop_CmpNE32, argR, mkU32( 0 ) ) ) ) ) );
+      break;
+
    default: 
       vex_printf("set_XER_OV: op = %u\n", op);
       vpanic("set_XER_OV(ppc)");
@@ -1725,6 +2027,26 @@
          = unop(Iop_64to1, binop(Iop_Shr64, xer_ov, mkU8(63)));
       break;
       
+   case PPCG_FLAG_OP_DIVDE:
+
+      /* If argR == 0, we must set the OV bit.  But there's another condition
+       * where we can get overflow set for divde: when the
+       * result cannot fit in the 64-bit destination register.  If dest reg is 0 AND
+       * both dividend and divisor are non-zero, it implies an overflow.
+       */
+      xer_ov
+                  = mkOR1( binop( Iop_CmpEQ64, argR, mkU64( 0 ) ),
+                           mkAND1( binop( Iop_CmpEQ64, res, mkU64( 0 ) ),
+                                   mkAND1( binop( Iop_CmpNE64, argL, mkU64( 0 ) ),
+                                           binop( Iop_CmpNE64, argR, mkU64( 0 ) ) ) ) );
+      break;
+
+   case PPCG_FLAG_OP_DIVDEU:
+     /* If argR == 0 or if argL >= argR, set OV. */
+     xer_ov = mkOR1( binop( Iop_CmpEQ64, argR, mkU64( 0 ) ),
+                         binop( Iop_CmpLE64U, argR, argL ) );
+     break;
+
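The two new 64-bit cases reduce to simple predicates on the operands and the
quotient.  In plain C, for reference (illustrative only):

/* divde: OV when the divisor is zero, or when a zero quotient comes from
   nonzero operands (the true quotient did not fit in 64 bits). */
static Bool divde_ov_ref ( Long argL, Long argR, Long res )
{
   return argR == 0 || (res == 0 && argL != 0 && argR != 0);
}

/* divdeu: OV when the divisor is zero, or divisor <= dividend (the
   quotient would need more than 64 bits). */
static Bool divdeu_ov_ref ( ULong argL, ULong argR )
{
   return argR == 0 || argR <= argL;
}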
    default: 
       vex_printf("set_XER_OV: op = %u\n", op);
       vpanic("set_XER_OV(ppc64)");
@@ -2345,7 +2667,220 @@
    }
 }
 
+/*------------------------------------------------------------*/
+/* Helpers for VSX instructions that do floating point
+ * operations and need to determine if a src contains a
+ * special FP value.
+ *------------------------------------------------------------*/
 
+#define NONZERO_FRAC_MASK 0x000fffffffffffffULL
+#define FP_FRAC_PART(x) binop( Iop_And64, \
+                               mkexpr( x ), \
+                               mkU64( NONZERO_FRAC_MASK ) )
+
+// Returns exponent part of a single precision floating point as I32
+static IRExpr * fp_exp_part_sp(IRTemp src)
+{
+   return binop( Iop_And32,
+                 binop( Iop_Shr32, mkexpr( src ), mkU8( 23 ) ),
+                 mkU32( 0xff ) );
+}
+
+// Returns exponent part of floating point as I32
+static IRExpr * fp_exp_part(IRTemp src, Bool sp)
+{
+   IRExpr * exp;
+   if (sp)
+      return fp_exp_part_sp(src);
+
+   if (!mode64)
+      exp = binop( Iop_And32, binop( Iop_Shr32, unop( Iop_64HIto32,
+                                                      mkexpr( src ) ),
+                                     mkU8( 20 ) ), mkU32( 0x7ff ) );
+   else
+      exp = unop( Iop_64to32,
+                  binop( Iop_And64,
+                         binop( Iop_Shr64, mkexpr( src ), mkU8( 52 ) ),
+                         mkU64( 0x7ff ) ) );
+   return exp;
+}
+
+static IRExpr * is_Inf_sp(IRTemp src)
+{
+   IRTemp frac_part = newTemp(Ity_I32);
+   IRExpr * Inf_exp;
+
+   assign( frac_part, binop( Iop_And32, mkexpr(src), mkU32(0x007fffff)) );
+   Inf_exp = binop( Iop_CmpEQ32, fp_exp_part( src, True /*single precision*/ ), mkU32( 0xff ) );
+   return mkAND1( Inf_exp, binop( Iop_CmpEQ32, mkexpr( frac_part ), mkU32( 0 ) ) );
+}
+
+
+// Infinity: exp = 0x7ff and fraction is zero; s = 0/1
+static IRExpr * is_Inf(IRTemp src, Bool sp)
+{
+   IRExpr * Inf_exp, * hi32, * low32;
+   IRTemp frac_part;
+
+   if (sp)
+      return is_Inf_sp(src);
+
+   frac_part = newTemp(Ity_I64);
+   assign( frac_part, FP_FRAC_PART(src) );
+   Inf_exp = binop( Iop_CmpEQ32, fp_exp_part( src, False /*not single precision*/  ), mkU32( 0x7ff ) );
+   hi32 = unop( Iop_64HIto32, mkexpr( frac_part ) );
+   low32 = unop( Iop_64to32, mkexpr( frac_part ) );
+   return mkAND1( Inf_exp, binop( Iop_CmpEQ32, binop( Iop_Or32, low32, hi32 ),
+                                  mkU32( 0 ) ) );
+}
+
+static IRExpr * is_Zero_sp(IRTemp src)
+{
+   IRTemp sign_less_part = newTemp(Ity_I32);
+   assign( sign_less_part, binop( Iop_And32, mkexpr( src ), mkU32( SIGN_MASK32 ) ) );
+   return binop( Iop_CmpEQ32, mkexpr( sign_less_part ), mkU32( 0 ) );
+}
+
+// Zero: exp is zero and fraction is zero; s = 0/1
+static IRExpr * is_Zero(IRTemp src, Bool sp)
+{
+   IRExpr * hi32, * low32;
+   IRTemp sign_less_part;
+   if (sp)
+      return is_Zero_sp(src);
+
+   sign_less_part = newTemp(Ity_I64);
+
+   assign( sign_less_part, binop( Iop_And64, mkexpr( src ), mkU64( SIGN_MASK ) ) );
+   hi32 = unop( Iop_64HIto32, mkexpr( sign_less_part ) );
+   low32 = unop( Iop_64to32, mkexpr( sign_less_part ) );
+   return binop( Iop_CmpEQ32, binop( Iop_Or32, low32, hi32 ),
+                              mkU32( 0 ) );
+}
+
+/*  SNAN: s = 1/0; exp = 0x7ff; fraction is nonzero, with highest bit '0'
+ *  QNAN: s = 1/0; exp = 0x7ff; fraction is nonzero, with highest bit '1'
+ *  This function returns an IRExpr value of '1' for any type of NaN.
+ */
+static IRExpr * is_NaN(IRTemp src)
+{
+   IRExpr * NaN_exp, * hi32, * low32;
+   IRTemp frac_part = newTemp(Ity_I64);
+
+   assign( frac_part, FP_FRAC_PART(src) );
+   hi32 = unop( Iop_64HIto32, mkexpr( frac_part ) );
+   low32 = unop( Iop_64to32, mkexpr( frac_part ) );
+   NaN_exp = binop( Iop_CmpEQ32, fp_exp_part( src, False /*not single precision*/ ),
+                    mkU32( 0x7ff ) );
+
+   return mkAND1( NaN_exp, binop( Iop_CmpNE32, binop( Iop_Or32, low32, hi32 ),
+                                               mkU32( 0 ) ) );
+}
+
+/* This function returns an IRExpr value of '1' for any type of NaN.
+ * The passed 'src' argument is assumed to be Ity_I32.
+ */
+static IRExpr * is_NaN_32(IRTemp src)
+{
+#define NONZERO_FRAC_MASK32 0x007fffffULL
+#define FP_FRAC_PART32(x) binop( Iop_And32, \
+                                 mkexpr( x ), \
+                                 mkU32( NONZERO_FRAC_MASK32 ) )
+
+   IRExpr * frac_part = FP_FRAC_PART32(src);
+   IRExpr * exp_part = binop( Iop_And32,
+                              binop( Iop_Shr32, mkexpr( src ), mkU8( 23 ) ),
+                              mkU32( 0x0ff ) );
+   IRExpr * NaN_exp = binop( Iop_CmpEQ32, exp_part, mkU32( 0xff ) );
+
+   return mkAND1( NaN_exp, binop( Iop_CmpNE32, frac_part, mkU32( 0 ) ) );
+}
+
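For reference, the double-precision predicate that is_NaN assembles in IR is,
at the bit level (illustrative only):

static Bool is_nan_ref ( ULong bits /* an F64 reinterpreted as I64 */ )
{
   ULong exp  = (bits >> 52) & 0x7ffULL;
   ULong frac = bits & NONZERO_FRAC_MASK;
   return exp == 0x7ff && frac != 0;  /* all-ones exponent, nonzero fraction */
}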
+/* This helper function performs the negation part of operations of the form:
+ *    "Negate Multiply-<op>"
+ *  where "<op>" is either "Add" or "Sub".
+ *
+ * This function takes one argument -- the floating point intermediate result (converted to
+ * Ity_I64 via Iop_ReinterpF64asI64) that was obtained from the "Multiply-<op>" part of
+ * the operation described above.
+ */
+static IRTemp getNegatedResult(IRTemp intermediateResult)
+{
+   ULong signbit_mask = 0x8000000000000000ULL;
+   IRTemp signbit_32 = newTemp(Ity_I32);
+   IRTemp resultantSignbit = newTemp(Ity_I1);
+   IRTemp negatedResult = newTemp(Ity_I64);
+   assign( signbit_32, binop( Iop_Shr32,
+                          unop( Iop_64HIto32,
+                                 binop( Iop_And64, mkexpr( intermediateResult ),
+                                        mkU64( signbit_mask ) ) ),
+                                 mkU8( 31 ) ) );
+   /* We negate the signbit if and only if the intermediate result from the
+    * multiply-<op> was NOT a NaN.  This is an XNOR predicate.
+    */
+   assign( resultantSignbit,
+        unop( Iop_Not1,
+              binop( Iop_CmpEQ32,
+                     binop( Iop_Xor32,
+                            mkexpr( signbit_32 ),
+                            unop( Iop_1Uto32, is_NaN( intermediateResult ) ) ),
+                     mkU32( 1 ) ) ) );
+
+   assign( negatedResult,
+        binop( Iop_Or64,
+               binop( Iop_And64,
+                      mkexpr( intermediateResult ),
+                      mkU64( ~signbit_mask ) ),
+               binop( Iop_32HLto64,
+                      binop( Iop_Shl32,
+                             unop( Iop_1Uto32, mkexpr( resultantSignbit ) ),
+                             mkU8( 31 ) ),
+                      mkU32( 0 ) ) ) );
+
+   return negatedResult;
+}
+
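The XNOR above implements: flip the sign bit unless the intermediate result is
a NaN.  In plain C, for reference (illustrative only; SIGN_MASK and
NONZERO_FRAC_MASK are the macros defined earlier in this file):

static ULong negate_unless_nan_ref ( ULong bits )
{
   ULong sign   = bits >> 63;
   ULong is_nan = (((bits >> 52) & 0x7ffULL) == 0x7ffULL)
                  && ((bits & NONZERO_FRAC_MASK) != 0);
   /* XNOR: the new sign is 1 exactly when sign and is_nan agree */
   ULong new_sign = (sign ^ is_nan) ? 0ULL : 1ULL;
   return (bits & SIGN_MASK) | (new_sign << 63);
}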
+/* This helper function performs the negation part of operations of the form:
+ *    "Negate Multiply-<op>"
+ *  where "<op>" is either "Add" or "Sub".
+ *
+ * This function takes one argument -- the floating point intermediate result (converted to
+ * Ity_I32 via Iop_ReinterpF32asI32) that was obtained from the "Multiply-<op>" part of
+ * the operation described above.
+ */
+static IRTemp getNegatedResult_32(IRTemp intermediateResult)
+{
+   UInt signbit_mask = 0x80000000;
+   IRTemp signbit_32 = newTemp(Ity_I32);
+   IRTemp resultantSignbit = newTemp(Ity_I1);
+   IRTemp negatedResult = newTemp(Ity_I32);
+   assign( signbit_32, binop( Iop_Shr32,
+                                 binop( Iop_And32, mkexpr( intermediateResult ),
+                                        mkU32( signbit_mask ) ),
+                                 mkU8( 31 ) ) );
+   /* We negate the signbit if and only if the intermediate result from the
+    * multiply-<op> was NOT a NaN.  This is an XNOR predicate.
+    */
+   assign( resultantSignbit,
+        unop( Iop_Not1,
+              binop( Iop_CmpEQ32,
+                     binop( Iop_Xor32,
+                            mkexpr( signbit_32 ),
+                            unop( Iop_1Uto32, is_NaN_32( intermediateResult ) ) ),
+                     mkU32( 1 ) ) ) );
+
+   assign( negatedResult,
+           binop( Iop_Or32,
+                  binop( Iop_And32,
+                         mkexpr( intermediateResult ),
+                         mkU32( ~signbit_mask ) ),
+                  binop( Iop_Shl32,
+                         unop( Iop_1Uto32, mkexpr( resultantSignbit ) ),
+                         mkU8( 31 ) ) ) );
+
+   return negatedResult;
+}
 
 /*------------------------------------------------------------*/
 /*--- Integer Instruction Translation                     --- */
@@ -2867,6 +3402,106 @@
          break;
          /* Note: ditto comment divd, for (x / 0) */
 
+      case 0x18B: // divweu (Divide Word Extended Unsigned)
+      {
+        /*
+         *  If (RA) >= (RB), or if an attempt is made to perform the division
+         *         <anything> / 0
+         * then the contents of register RD are undefined as are (if Rc=1) the contents of
+         * the LT, GT, and EQ bits of CR Field 0. In these cases, if OE=1 then OV is set
+         * to 1.
+         */
+         IRTemp res = newTemp(Ity_I32);
+         IRExpr * dividend, * divisor;
+         DIP("divweu%s%s r%u,r%u,r%u\n",
+             flag_OE ? "o" : "", flag_rC ? ".":"",
+             rD_addr, rA_addr, rB_addr);
+         if (mode64) {
+            dividend = unop( Iop_64to32, mkexpr( rA ) );
+            divisor = unop( Iop_64to32, mkexpr( rB ) );
+            assign( res, binop( Iop_DivU32E, dividend, divisor ) );
+            assign( rD, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( res ) ) );
+         } else {
+            dividend = mkexpr( rA );
+            divisor =  mkexpr( rB );
+            assign( res, binop( Iop_DivU32E, dividend, divisor ) );
+            assign( rD, mkexpr( res) );
+         }
+
+         if (flag_OE) {
+            set_XER_OV_32( PPCG_FLAG_OP_DIVWEU,
+                           mkexpr(res), dividend, divisor );
+         }
+         break;
+      }
+
+      case 0x1AB: // divwe (Divide Word Extended)
+      {
+         /*
+          * If the quotient cannot be represented in 32 bits, or if an
+          * attempt is made to perform the division
+          *      <anything> / 0
+          * then the contents of register RD are undefined as are (if
+          * Rc=1) the contents of the LT, GT, and EQ bits of CR
+          * Field 0. In these cases, if OE=1 then OV is set to 1.
+          */
+
+         IRTemp res = newTemp(Ity_I32);
+         IRExpr * dividend, * divisor;
+         DIP("divwe%s%s r%u,r%u,r%u\n",
+             flag_OE ? "o" : "", flag_rC ? ".":"",
+             rD_addr, rA_addr, rB_addr);
+         if (mode64) {
+            dividend = unop( Iop_64to32, mkexpr( rA ) );
+            divisor = unop( Iop_64to32, mkexpr( rB ) );
+            assign( res, binop( Iop_DivS32E, dividend, divisor ) );
+            assign( rD, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( res ) ) );
+         } else {
+            dividend = mkexpr( rA );
+            divisor =  mkexpr( rB );
+            assign( res, binop( Iop_DivS32E, dividend, divisor ) );
+            assign( rD, mkexpr( res) );
+         }
+
+         if (flag_OE) {
+            set_XER_OV_32( PPCG_FLAG_OP_DIVWE,
+                           mkexpr(res), dividend, divisor );
+         }
+         break;
+      }
+
+      case 0x1A9: // divde (Divide Doubleword Extended)
+        /*
+         * If the quotient cannot be represented in 64 bits, or if an
+         * attempt is made to perform the division
+         *      <anything> / 0
+         * then the contents of register RD are undefined as are (if
+         * Rc=1) the contents of the LT, GT, and EQ bits of CR
+         * Field 0. In these cases, if OE=1 then OV is set to 1.
+         */
+         DIP("divde%s%s r%u,r%u,r%u\n",
+             flag_OE ? "o" : "", flag_rC ? ".":"",
+             rD_addr, rA_addr, rB_addr);
+         assign( rD, binop(Iop_DivS64E, mkexpr(rA), mkexpr(rB)) );
+         if (flag_OE) {
+            set_XER_OV_64( PPCG_FLAG_OP_DIVDE, mkexpr( rD ),
+                           mkexpr( rA ), mkexpr( rB ) );
+         }
+         break;
+
+      case 0x189: // divdeu (Divide Doubleword Extended Unsigned)
+        // Same CR and OV rules as given for divweu above
+        DIP("divdeu%s%s r%u,r%u,r%u\n",
+            flag_OE ? "o" : "", flag_rC ? ".":"",
+            rD_addr, rA_addr, rB_addr);
+        assign( rD, binop(Iop_DivU64E, mkexpr(rA), mkexpr(rB)) );
+        if (flag_OE) {
+           set_XER_OV_64( PPCG_FLAG_OP_DIVDEU, mkexpr( rD ),
+                          mkexpr( rA ), mkexpr( rB ) );
+        }
+        break;
+
       default:
          vex_printf("dis_int_arith(ppc)(opc2)\n");
          return False;
@@ -3075,7 +3710,7 @@
 
    /* X Form */
    case 0x1F:
-      do_rc = True;    // All below record to CR
+      do_rc = True; // All below record to CR, except for cases that return early.
 
       switch (opc2) {
       case 0x01C: // and (AND, PPC32 p356)
@@ -3264,6 +3899,88 @@
          putFReg( rS_addr, mkexpr(frA));
          return True;
       }
+      case 0x1FA: // popcntd (Population Count Doubleword)
+      {
+         DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
+         IRTemp result = gen_POPCOUNT(ty, rS);
+         putIReg( rA_addr, mkexpr(result) );
+         return True;
+      }
+      case 0x17A: // popcntw (Population Count Words)
+      {
+         DIP("popcntw r%u,r%u\n", rA_addr, rS_addr);
+         if (mode64) {
+            IRTemp resultHi, resultLo;
+            IRTemp argLo = newTemp(Ity_I32);
+            IRTemp argHi = newTemp(Ity_I32);
+            assign(argLo, unop(Iop_64to32, mkexpr(rS)));
+            assign(argHi, unop(Iop_64HIto32, mkexpr(rS)));
+            resultLo = gen_POPCOUNT(Ity_I32, argLo);
+            resultHi = gen_POPCOUNT(Ity_I32, argHi);
+            putIReg( rA_addr, binop(Iop_32HLto64, mkexpr(resultHi), mkexpr(resultLo)));
+         } else {
+            IRTemp result = gen_POPCOUNT(ty, rS);
+            putIReg( rA_addr, mkexpr(result) );
+         }
+         return True;
+      }
+       case 0x0FC: // bpermd (Bit Permute Doubleword)
+       {
+          /* This is a lot of rigmarole to emulate bpermd like this, as it
+           * could be done much faster by implementing a call to the native
+           * instruction.  However, where possible I want to avoid using new
+           * native instructions so that we can use valgrind to emulate those
+           * instructions on older PPC64 hardware.
+           */
+ #define BPERMD_IDX_MASK 0x00000000000000FFULL
+ #define BPERMD_BIT_MASK 0x8000000000000000ULL
+          int i;
+          IRExpr * rS_expr = mkexpr(rS);
+          IRExpr * res = binop(Iop_And64, mkU64(0), mkU64(0));
+          DIP("bpermd r%u,r%u,r%u\n", rA_addr, rS_addr, rB_addr);
+          for (i = 0; i < 8; i++) {
+             IRTemp idx_tmp = newTemp( Ity_I64 );
+             IRTemp perm_bit = newTemp( Ity_I64 );
+             IRTemp idx = newTemp( Ity_I8 );
+             IRTemp idx_LT64 = newTemp( Ity_I1 );
+             IRTemp idx_LT64_ity64 = newTemp( Ity_I64 );
+
+             assign( idx_tmp,
+                     binop( Iop_And64, mkU64( BPERMD_IDX_MASK ), rS_expr ) );
+             assign( idx_LT64,
+                           binop( Iop_CmpLT64U, mkexpr( idx_tmp ), mkU64( 64 ) ) );
+             assign( idx,
+                           binop( Iop_And8,
+                                  unop( Iop_1Sto8,
+                                        mkexpr(idx_LT64) ),
+                                  unop( Iop_64to8, mkexpr( idx_tmp ) ) ) );
+             /* If idx_LT64 == 0, we must force the perm bit to '0'. Below, we use idx
+              * to determine which bit of rB to use for the perm bit, and then we shift
+              * that bit to the MSB position.  We AND that with a 64-bit-ized idx_LT64
+              * to set the final perm bit.
+              */
+             assign( idx_LT64_ity64,
+                           unop( Iop_32Uto64, unop( Iop_1Uto32, mkexpr(idx_LT64 ) ) ) );
+             assign( perm_bit,
+                           binop( Iop_And64,
+                                  mkexpr( idx_LT64_ity64 ),
+                                  binop( Iop_Shr64,
+                                         binop( Iop_And64,
+                                                mkU64( BPERMD_BIT_MASK ),
+                                                binop( Iop_Shl64,
+                                                       mkexpr( rB ),
+                                                       mkexpr( idx ) ) ),
+                                         mkU8( 63 ) ) ) );
+             res = binop( Iop_Or64,
+                                res,
+                                binop( Iop_Shl64,
+                                       mkexpr( perm_bit ),
+                                       mkU8( i ) ) );
+             rS_expr = binop( Iop_Shr64, rS_expr, mkU8( 8 ) );
+          }
+          putIReg(rA_addr, res);
+          return True;
+       }
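For reference, the native bpermd semantics that the loop above reproduces, in
plain C (illustrative only): result bit i is selected by byte i of rS (least
significant byte first), which indexes a bit of rB counted from the most
significant end.

static ULong bpermd_ref ( ULong rS, ULong rB )
{
   ULong res = 0;
   Int   i;
   for (i = 0; i < 8; i++) {
      UInt idx = (rS >> (8 * i)) & 0xFF;
      if (idx < 64)
         res |= ((rB >> (63 - idx)) & 1ULL) << i;
   }
   return res;
}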
 
       default:
          vex_printf("dis_int_logic(ppc)(opc2)\n");
@@ -5485,7 +6202,23 @@
          putIReg( rD_addr, mkWidenFrom32(ty, mkexpr(w2),
                                          /* Signed */False) );
          break;
-      
+
+      case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
+      {
+         IRExpr * nextAddr;
+         IRTemp w3 = newTemp( Ity_I32 );
+         IRTemp w4 = newTemp( Ity_I32 );
+         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+         assign( w1, loadBE( Ity_I32, mkexpr( EA ) ) );
+         assign( w2, gen_byterev32( w1 ) );
+         nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
+                           ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
+         assign( w3, loadBE( Ity_I32, nextAddr ) );
+         assign( w4, gen_byterev32( w3 ) );
+         putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
+         break;
+      }
+
       case 0x396: // sthbrx (Store Half Word Byte-Reverse Indexed, PPC32 p523)
          DIP("sthbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
          assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) );
@@ -5497,7 +6230,21 @@
          assign( w1, mkNarrowTo32(ty, getIReg(rS_addr)) );
          storeBE( mkexpr(EA), gen_byterev32(w1) );
          break;
-      
+
+      case 0x294: // stdbrx (Store Doubleword Byte-Reverse Indexed)
+      {
+         IRTemp lo = newTemp(Ity_I32);
+         IRTemp hi = newTemp(Ity_I32);
+         IRTemp rS = newTemp(Ity_I64);
+         assign( rS, getIReg( rS_addr ) );
+         DIP("stdbrx r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+         assign(lo, unop(Iop_64HIto32, mkexpr(rS)));
+         assign(hi, unop(Iop_64to32, mkexpr(rS)));
+         storeBE( mkexpr( EA ),
+                  binop( Iop_32HLto64, gen_byterev32( hi ), gen_byterev32( lo ) ) );
+         break;
+      }
+
       default:
          vex_printf("dis_int_ldst_rev(ppc)(opc2)\n");
          return False;
@@ -6059,6 +6806,17 @@
                                  binop(Iop_32HLto64, mkexpr(iHi), mkexpr(iLo))) );
          break;
 
+      case 0x377: // lfiwzx (Load Floating-Point as Integer Word and Zero Indexed)
+      {
+         IRTemp dw = newTemp( Ity_I64 );
+         DIP("lfiwzx fr%u,r%u,r%u\n", frD_addr, rA_addr, rB_addr);
+         assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
+         assign( iLo, loadBE(Ity_I32, mkexpr(EA)) );
+         assign( dw, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( iLo ) ) );
+         putFReg( frD_addr, unop( Iop_ReinterpI64asF64, mkexpr( dw ) ) );
+         break;
+      }
+
       default:
          vex_printf("dis_fp_load(ppc)(opc2)\n");
          return False;
@@ -6591,7 +7349,390 @@
    return True;
 }
 
+/*
+ * fe_flag is set to 1 if any of the following conditions occurs:
+ *  - The floating-point operand in register FRB is a Zero, a
+ *    NaN, an Infinity, or a negative value.
+ *  - e_b is less than or equal to -970 for double precision
+ *    (-103 for single precision).
+ *  Otherwise fe_flag is set to 0.
+ *
+ * fg_flag is set to 1 if the following condition occurs:
+ *   - The floating-point operand in register FRB is a Zero, an
+ *     Infinity, or a denormalized value.
+ *  Otherwise fg_flag is set to 0.
+ *
+ */
+static void do_fp_tsqrt(IRTemp frB_Int, Bool sp, IRTemp * fe_flag_tmp, IRTemp * fg_flag_tmp)
+{
+   // The following temps are for holding intermediate results
+   IRTemp e_b = newTemp(Ity_I32);
+   IRExpr * fe_flag,  * fg_flag;
+   IRTemp frB_exp_shR = newTemp(Ity_I32);
+   UInt bias = sp? 127 : 1023;
+   IRExpr * frbNaN, * frbDenorm, * frBNeg;
+   IRExpr * eb_LTE;
+   IRTemp  frbZero_tmp = newTemp(Ity_I1);
+   IRTemp  frbInf_tmp = newTemp(Ity_I1);
+   *fe_flag_tmp = newTemp(Ity_I32);
+   *fg_flag_tmp = newTemp(Ity_I32);
+   assign( frB_exp_shR, fp_exp_part( frB_Int, sp ) );
+   assign(e_b, binop( Iop_Sub32, mkexpr(frB_exp_shR), mkU32( bias ) ));
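+   /* e_b is the unbiased exponent: the raw exponent field minus the bias
+    * (1023 for double precision, 127 for single precision).
+    */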
 
+   //////////////////  fe_flag tests BEGIN //////////////////////
+   /* We first do all tests that may result in setting fe_flag to '1'.
+    * (NOTE: These tests are similar to those used for ftdiv.  See do_fp_tdiv()
+    * for details.)
+    */
+   frbNaN = sp ? is_NaN_32(frB_Int) : is_NaN(frB_Int);
+   assign( frbInf_tmp, is_Inf(frB_Int, sp) );
+   assign( frbZero_tmp, is_Zero(frB_Int, sp ) );
+   {
+      // Test_value = -970 for double precision
+      UInt test_value = sp ? 0xffffff99 : 0xfffffc36;
+      eb_LTE = binop( Iop_CmpLE32S, mkexpr( e_b ), mkU32( test_value ) );
+   }
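+   /* frBNeg tests the sign bit, i.e. the MSB of the operand (of its high
+    * word in the double-precision case).
+    */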
+   frBNeg = binop( Iop_CmpEQ32,
+                   binop( Iop_Shr32,
+                          sp ? mkexpr( frB_Int ) : unop( Iop_64HIto32, mkexpr( frB_Int ) ),
+                          mkU8( 31 ) ),
+                   mkU32( 1 ) );
+   //////////////////  fe_flag tests END //////////////////////
+
+   //////////////////  fg_flag tests BEGIN //////////////////////
+   /*
+    * The following tests were already performed above in the fe_flag
+    * tests.  So these conditions will result in both fe_ and fg_ flags
+    * being set.
+    *   - Test if FRB is Zero
+    *   - Test if FRB is an Infinity
+    */
+
+   /*
+    * Test if FRB holds a denormalized value.  A denormalized value is one where
+    * the exp is 0 and the fraction is non-zero.
+    */
+   if (sp) {
+      IRTemp frac_part = newTemp(Ity_I32);
+      assign( frac_part, binop( Iop_And32, mkexpr(frB_Int), mkU32(0x007fffff)) );
+      frbDenorm
+               = mkAND1( binop( Iop_CmpEQ32, mkexpr( frB_exp_shR ), mkU32( 0 ) ),
+                         binop( Iop_CmpNE32, mkexpr( frac_part ), mkU32( 0 ) ) );
+   } else {
+      IRExpr * hi32, * low32, * fraction_is_nonzero;
+      IRTemp frac_part = newTemp(Ity_I64);
+
+      assign( frac_part, FP_FRAC_PART(frB_Int) );
+      hi32 = unop( Iop_64HIto32, mkexpr( frac_part ) );
+      low32 = unop( Iop_64to32, mkexpr( frac_part ) );
+      fraction_is_nonzero = binop( Iop_CmpNE32, binop( Iop_Or32, low32, hi32 ),
+                                                mkU32( 0 ) );
+      frbDenorm
+               = mkAND1( binop( Iop_CmpEQ32, mkexpr( frB_exp_shR ), mkU32( 0 ) ),
+                         fraction_is_nonzero );
+   }
+   //////////////////  fg_flag tests END //////////////////////
+
+   /////////////////////////
+   fe_flag = mkOR1( mkexpr( frbZero_tmp ),
+                    mkOR1( frbNaN,
+                           mkOR1( mkexpr( frbInf_tmp ),
+                                  mkOR1( frBNeg, eb_LTE ) ) ) );
+
+   fe_flag = unop(Iop_1Uto32, fe_flag);
+
+   fg_flag = mkOR1( mkexpr( frbZero_tmp ),
+                    mkOR1( mkexpr( frbInf_tmp ), frbDenorm ) );
+   fg_flag = unop(Iop_1Uto32, fg_flag);
+   assign (*fg_flag_tmp, fg_flag);
+   assign (*fe_flag_tmp, fe_flag);
+}
+/*
+ * fe_flag is set to 1 if any of the following conditions occurs:
+ *  - The double-precision floating-point operand in register FRA is a NaN or an
+ *    Infinity.
+ *  - The double-precision floating-point operand in register FRB is a Zero, a
+ *    NaN, or an Infinity.
+ *  - e_b is less than or equal to -1022.
+ *  - e_b is greater than or equal to 1021.
+ *  - The double-precision floating-point operand in register FRA is not a zero
+ *    and the difference, e_a - e_b, is greater than or equal to 1023.
+ *  - The double-precision floating-point operand in register FRA is not a zero
+ *    and the difference, e_a - e_b, is less than or equal to -1021.
+ *  - The double-precision floating-point operand in register FRA is not a zero
+ *    and e_a is less than or equal to -970
+ *  Otherwise fe_flag is set to 0.
+ *
+ * fg_flag is set to 1 if either of the following conditions occurs.
+ *   - The double-precision floating-point operand in register FRA is an Infinity.
+ *   - The double-precision floating-point operand in register FRB is a Zero, an
+ *     Infinity, or a denormalized value.
+ *  Otherwise fg_flag is set to 0.
+ *
+ */
+static void _do_fp_tdiv(IRTemp frA_int, IRTemp frB_int, Bool sp, IRTemp * fe_flag_tmp, IRTemp * fg_flag_tmp)
+{
+   // The following temps are for holding intermediate results
+   IRTemp e_a = newTemp(Ity_I32);
+   IRTemp e_b = newTemp(Ity_I32);
+   IRTemp frA_exp_shR = newTemp(Ity_I32);
+   IRTemp frB_exp_shR = newTemp(Ity_I32);
+
+   UInt bias = sp? 127 : 1023;
+   *fe_flag_tmp = newTemp(Ity_I32);
+   *fg_flag_tmp = newTemp(Ity_I32);
+
+   /* The following variables hold boolean results from tests
+    * that are OR'ed together for setting the fe_ and fg_ flags.
+    * For some cases, the booleans are used more than once, so
+    * I make those IRTemp's instead of IRExpr's.
+    */
+   IRExpr * fraNaN, * frbNaN, * frbDenorm;
+   IRExpr * eb_LTE, * eb_GTE, * ea_eb_GTE, * ea_eb_LTE, * ea_LTE;
+   IRTemp  fraInf_tmp = newTemp(Ity_I1);
+   IRTemp  frbZero_tmp = newTemp(Ity_I1);
+   IRTemp  frbInf_tmp = newTemp(Ity_I1);
+   IRTemp  fraNotZero_tmp = newTemp(Ity_I1);
+
+   /* The following are the flags that are set by OR'ing the results of
+    * all the tests done for tdiv.  These flags are the input to the
+    * specified CR field.
+    */
+   IRExpr * fe_flag, * fg_flag;
+
+   // Create temps that will be used throughout the following tests.
+   assign( frA_exp_shR, fp_exp_part( frA_int, sp ) );
+   assign( frB_exp_shR, fp_exp_part( frB_int, sp ) );
+   /* Let e_[a|b] be the unbiased exponent: i.e. exp - 1023. */
+   assign(e_a, binop( Iop_Sub32, mkexpr(frA_exp_shR), mkU32( bias ) ));
+   assign(e_b, binop( Iop_Sub32, mkexpr(frB_exp_shR), mkU32( bias ) ));
+
+
+   //////////////////  fe_flag tests BEGIN //////////////////////
+   /* We first do all tests that may result in setting fe_flag to '1'. */
+
+   /*
+    * Test if the double-precision floating-point operand in register FRA is
+    * a NaN:
+    */
+   fraNaN = sp ? is_NaN_32(frA_int) : is_NaN(frA_int);
+   /*
+    * Test if the double-precision floating-point operand in register FRA is
+    * an Infinity.
+    */
+   assign(fraInf_tmp, is_Inf(frA_int, sp));
+
+   /*
+    * Test if the double-precision floating-point operand in register FRB is
+    * a NaN:
+    */
+   frbNaN = sp ? is_NaN_32(frB_int) : is_NaN(frB_int);
+   /*
+    * Test if the double-precision floating-point operand in register FRB is
+    * an Infinity.
+    */
+   assign( frbInf_tmp, is_Inf(frB_int, sp) );
+   /*
+    * Test if the double-precision floating-point operand in register FRB is
+    * a Zero.
+    */
+   assign( frbZero_tmp, is_Zero(frB_int, sp) );
+
+   /*
+    * Test if e_b <= -1022 for double precision;
+    * or e_b <= -126 for single precision
+    */
+   {
+      UInt test_value = sp ? 0xffffff82 : 0xfffffc02;
+      eb_LTE = binop(Iop_CmpLE32S, mkexpr(e_b), mkU32(test_value));
+   }
+
+   /*
+    * Test if e_b >= 1021 (i.e., 1021 < e_b) for double precision;
+    * or e_b >= 125 (i.e., 125 < e_b) for single precision
+    */
+   {
+      Int test_value = sp ? 125 : 1021;
+      eb_GTE = binop(Iop_CmpLT32S, mkU32(test_value), mkexpr(e_b));
+   }
+
+   /*
+    * Test if FRA != Zero and (e_a - e_b) >= bias
+    */
+   assign( fraNotZero_tmp, unop( Iop_Not1, is_Zero( frA_int, sp ) ) );
+   ea_eb_GTE = mkAND1( mkexpr( fraNotZero_tmp ),
+                       binop( Iop_CmpLT32S, mkU32( bias ),
+                              binop( Iop_Sub32, mkexpr( e_a ),
+                                     mkexpr( e_b ) ) ) );
+
+   /*
+    * Test if FRA != Zero and (e_a - e_b) <= [-1021 (double precision) or -125 (single precision)]
+    */
+   {
+      UInt test_value = sp ? 0xffffff83 : 0xfffffc03;
+
+      ea_eb_LTE = mkAND1( mkexpr( fraNotZero_tmp ),
+                          binop( Iop_CmpLE32S,
+                                 binop( Iop_Sub32,
+                                        mkexpr( e_a ),
+                                        mkexpr( e_b ) ),
+                                        mkU32( test_value ) ) );
+   }
+
+   /*
+    * Test if FRA != Zero and e_a <= [-970 (double precision) or -103 (single precision)]
+    */
+   {
+      UInt test_value = 0xfffffc36;  // i.e. -970; the same value is used for both precisions here
+
+      ea_LTE = mkAND1( mkexpr( fraNotZero_tmp ), binop( Iop_CmpLE32S,
+                                                        mkexpr( e_a ),
+                                                        mkU32( test_value ) ) );
+   }
+   //////////////////  fe_flag tests END //////////////////////
+
+   //////////////////  fg_flag tests BEGIN //////////////////////
+   /*
+    * The following tests were already performed above in the fe_flag
+    * tests.  So these conditions will result in both fe_ and fg_ flags
+    * being set.
+    *   - Test if FRA is an Infinity
+    *   - Test if FRB is Zero
+    *   - Test if FRB is an Infinity
+    */
+
+   /*
+    * Test if FRB holds a denormalized value.  A denormalized value is one where
+    * the exp is 0 and the fraction is non-zero.
+    */
+   {
+      IRExpr * fraction_is_nonzero;
+
+      if (sp) {
+         fraction_is_nonzero = binop( Iop_CmpNE32, FP_FRAC_PART32(frB_int),
+                                      mkU32( 0 ) );
+      } else {
+         IRExpr * hi32, * low32;
+         IRTemp frac_part = newTemp(Ity_I64);
+         assign( frac_part, FP_FRAC_PART(frB_int) );
+
+         hi32 = unop( Iop_64HIto32, mkexpr( frac_part ) );
+         low32 = unop( Iop_64to32, mkexpr( frac_part ) );
+         fraction_is_nonzero = binop( Iop_CmpNE32, binop( Iop_Or32, low32, hi32 ),
+                                      mkU32( 0 ) );
+      }
+      frbDenorm = mkAND1( binop( Iop_CmpEQ32, mkexpr( frB_exp_shR ),
+                                 mkU32( 0x0 ) ), fraction_is_nonzero );
+
+   }
+   //////////////////  fg_flag tests END //////////////////////
+
+   fe_flag
+   = mkOR1(
+            fraNaN,
+            mkOR1(
+                   mkexpr( fraInf_tmp ),
+                   mkOR1(
+                          mkexpr( frbZero_tmp ),
+                          mkOR1(
+                                 frbNaN,
+                                 mkOR1(
+                                        mkexpr( frbInf_tmp ),
+                                        mkOR1( eb_LTE,
+                                               mkOR1( eb_GTE,
+                                                      mkOR1( ea_eb_GTE,
+                                                             mkOR1( ea_eb_LTE,
+                                                                    ea_LTE ) ) ) ) ) ) ) ) );
+
+   fe_flag = unop(Iop_1Uto32, fe_flag);
+
+   fg_flag = mkOR1( mkexpr( fraInf_tmp ), mkOR1( mkexpr( frbZero_tmp ),
+                                                 mkOR1( mkexpr( frbInf_tmp ),
+                                                        frbDenorm ) ) );
+   fg_flag = unop(Iop_1Uto32, fg_flag);
+   assign(*fe_flag_tmp, fe_flag);
+   assign(*fg_flag_tmp, fg_flag);
+}
+
+/* See description for _do_fp_tdiv() above. */
+static IRExpr * do_fp_tdiv(IRTemp frA_int, IRTemp frB_int)
+{
+   IRTemp  fe_flag, fg_flag;
+   /////////////////////////
+   /* The CR field consists of fl_flag || fg_flag || fe_flag || 0b0
+    * where fl_flag == 1 on ppc64.
+    */
+   IRExpr * fl_flag = unop(Iop_Not32, mkU32(0xFFFFFE));
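+   /* Although fl_flag has many high bits set, only its bit 0 matters after
+    * the Shl32 by 3 below: putGST_field is presumed to keep just the four
+    * bits destined for the target CR field.
+    */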
+   fe_flag = fg_flag = IRTemp_INVALID;
+   _do_fp_tdiv(frA_int, frB_int, False/*not single precision*/, &fe_flag, &fg_flag);
+   return binop( Iop_Or32,
+                 binop( Iop_Or32,
+                        binop( Iop_Shl32, fl_flag, mkU8( 3 ) ),
+                        binop( Iop_Shl32, mkexpr(fg_flag), mkU8( 2 ) ) ),
+                 binop( Iop_Shl32, mkexpr(fe_flag), mkU8( 1 ) ) );
+}
+
+static Bool dis_fp_tests ( UInt theInstr )
+{
+   UChar opc1     = ifieldOPC(theInstr);
+   UChar crfD     = toUChar( IFIELD( theInstr, 23, 3 ) );
+   UChar frB_addr = ifieldRegB(theInstr);
+   UChar b0       = ifieldBIT0(theInstr);
+   UInt  opc2     = ifieldOPClo10(theInstr);
+   IRTemp frB_I64     = newTemp(Ity_I64);
+
+   if (opc1 != 0x3F || b0 != 0) {
+      vex_printf("dis_fp_tests(ppc)(ftdiv)\n");
+      return False;
+   }
+   assign( frB_I64, unop( Iop_ReinterpF64asI64, getFReg( frB_addr ) ) );
+
+   switch (opc2) {
+      case 0x080: // ftdiv
+      {
+         UChar frA_addr = ifieldRegA(theInstr);
+         IRTemp frA_I64     = newTemp(Ity_I64);
+         UChar b21to22  = toUChar( IFIELD( theInstr, 21, 2 ) );
+         if (b21to22 != 0 ) {
+            vex_printf("dis_fp_tests(ppc)(ftdiv)\n");
+            return False;
+         }
+
+         assign( frA_I64, unop( Iop_ReinterpF64asI64, getFReg( frA_addr ) ) );
+         putGST_field( PPC_GST_CR, do_fp_tdiv(frA_I64, frB_I64), crfD );
+
+         DIP("ftdiv crf%d,fr%u,fr%u\n", crfD, frA_addr, frB_addr);
+         break;
+      }
+      case 0x0A0: // ftsqrt
+      {
+         IRTemp flags = newTemp(Ity_I32);
+         IRTemp  fe_flag, fg_flag;
+         fe_flag = fg_flag = IRTemp_INVALID;
+         UChar b18to22  = toUChar( IFIELD( theInstr, 18, 5 ) );
+         if ( b18to22 != 0) {
+            vex_printf("dis_fp_tests(ppc)(ftsqrt)\n");
+            return False;
+         }
+         DIP("ftsqrt crf%d,fr%u\n", crfD, frB_addr);
+         do_fp_tsqrt(frB_I64, False /* not single precision*/, &fe_flag, &fg_flag);
+         /* The CR field consists of fl_flag || fg_flag || fe_flag || 0b0
+          * where fl_flag == 1 on ppc64.
+          */
+         assign( flags,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flag), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flag), mkU8( 1 ) ) ) );
+         putGST_field( PPC_GST_CR, mkexpr(flags), crfD );
+         break;
+      }
+
+      default:
+         vex_printf("dis_fp_tests(ppc)(opc2)\n");
+         return False;
+
+   }
+   return True;
+}
 
 /*
   Floating Point Compare Instructions
@@ -6721,13 +7862,33 @@
       simulating exceptions, the exception status will appear to be
       zero.  Hence cr1 should be cleared if this is a . form insn. */
    Bool clear_CR1 = True;
-   
-   if (opc1 != 0x3F || b16to20 != 0) {
+   if ((!(opc1 == 0x3F || opc1 == 0x3B)) || b16to20 != 0) {
       vex_printf("dis_fp_round(ppc)(instr)\n");
       return False;
    }
 
    assign( frB, getFReg(frB_addr));
+   if (opc1 == 0x3B) {
+      /* The fcfid[u]s instructions (from ISA 2.06) are a bit odd because
+       * they're very similar to the other instructions handled here, but have
+       * a different primary opcode.
+       */
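+      /* Both cases below finish with 'goto putFR', which skips the main
+       * opc2 switch that follows.
+       */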
+      switch (opc2) {
+         case 0x34E: // fcfids (Float convert from signed DWord to single precision)
+            DIP("fcfids%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+            assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) );
+            assign( frD, binop( Iop_RoundF64toF32, rm, binop( Iop_I64StoF64, rm,
+                                                              mkexpr( r_tmp64 ) ) ) );
+            goto putFR;
+
+         case 0x3CE: // fcfidus (Float convert from unsigned DWord to single precision)
+            DIP("fcfidus%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+            assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) );
+            assign( frD, unop( Iop_F32toF64, binop( Iop_I64UtoF32, rm, mkexpr( r_tmp64 ) ) ) );
+            goto putFR;
+      }
+   }
+
 
    switch (opc2) {
    case 0x00C: // frsp (Float Round to Single, PPC32 p423)
@@ -6755,6 +7916,20 @@
       set_FPRF = False;
       break;
 
+   case 0x08F: case 0x08E: // fctiwu[z]
+      DIP("fctiwu%s%s fr%u,fr%u\n", opc2 == 0x08F ? "z" : "",
+               flag_rC ? ".":"", frD_addr, frB_addr);
+      assign( r_tmp32,
+              binop( Iop_F64toI32U,
+                     opc2 == 0x08F ? mkU32( Irrm_ZERO ) : rm,
+                     mkexpr( frB ) ) );
+      assign( frD, unop( Iop_ReinterpI64asF64,
+                         unop( Iop_32Uto64, mkexpr(r_tmp32))));
+      /* FPRF is undefined after fctiwu[z].  Leave unchanged. */
+      set_FPRF = False;
+      break;
+
    case 0x32E: // fctid (Float Conv to Int DWord, PPC64 p437)
       DIP("fctid%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
       assign( r_tmp64,
@@ -6773,6 +7948,17 @@
       set_FPRF = False;
       break;
 
+   case 0x3AE: case 0x3AF: // fctidu[z] (Float Conv to Int DWord Unsigned [Round to Zero])
+   {
+      DIP("fctidu%s%s fr%u,fr%u\n", opc2 == 0x3AE ? "" : "z",
+               flag_rC ? ".":"", frD_addr, frB_addr);
+      assign( r_tmp64,
+              binop(Iop_F64toI64U, opc2 == 0x3AE ? rm : mkU32(Irrm_ZERO), mkexpr(frB)) );
+      assign( frD, unop( Iop_ReinterpI64asF64, mkexpr(r_tmp64)) );
+      /* FPRF is undefined after fctidu[z].  Leave unchanged. */
+      set_FPRF = False;
+      break;
+   }
    case 0x34E: // fcfid (Float Conv from Int DWord, PPC64 p434)
       DIP("fcfid%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
       assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) );
@@ -6780,6 +7966,12 @@
               binop(Iop_I64StoF64, rm, mkexpr(r_tmp64)) );
       break;
 
+   case 0x3CE: // fcfidu (Float convert from unsigned DWord)
+      DIP("fcfidu%s fr%u,fr%u\n", flag_rC ? ".":"", frD_addr, frB_addr);
+      assign( r_tmp64, unop( Iop_ReinterpF64asI64, mkexpr(frB)) );
+      assign( frD, binop( Iop_I64UtoF64, rm, mkexpr( r_tmp64 ) ) );
+      break;
+
    case 0x188: case 0x1A8: case 0x1C8: case 0x1E8: // frin, friz, frip, frim
       switch(opc2) {
       case 0x188: // frin (Floating Round to Integer Nearest)
@@ -6830,7 +8022,7 @@
       vex_printf("dis_fp_round(ppc)(opc2)\n");
       return False;
    }
-
+putFR:
    putFReg( frD_addr, mkexpr(frD) );
 
    if (set_FPRF) {
@@ -7271,6 +8463,2663 @@
 }
 
 /*
+ * VSX scalar and vector convert instructions
+ */
+static Bool
+dis_vx_conv ( UInt theInstr, UInt opc2 )
+{
+   /* XX2-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XT = ifieldRegXT( theInstr );
+   UChar XB = ifieldRegXB( theInstr );
+   IRTemp xB, xB2;
+   IRTemp b3, b2, b1, b0;
+   xB = xB2 = IRTemp_INVALID;
+
+   if (opc1 != 0x3C) {
+      vex_printf( "dis_vx_conv(ppc)(instr)\n" );
+      return False;
+   }
+
+   /* Create and assign temps only as needed for the given instruction. */
+   switch (opc2) {
+      // scalar double-precision floating point argument
+      case 0x2B0: case 0x0b0: case 0x290: case 0x212: case 0x090:
+         xB = newTemp(Ity_F64);
+         assign( xB,
+                 unop( Iop_ReinterpI64asF64,
+                       unop( Iop_V128HIto64, getVSReg( XB ) ) ) );
+         break;
+      // vector double-precision floating point arguments
+      case 0x1b0: case 0x312: case 0x390: case 0x190: case 0x3B0:
+
+         xB = newTemp(Ity_F64);
+         xB2 = newTemp(Ity_F64);
+         assign( xB,
+                 unop( Iop_ReinterpI64asF64,
+                       unop( Iop_V128HIto64, getVSReg( XB ) ) ) );
+         assign( xB2,
+                 unop( Iop_ReinterpI64asF64,
+                       unop( Iop_V128to64, getVSReg( XB ) ) ) );
+         break;
+      // vector single precision or [un]signed integer word arguments
+      case 0x130: case 0x392: case 0x330: case 0x310: case 0x110:
+      case 0x1f0: case 0x1d0:
+         b3 = b2 = b1 = b0 = IRTemp_INVALID;
+         breakV128to4x32(getVSReg(XB), &b3, &b2, &b1, &b0);
+         break;
+      // vector [un]signed integer doubleword argument
+      case 0x3f0: case 0x370: case 0x3d0: case 0x350:
+         xB = newTemp(Ity_I64);
+         assign( xB, unop( Iop_V128HIto64, getVSReg( XB ) ) );
+         xB2 = newTemp(Ity_I64);
+         assign( xB2, unop( Iop_V128to64, getVSReg( XB ) ) );
+         break;
+      // scalar [un]signed integer doubleword argument
+      case 0x2F0: case 0x2D0:
+         xB = newTemp(Ity_I64);
+         assign( xB, unop( Iop_V128HIto64, getVSReg( XB ) ) );
+         break;
+      // scalar single precision argument
+      case 0x292: // xscvspdp
+         xB = newTemp(Ity_I32);
+         assign( xB,
+                 unop( Iop_64HIto32, unop( Iop_V128HIto64, getVSReg( XB ) ) ) );
+         break;
+
+      /* Certain instructions have their complete implementation in the main switch statement
+       * that follows this one; thus we have a "do nothing" case for those instructions here.
+       */
+      case 0x170: case 0x150:
+         break; // do nothing
+
+      default:
+         vex_printf( "dis_vx_conv(ppc)(opc2)\n" );
+         return False;
+   }
+
+   switch (opc2) {
+      case 0x2B0:
+         // xscvdpsxds (VSX Scalar truncate Double-Precision to integer and Convert
+         //             to Signed Integer Doubleword format with Saturate)
+         DIP("xscvdpsxds v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128, binop( Iop_F64toI64S,
+                                                 mkU32( Irrm_ZERO ),
+                                                 mkexpr( xB ) ), mkU64( 0 ) ) );
+         break;
+      case 0x0b0: // xscvdpsxws (VSX Scalar truncate Double-Precision to integer and
+                  //             Convert to Signed Integer Word format with Saturate)
+         DIP("xscvdpsxws v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_32Sto64,
+                                binop( Iop_F64toI32S,
+                                       mkU32( Irrm_ZERO ),
+                                       mkexpr( xB ) ) ),
+                                       mkU64( 0ULL ) ) );
+         break;
+      case 0x290: // xscvdpuxds (VSX Scalar truncate Double-Precision to integer and Convert
+                  //             to Unsigned Integer Doubleword format with Saturate)
+         DIP("xscvdpuxds v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_F64toI64U,
+                                 mkU32( Irrm_ZERO ),
+                                 mkexpr( xB ) ),
+                                 mkU64( 0ULL ) ) );
+         break;
+      case 0x2F0:
+         // xscvsxddp (VSX Scalar Convert and round Signed Integer Doubleword to
+         //            Double-Precision format)
+         DIP("xscvsxddp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64,
+                                                binop( Iop_I64StoF64, get_IR_roundingmode(),
+                                                       mkexpr( xB ) ) ),
+                                                       mkU64( 0 ) ) );
+         break;
+      case 0x2D0:
+         // xscvuxddp (VSX Scalar Convert and round Unsigned Integer Doubleword to
+         //            Double-Precision format)
+         DIP("xscvuxddp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64,
+                                                binop( Iop_I64UtoF64, get_IR_roundingmode(),
+                                                       mkexpr( xB ) ) ),
+                                                       mkU64( 0 ) ) );
+         break;
+      case 0x1b0: // xvcvdpsxws (VSX Vector truncate Double-Precision to integer and Convert
+                  //             to Signed Integer Word format with Saturate)
+      {
+         IRTemp hiResult_32 = newTemp(Ity_I32);
+         IRTemp loResult_32 = newTemp(Ity_I32);
+         IRExpr* rmZero = mkU32(Irrm_ZERO);
+
+         DIP("xvcvdpsxws v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         assign(hiResult_32, binop(Iop_F64toI32S, rmZero, mkexpr(xB)));
+         assign(loResult_32, binop(Iop_F64toI32S, rmZero, mkexpr(xB2)));
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_32Sto64, mkexpr( hiResult_32 ) ),
+                          unop( Iop_32Sto64, mkexpr( loResult_32 ) ) ) );
+         break;
+      }
+      case 0x130: case 0x110: // xvcvspsxws, xvcvspuxws
+         //  (VSX Vector truncate Single-Precision to integer and
+         //   Convert to [Un]signed Integer Word format with Saturate)
+      {
+         IRExpr * b0_result, * b1_result, * b2_result, * b3_result;
+         IRTemp tempResult = newTemp(Ity_V128);
+         IRTemp res0 = newTemp(Ity_I32);
+         IRTemp res1 = newTemp(Ity_I32);
+         IRTemp res2 = newTemp(Ity_I32);
+         IRTemp res3 = newTemp(Ity_I32);
+         IRTemp hi64 = newTemp(Ity_I64);
+         IRTemp lo64 = newTemp(Ity_I64);
+         Bool un_signed = (opc2 == 0x110);
+         IROp op = un_signed ? Iop_QFtoI32Ux4_RZ : Iop_QFtoI32Sx4_RZ;
+
+         DIP("xvcvsp%sxws v%u,v%u\n", un_signed ? "u" : "s", (UInt)XT, (UInt)XB);
+         /* The xvcvsp{s|u}xws instruction is similar to vct{s|u}xs, except
+          * that if src is a NaN, the result is set to 0x80000000 (signed)
+          * or 0x00000000 (unsigned).  */
+         assign(tempResult, unop(op, getVSReg(XB)));
+         assign( hi64, unop(Iop_V128HIto64, mkexpr(tempResult)) );
+         assign( lo64, unop(Iop_V128to64,   mkexpr(tempResult)) );
+         assign( res3, unop(Iop_64HIto32, mkexpr(hi64)) );
+         assign( res2, unop(Iop_64to32,   mkexpr(hi64)) );
+         assign( res1, unop(Iop_64HIto32, mkexpr(lo64)) );
+         assign( res0, unop(Iop_64to32,   mkexpr(lo64)) );
+
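+         /* Per-lane NaN handling: each 32-bit lane of the saturated
+          * conversion result is replaced by the architected NaN result
+          * when the corresponding source lane is a NaN; the Mux0X
+          * expressions below do the selection.
+          */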
+         b3_result = IRExpr_Mux0X(unop(Iop_1Uto8, is_NaN_32(b3)),
+                                  // else: result is from the Iop_QFtoI32{s|u}x4_RZ
+                                  mkexpr(res3),
+                                  // then: result is 0x80000000 (signed) / 0x00000000 (unsigned)
+                                  mkU32(un_signed ? 0x00000000 : 0x80000000));
+         b2_result = IRExpr_Mux0X(unop(Iop_1Uto8, is_NaN_32(b2)),
+                                  // else: result is from the Iop_QFtoI32{s|u}x4_RZ
+                                  mkexpr(res2),
+                                  // then: result is 0x80000000 (signed) / 0x00000000 (unsigned)
+                                  mkU32(un_signed ? 0x00000000 : 0x80000000));
+         b1_result = IRExpr_Mux0X(unop(Iop_1Uto8, is_NaN_32(b1)),
+                                  // else: result is from the Iop_QFtoI32{s|u}x4_RZ
+                                  mkexpr(res1),
+                                  // then: result is 0x80000000 (signed) / 0x00000000 (unsigned)
+                                  mkU32(un_signed ? 0x00000000 : 0x80000000));
+         b0_result = IRExpr_Mux0X(unop(Iop_1Uto8, is_NaN_32(b0)),
+                                  // else: result is from the Iop_QFtoI32{s|u}x4_RZ
+                                  mkexpr(res0),
+                                  // then: result is 0x80000000 (signed) / 0x00000000 (unsigned)
+                                  mkU32(un_signed ? 0x00000000 : 0x80000000));
+
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64, b3_result, b2_result ),
+                          binop( Iop_32HLto64, b1_result, b0_result ) ) );
+         break;
+      }
+      case 0x212: // xscvdpsp (VSX Scalar round Double-Precision to single-precision and
+                  //           Convert to Single-Precision format)
+         DIP("xscvdpsp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64,
+                                 unop( Iop_ReinterpF32asI32,
+                                       unop( Iop_TruncF64asF32,
+                                             binop( Iop_RoundF64toF32,
+                                                    get_IR_roundingmode(),
+                                                    mkexpr( xB ) ) ) ),
+                                 mkU32( 0 ) ),
+                          mkU64( 0ULL ) ) );
+         break;
+      case 0x090: // xscvdpuxws (VSX Scalar truncate Double-Precision to integer
+                  //             and Convert to Unsigned Integer Word format with Saturate)
+         DIP("xscvdpuxws v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64,
+                                 mkU32( 0 ),
+                                 binop( Iop_F64toI32U,
+                                        mkU32( Irrm_ZERO ),
+                                        mkexpr( xB ) ) ),
+                          mkU64( 0ULL ) ) );
+         break;
+      case 0x292: // xscvspdp (VSX Scalar Convert Single-Precision to Double-Precision format)
+         DIP("xscvspdp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_ReinterpF64asI64,
+                                unop( Iop_F32toF64,
+                                      unop( Iop_ReinterpI32asF32, mkexpr( xB ) ) ) ),
+                          mkU64( 0ULL ) ) );
+         break;
+      case 0x312: // xvcvdpsp (VSX Vector round Double-Precision to single-precision
+                  //           and Convert to Single-Precision format)
+         DIP("xvcvdpsp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64,
+                                 unop( Iop_ReinterpF32asI32,
+                                       unop( Iop_TruncF64asF32,
+                                             binop( Iop_RoundF64toF32,
+                                                    get_IR_roundingmode(),
+                                                    mkexpr( xB ) ) ) ),
+                                 mkU32( 0 ) ),
+                          binop( Iop_32HLto64,
+                                 unop( Iop_ReinterpF32asI32,
+                                       unop( Iop_TruncF64asF32,
+                                             binop( Iop_RoundF64toF32,
+                                                    get_IR_roundingmode(),
+                                                    mkexpr( xB2 ) ) ) ),
+                                 mkU32( 0 ) ) ) );
+         break;
+      case 0x390: // xvcvdpuxds (VSX Vector truncate Double-Precision to integer
+                  //             and Convert to Unsigned Integer Doubleword format
+                  //             with Saturate)
+         DIP("xvcvdpuxds v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_F64toI64U, mkU32( Irrm_ZERO ), mkexpr( xB ) ),
+                          binop( Iop_F64toI64U, mkU32( Irrm_ZERO ), mkexpr( xB2 ) ) ) );
+         break;
+      case 0x190: // xvcvdpuxws (VSX Vector truncate Double-Precision to integer and
+                  //             Convert to Unsigned Integer Word format with Saturate)
+         DIP("xvcvdpuxws v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64,
+                                 binop( Iop_F64toI32U,
+                                        mkU32( Irrm_ZERO ),
+                                        mkexpr( xB ) ),
+                                 mkU32( 0 ) ),
+                          binop( Iop_32HLto64,
+                                 binop( Iop_F64toI32U,
+                                        mkU32( Irrm_ZERO ),
+                                        mkexpr( xB2 ) ),
+                                 mkU32( 0 ) ) ) );
+         break;
+      case 0x392: // xvcvspdp (VSX Vector Convert Single-Precision to Double-Precision format)
+         DIP("xvcvspdp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_ReinterpF64asI64,
+                                unop( Iop_F32toF64,
+                                      unop( Iop_ReinterpI32asF32, mkexpr( b3 ) ) ) ),
+                          unop( Iop_ReinterpF64asI64,
+                                unop( Iop_F32toF64,
+                                      unop( Iop_ReinterpI32asF32, mkexpr( b1 ) ) ) ) ) );
+         break;
+      case 0x330: // xvcvspsxds (VSX Vector truncate Single-Precision to integer and
+                  //           Convert to Signed Integer Doubleword format with Saturate)
+         DIP("xvcvspsxds v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_F64toI64S,
+                                 mkU32( Irrm_ZERO ),
+                                 unop( Iop_F32toF64,
+                                       unop( Iop_ReinterpI32asF32, mkexpr( b3 ) ) ) ),
+                          binop( Iop_F64toI64S,
+                                 mkU32( Irrm_ZERO ),
+                                 unop( Iop_F32toF64,
+                                       unop( Iop_ReinterpI32asF32, mkexpr( b1 ) ) ) ) ) );
+         break;
+      case 0x310: // xvcvspuxds (VSX Vector truncate Single-Precision to integer and
+                  //            Convert to Unsigned Integer Doubleword format with Saturate)
+         DIP("xvcvspuxds v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_F64toI64U,
+                                 mkU32( Irrm_ZERO ),
+                                 unop( Iop_F32toF64,
+                                       unop( Iop_ReinterpI32asF32, mkexpr( b3 ) ) ) ),
+                          binop( Iop_F64toI64U,
+                                 mkU32( Irrm_ZERO ),
+                                 unop( Iop_F32toF64,
+                                       unop( Iop_ReinterpI32asF32, mkexpr( b1 ) ) ) ) ) );
+         break;
+      case 0x3B0: // xvcvdpsxds (VSX Vector truncate Double-Precision to integer and
+                  //             Convert to Signed Integer Doubleword format with Saturate)
+         DIP("xvcvdpsxds v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_F64toI64S, mkU32( Irrm_ZERO ), mkexpr( xB ) ),
+                          binop( Iop_F64toI64S, mkU32( Irrm_ZERO ), mkexpr( xB2 ) ) ) );
+         break;
+      case 0x3f0: // xvcvsxddp (VSX Vector Convert and round Signed Integer Doubleword
+                  //            to Double-Precision format)
+         DIP("xvcvsxddp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_ReinterpF64asI64,
+                                binop( Iop_I64StoF64,
+                                       get_IR_roundingmode(),
+                                       mkexpr( xB ) ) ),
+                          unop( Iop_ReinterpF64asI64,
+                                binop( Iop_I64StoF64,
+                                       get_IR_roundingmode(),
+                                       mkexpr( xB2 ) ) ) ) );
+         break;
+      case 0x3d0: // xvcvuxddp (VSX Vector Convert and round Unsigned Integer Doubleword
+                  //            to Double-Precision format)
+         DIP("xvcvuxddp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_ReinterpF64asI64,
+                                binop( Iop_I64UtoF64,
+                                       get_IR_roundingmode(),
+                                       mkexpr( xB ) ) ),
+                          unop( Iop_ReinterpF64asI64,
+                                binop( Iop_I64UtoF64,
+                                       get_IR_roundingmode(),
+                                       mkexpr( xB2 ) ) ) ) );
+         break;
+      case 0x370: // xvcvsxdsp (VSX Vector Convert and round Signed Integer Doubleword
+                  //            to Single-Precision format)
+         DIP("xvcvsxddp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64,
+                                 unop( Iop_ReinterpF32asI32,
+                                       unop( Iop_TruncF64asF32,
+                                             binop( Iop_RoundF64toF32,
+                                                    get_IR_roundingmode(),
+                                                    binop( Iop_I64StoF64,
+                                                           get_IR_roundingmode(),
+                                                           mkexpr( xB ) ) ) ) ),
+                                 mkU32( 0 ) ),
+                          binop( Iop_32HLto64,
+                                 unop( Iop_ReinterpF32asI32,
+                                       unop( Iop_TruncF64asF32,
+                                             binop( Iop_RoundF64toF32,
+                                                    get_IR_roundingmode(),
+                                                    binop( Iop_I64StoF64,
+                                                           get_IR_roundingmode(),
+                                                           mkexpr( xB2 ) ) ) ) ),
+                                 mkU32( 0 ) ) ) );
+         break;
+      case 0x350: // xvcvuxdsp (VSX Vector Convert and round Unsigned Integer Doubleword
+                  //            to Single-Precision format)
+         DIP("xvcvuxddp v%u,v%u\n", (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64,
+                                 unop( Iop_ReinterpF32asI32,
+                                       unop( Iop_TruncF64asF32,
+                                             binop( Iop_RoundF64toF32,
+                                                    get_IR_roundingmode(),
+                                                    binop( Iop_I64UtoF64,
+                                                           get_IR_roundingmode(),
+                                                           mkexpr( xB ) ) ) ) ),
+                                 mkU32( 0 ) ),
+                          binop( Iop_32HLto64,
+                                 unop( Iop_ReinterpF32asI32,
+                                       unop( Iop_TruncF64asF32,
+                                             binop( Iop_RoundF64toF32,
+                                                    get_IR_roundingmode(),
+                                                    binop( Iop_I64UtoF64,
+                                                           get_IR_roundingmode(),
+                                                           mkexpr( xB2 ) ) ) ) ),
+                                 mkU32( 0 ) ) ) );
+         break;
+
+      case 0x1f0: // xvcvsxwdp (VSX Vector Convert Signed Integer Word to Double-Precision format)
+         DIP("xvcvsxwdp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_ReinterpF64asI64,
+                                binop( Iop_I64StoF64, get_IR_roundingmode(),
+                                       unop( Iop_32Sto64, mkexpr( b3 ) ) ) ),
+                          unop( Iop_ReinterpF64asI64,
+                                binop( Iop_I64StoF64, get_IR_roundingmode(),
+                                       unop( Iop_32Sto64, mkexpr( b1 ) ) ) ) ) );
+         break;
+      case 0x1d0: // xvcvuxwdp (VSX Vector Convert Unsigned Integer Word to Double-Precision format)
+         DIP("xvcvuxwdp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_ReinterpF64asI64,
+                                binop( Iop_I64UtoF64, get_IR_roundingmode(),
+                                       unop( Iop_32Uto64, mkexpr( b3 ) ) ) ),
+                          unop( Iop_ReinterpF64asI64,
+                                binop( Iop_I64UtoF64, get_IR_roundingmode(),
+                                       unop( Iop_32Uto64, mkexpr( b1 ) ) ) ) ) );
+         break;
+      case 0x170: // xvcvsxwsp (VSX Vector Convert Signed Integer Word to Single-Precision format)
+         DIP("xvcvsxwsp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT, unop( Iop_I32StoFx4, getVSReg( XB ) ) );
+         break;
+      case 0x150: // xvcvuxwsp (VSX Vector Convert Unsigned Integer Word to Single-Precision format)
+         DIP("xvcvuxwsp v%u,v%u\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT, unop( Iop_I32UtoFx4, getVSReg( XB ) ) );
+         break;
+
+      default:
+         vex_printf( "dis_vx_conv(ppc)(opc2)\n" );
+         return False;
+   }
+   return True;
+}
+
+/*
+ * VSX vector Double Precision Floating Point Arithmetic Instructions
+ */
+static Bool
+dis_vxv_dp_arith ( UInt theInstr, UInt opc2 )
+{
+   /* XX3-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XT = ifieldRegXT( theInstr );
+   UChar XA = ifieldRegXA( theInstr );
+   UChar XB = ifieldRegXB( theInstr );
+   IRExpr* rm = get_IR_roundingmode();
+   IRTemp frA = newTemp(Ity_F64);
+   IRTemp frB = newTemp(Ity_F64);
+   IRTemp frA2 = newTemp(Ity_F64);
+   IRTemp frB2 = newTemp(Ity_F64);
+
+   if (opc1 != 0x3C) {
+      vex_printf( "dis_vxv_dp_arith(ppc)(instr)\n" );
+      return False;
+   }
+
+   assign(frA,  unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XA ))));
+   assign(frB,  unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XB ))));
+   assign(frA2, unop(Iop_ReinterpI64asF64, unop(Iop_V128to64, getVSReg( XA ))));
+   assign(frB2, unop(Iop_ReinterpI64asF64, unop(Iop_V128to64, getVSReg( XB ))));
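+   /* The two 64-bit halves of each 128-bit VSX source are treated as
+    * independent double-precision lanes: frA/frB are the high lanes,
+    * frA2/frB2 the low lanes.
+    */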
+
+   switch (opc2) {
+      case 0x1E0: // xvdivdp (VSX Vector Divide Double-Precision)
+      case 0x1C0: // xvmuldp (VSX Vector Multiply Double-Precision)
+      case 0x180: // xvadddp (VSX Vector Add Double-Precision)
+      case 0x1A0: // xvsubdp (VSX Vector Subtract Double-Precision)
+      {
+         IROp mOp;
+         Char * oper_name;
+         switch (opc2) {
+            case 0x1E0:
+               mOp = Iop_DivF64;
+               oper_name = "div";
+               break;
+            case 0x1C0:
+               mOp = Iop_MulF64;
+               oper_name = "mul";
+               break;
+            case 0x180:
+               mOp = Iop_AddF64;
+               oper_name = "add";
+               break;
+            case 0x1A0:
+               mOp = Iop_SubF64;
+               oper_name = "sub";
+               break;
+
+            default:
+               vpanic("The impossible happened: dis_vxv_dp_arith(ppc)");
+         }
+         IRTemp hiResult = newTemp(Ity_I64);
+         IRTemp loResult = newTemp(Ity_I64);
+         DIP("xv%sdp v%d,v%d,v%d\n", oper_name, (UInt)XT, (UInt)XA, (UInt)XB);
+
+         assign( hiResult,
+                 unop( Iop_ReinterpF64asI64,
+                       triop( mOp, rm, mkexpr( frA ), mkexpr( frB ) ) ) );
+         assign( loResult,
+                 unop( Iop_ReinterpF64asI64,
+                       triop( mOp, rm, mkexpr( frA2 ), mkexpr( frB2 ) ) ) );
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128, mkexpr( hiResult ), mkexpr( loResult ) ) );
+         break;
+      }
+      case 0x196: // xvsqrtdp
+      {
+         IRTemp hiResult = newTemp(Ity_I64);
+         IRTemp loResult = newTemp(Ity_I64);
+         DIP("xvsqrtdp v%d,v%d\n", (UInt)XT, (UInt)XB);
+
+         assign( hiResult,
+                 unop( Iop_ReinterpF64asI64,
+                       binop( Iop_SqrtF64, rm, mkexpr( frB ) ) ) );
+         assign( loResult,
+                 unop( Iop_ReinterpF64asI64,
+                       binop( Iop_SqrtF64, rm, mkexpr( frB2 ) ) ) );
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128, mkexpr( hiResult ), mkexpr( loResult ) ) );
+         break;
+      }
+      case 0x184: case 0x1A4: // xvmaddadp, xvmaddmdp (VSX Vector Multiply-Add Double-Precision)
+      case 0x1C4: case 0x1E4: // xvmsubadp, xvmsubmdp (VSX Vector Multiply-Subtract Double-Precision)
+      case 0x384: case 0x3A4: // xvnmaddadp, xvnmaddmdp (VSX Vector Negate Multiply-Add Double-Precision)
+      case 0x3C4: case 0x3E4: // xvnmsubadp, xvnmsubmdp (VSX Vector Negate Multiply-Subtract Double-Precision)
+      {
+         /* xvm{add|sub}mdp XT,XA,XB is element-wise equivalent to fm{add|sub} FRT,FRA,FRC,FRB with . . .
+          *    XT == FRC
+          *    XA == FRA
+          *    XB == FRB
+          *
+          * and for xvm{add|sub}adp . . .
+          *    XT == FRB
+          *    XA == FRA
+          *    XB == FRC
+          */
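+         /* Concretely, with qop(mOp, rm, a, b, c) computing (a * b) +/- c:
+          * the 'adp' forms yield XA*XB +/- XT and the 'mdp' forms yield
+          * XA*XT +/- XB per 64-bit lane, negated afterwards for the
+          * xvn... variants.
+          */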
+         Bool negate;
+         IROp mOp = Iop_INVALID;
+         Char * oper_name = NULL;
+         Bool mdp = False;
+
+         switch (opc2) {
+            case 0x184: case 0x1A4:
+            case 0x384: case 0x3A4:
+               mOp = Iop_MAddF64;
+               oper_name = "add";
+               mdp = (opc2 & 0x0FF) == 0x0A4;
+               break;
+
+            case 0x1C4: case 0x1E4:
+            case 0x3C4: case 0x3E4:
+               mOp = Iop_MSubF64;
+               oper_name = "sub";
+               mdp = (opc2 & 0x0FF) == 0x0E4;
+               break;
+
+            default:
+               vpanic("The impossible happened: dis_vxv_sp_arith(ppc)");
+         }
+
+         switch (opc2) {
+            case 0x384: case 0x3A4:
+            case 0x3C4: case 0x3E4:
+               negate = True;
+               break;
+            default:
+               negate = False;
+         }
+         IRTemp hiResult = newTemp(Ity_I64);
+         IRTemp loResult = newTemp(Ity_I64);
+         IRTemp frT = newTemp(Ity_F64);
+         IRTemp frT2 = newTemp(Ity_F64);
+         DIP("xv%sm%s%s v%d,v%d,v%d\n", negate ? "n" : "", oper_name, mdp ? "mdp" : "adp",
+             (UInt)XT, (UInt)XA, (UInt)XB);
+         assign(frT,  unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XT ) ) ) );
+         assign(frT2, unop(Iop_ReinterpI64asF64, unop(Iop_V128to64, getVSReg( XT ) ) ) );
+
+         assign( hiResult,
+                 unop( Iop_ReinterpF64asI64,
+                       qop( mOp,
+                            rm,
+                            mkexpr( frA ),
+                            mkexpr( mdp ? frT : frB ),
+                            mkexpr( mdp ? frB : frT ) ) ) );
+         assign( loResult,
+                 unop( Iop_ReinterpF64asI64,
+                       qop( mOp,
+                            rm,
+                            mkexpr( frA2 ),
+                            mkexpr( mdp ? frT2 : frB2 ),
+                            mkexpr( mdp ? frB2 : frT2 ) ) ) );
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          mkexpr( negate ? getNegatedResult( hiResult )
+                                         : hiResult ),
+                          mkexpr( negate ? getNegatedResult( loResult )
+                                         : loResult ) ) );
+         break;
+      }
+      case 0x1D4: // xvtsqrtdp (VSX Vector Test for software Square Root Double-Precision)
+      {
+         IRTemp frBHi_I64 = newTemp(Ity_I64);
+         IRTemp frBLo_I64 = newTemp(Ity_I64);
+         IRTemp flagsHi = newTemp(Ity_I32);
+         IRTemp flagsLo = newTemp(Ity_I32);
+         UChar crfD     = toUChar( IFIELD( theInstr, 23, 3 ) );
+         IRTemp  fe_flagHi, fg_flagHi, fe_flagLo, fg_flagLo;
+         fe_flagHi = fg_flagHi = fe_flagLo = fg_flagLo = IRTemp_INVALID;
+
+         DIP("xvtsqrtdp cr%d,v%d\n", (UInt)crfD, (UInt)XB);
+         assign( frBHi_I64, unop(Iop_V128HIto64, getVSReg( XB )) );
+         assign( frBLo_I64, unop(Iop_V128to64, getVSReg( XB )) );
+         do_fp_tsqrt(frBHi_I64, False /*not single precision*/, &fe_flagHi, &fg_flagHi);
+         do_fp_tsqrt(frBLo_I64, False /*not single precision*/, &fe_flagLo, &fg_flagLo);
+         /* The CR field consists of fl_flag || fg_flag || fe_flag || 0b0
+          * where fl_flag == 1 on ppc64.
+          */
+         assign( flagsHi,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flagHi), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flagHi), mkU8( 1 ) ) ) );
+         assign( flagsLo,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flagLo), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flagLo), mkU8( 1 ) ) ) );
+         putGST_field( PPC_GST_CR,
+                       binop( Iop_Or32, mkexpr( flagsHi ), mkexpr( flagsLo ) ),
+                       crfD );
+         break;
+      }
+      case 0x1F4: // xvtdivdp (VSX Vector Test for software Divide Double-Precision)
+      {
+         IRTemp frBHi_I64 = newTemp(Ity_I64);
+         IRTemp frBLo_I64 = newTemp(Ity_I64);
+         IRTemp frAHi_I64 = newTemp(Ity_I64);
+         IRTemp frALo_I64 = newTemp(Ity_I64);
+         IRTemp flagsHi = newTemp(Ity_I32);
+         IRTemp flagsLo = newTemp(Ity_I32);
+         UChar crfD     = toUChar( IFIELD( theInstr, 23, 3 ) );
+         IRTemp  fe_flagHi, fg_flagHi, fe_flagLo, fg_flagLo;
+         fe_flagHi = fg_flagHi = fe_flagLo = fg_flagLo = IRTemp_INVALID;
+
+         DIP("xvtdivdp cr%d,v%d,v%d\n", (UInt)crfD, (UInt)XA, (UInt)XB);
+         assign( frAHi_I64, unop(Iop_V128HIto64, getVSReg( XA )) );
+         assign( frALo_I64, unop(Iop_V128to64, getVSReg( XA )) );
+         assign( frBHi_I64, unop(Iop_V128HIto64, getVSReg( XB )) );
+         assign( frBLo_I64, unop(Iop_V128to64, getVSReg( XB )) );
+
+         _do_fp_tdiv(frAHi_I64, frBHi_I64, False/*dp*/, &fe_flagHi, &fg_flagHi);
+         _do_fp_tdiv(frALo_I64, frBLo_I64, False/*dp*/, &fe_flagLo, &fg_flagLo);
+         /* The CR field consists of fl_flag || fg_flag || fe_flag || 0b0
+          * where fl_flag == 1 on ppc64.
+          */
+         assign( flagsHi,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flagHi), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flagHi), mkU8( 1 ) ) ) );
+         assign( flagsLo,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flagLo), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flagLo), mkU8( 1 ) ) ) );
+         putGST_field( PPC_GST_CR,
+                       binop( Iop_Or32, mkexpr( flagsHi ), mkexpr( flagsLo ) ),
+                       crfD );
+         break;
+      }
+
+      default:
+         vex_printf( "dis_vxv_dp_arith(ppc)(opc2)\n" );
+         return False;
+   }
+   return True;
+}
+
+/*
+ * VSX vector Single Precision Floating Point Arithmetic Instructions
+ */
+static Bool
+dis_vxv_sp_arith ( UInt theInstr, UInt opc2 )
+{
+   /* XX3-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XT = ifieldRegXT( theInstr );
+   UChar XA = ifieldRegXA( theInstr );
+   UChar XB = ifieldRegXB( theInstr );
+   IRExpr* rm = get_IR_roundingmode();
+   IRTemp a3, a2, a1, a0;
+   IRTemp b3, b2, b1, b0;
+   IRTemp res0 = newTemp(Ity_I32);
+   IRTemp res1 = newTemp(Ity_I32);
+   IRTemp res2 = newTemp(Ity_I32);
+   IRTemp res3 = newTemp(Ity_I32);
+
+   a3 = a2 = a1 = a0 = IRTemp_INVALID;
+   b3 = b2 = b1 = b0 = IRTemp_INVALID;
+
+   if (opc1 != 0x3C) {
+      vex_printf( "dis_vxv_sp_arith(ppc)(instr)\n" );
+      return False;
+   }
+
+   switch (opc2) {
+      case 0x100: // xvaddsp (VSX Vector Add Single-Precision)
+         DIP("xvaddsp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop(Iop_Add32Fx4, getVSReg( XA ), getVSReg( XB )) );
+         break;
+
+      case 0x140: // xvmulsp (VSX Vector Multiply Single-Precision)
+         DIP("xvmulsp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop(Iop_Mul32Fx4, getVSReg( XA ), getVSReg( XB )) );
+         break;
+
+      case 0x120: // xvsubsp (VSX Vector Subtract Single-Precision)
+         DIP("xvsubsp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop(Iop_Sub32Fx4, getVSReg( XA ), getVSReg( XB )) );
+         break;
+
+      case 0x160: // xvdivsp (VSX Vector Divide Single-Precision)
+      {
+         /* Iop_Div32Fx4 is not implemented for ppc64 (in host_ppc_{isel|defs}.c).
+          * So there are two choices:
+          *   1. Implement the xvdivsp with a native insn; or
+          *   2. Extract the 4 single precision floats from each vector
+          *      register inputs and perform fdivs on each pair
+          * I will do the latter, due to the general philosophy of
+          * reusing existing implementations when practical.
+          */
+         DIP("xvdivsp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         breakV128to4xF64( getVSReg( XA ), &a3, &a2, &a1, &a0 );
+         breakV128to4xF64( getVSReg( XB ), &b3, &b2, &b1, &b0 );
+
+         assign( res0,
+              unop( Iop_ReinterpF32asI32,
+                    unop( Iop_TruncF64asF32,
+                          triop( Iop_DivF64r32, rm, mkexpr( a0 ), mkexpr( b0 ) ) ) ) );
+         assign( res1,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             triop( Iop_DivF64r32, rm, mkexpr( a1 ), mkexpr( b1 ) ) ) ) );
+         assign( res2,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             triop( Iop_DivF64r32, rm, mkexpr( a2 ), mkexpr( b2 ) ) ) ) );
+         assign( res3,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             triop( Iop_DivF64r32, rm, mkexpr( a3 ), mkexpr( b3 ) ) ) ) );
+
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64, mkexpr( res3 ), mkexpr( res2 ) ),
+                          binop( Iop_32HLto64, mkexpr( res1 ), mkexpr( res0 ) ) ) );
+         break;
+      }
+      case 0x116: // xvsqrtsp (VSX Vector Square Root Single-Precision)
+      {
+         DIP("xvsqrtsp v%d,v%d\n", (UInt)XT, (UInt)XB);
+         breakV128to4xF64( getVSReg( XB ), &b3, &b2, &b1, &b0 );
+         /* Note: The native xvsqrtsp instruction does not always give the same
+          * precision as what we get with Iop_SqrtF64.  But it doesn't seem
+          * worthwhile to implement an Iop_SqrtF32 that would give us a
+          * lower-precision result, albeit one truer to the actual instruction.
+          */
+
+         assign( res0,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             binop(Iop_SqrtF64, rm, mkexpr( b0 ) ) ) ) );
+         assign( res1,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             binop(Iop_SqrtF64, rm, mkexpr( b1 ) ) ) ) );
+         assign( res2,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             binop(Iop_SqrtF64, rm, mkexpr( b2 ) ) ) ) );
+         assign( res3,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             binop(Iop_SqrtF64, rm, mkexpr( b3 ) ) ) ) );
+
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64, mkexpr( res3 ), mkexpr( res2 ) ),
+                          binop( Iop_32HLto64, mkexpr( res1 ), mkexpr( res0 ) ) ) );
+         break;
+      }
+
+      case 0x104: case 0x124: // xvmaddasp, xvmaddmsp (VSX Vector Multiply-Add Single-Precision)
+      case 0x144: case 0x164: // xvmsubasp, xvmsubmsp (VSX Vector Multiply-Subtract Single-Precision)
+      case 0x304: case 0x324: // xvnmaddasp, xvnmaddmsp (VSX Vector Negate Multiply-Add Single-Precision)
+      case 0x344: case 0x364: // xvnmsubasp, xvnmsubmsp (VSX Vector Negate Multiply-Subtract Single-Precision)
+      {
+         IRTemp t3, t2, t1, t0;
+         Bool msp = False;
+         Bool negate;
+         Char * oper_name = NULL;
+         IROp mOp = Iop_INVALID;
+         switch (opc2) {
+            case 0x104: case 0x124:
+            case 0x304: case 0x324:
+               msp = (opc2 & 0x0FF) == 0x024;
+               mOp = Iop_MAddF64r32;
+               oper_name = "madd";
+               break;
+
+            case 0x144: case 0x164:
+            case 0x344: case 0x364:
+               msp = (opc2 & 0x0FF) == 0x064;
+               mOp = Iop_MSubF64r32;
+               oper_name = "sub";
+               break;
+
+            default:
+               vpanic("The impossible happened: dis_vxv_sp_arith(ppc)");
+         }
+
+         switch (opc2) {
+            case 0x304: case 0x324:
+            case 0x344: case 0x364:
+               negate = True;
+               break;
+
+            default:
+               negate = False;
+         }
+
+         DIP("xv%sm%s%s v%d,v%d,v%d\n", negate ? "n" : "", oper_name, msp ? "msp" : "asp",
+             (UInt)XT, (UInt)XA, (UInt)XB);
+
+         t3 = t2 = t1 = t0 = IRTemp_INVALID;
+         breakV128to4xF64( getVSReg( XA ), &a3, &a2, &a1, &a0 );
+         breakV128to4xF64( getVSReg( XB ), &b3, &b2, &b1, &b0 );
+         breakV128to4xF64( getVSReg( XT ), &t3, &t2, &t1, &t0 );
+
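+         /* Operand order: the A-form ("asp") variants compute
+          * XT = XA * XB +/- XT, while the M-form ("msp") variants compute
+          * XT = XA * XT +/- XB; hence the msp-dependent swap of the last
+          * two qop operands below.
+          */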
+         assign( res0,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             qop( mOp,
+                                  rm,
+                                  mkexpr( a0 ),
+                                  mkexpr( msp ? t0 : b0 ),
+                                  mkexpr( msp ? b0 : t0 ) ) ) ) );
+         assign( res1,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             qop( mOp,
+                                  rm,
+                                  mkexpr( a1 ),
+                                  mkexpr( msp ? t1 : b1 ),
+                                  mkexpr( msp ? b1 : t1 ) ) ) ) );
+         assign( res2,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             qop( mOp,
+                                  rm,
+                                  mkexpr( a2 ),
+                                  mkexpr( msp ? t2 : b2 ),
+                                  mkexpr( msp ? b2 : t2 ) ) ) ) );
+         assign( res3,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             qop( mOp,
+                                  rm,
+                                  mkexpr( a3 ),
+                                  mkexpr( msp ? t3 : b3 ),
+                                  mkexpr( msp ? b3 : t3 ) ) ) ) );
+
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64, mkexpr( negate ? getNegatedResult_32( res3 ) : res3 ),
+                                 mkexpr( negate ? getNegatedResult_32( res2 ) : res2 ) ),
+                          binop( Iop_32HLto64, mkexpr( negate ? getNegatedResult_32( res1 ) : res1 ),
+                                 mkexpr( negate ? getNegatedResult_32( res0 ) : res0 ) ) ) );
+
+         break;
+      }
+      case 0x154: // xvtsqrtsp (VSX Vector Test for software Square Root Single-Precision)
+      {
+         IRTemp flags0 = newTemp(Ity_I32);
+         IRTemp flags1 = newTemp(Ity_I32);
+         IRTemp flags2 = newTemp(Ity_I32);
+         IRTemp flags3 = newTemp(Ity_I32);
+         UChar crfD     = toUChar( IFIELD( theInstr, 23, 3 ) );
+         IRTemp  fe_flag0, fg_flag0, fe_flag1, fg_flag1;
+         IRTemp  fe_flag2, fg_flag2, fe_flag3, fg_flag3;
+         fe_flag0 = fg_flag0 = fe_flag1 = fg_flag1 = IRTemp_INVALID;
+         fe_flag2 = fg_flag2 = fe_flag3 = fg_flag3 = IRTemp_INVALID;
+         DIP("xvtsqrtsp cr%d,v%d\n", (UInt)crfD, (UInt)XB);
+
+         breakV128to4x32( getVSReg( XB ), &b3, &b2, &b1, &b0 );
+         do_fp_tsqrt(b0, True /* single precision*/, &fe_flag0, &fg_flag0);
+         do_fp_tsqrt(b1, True /* single precision*/, &fe_flag1, &fg_flag1);
+         do_fp_tsqrt(b2, True /* single precision*/, &fe_flag2, &fg_flag2);
+         do_fp_tsqrt(b3, True /* single precision*/, &fe_flag3, &fg_flag3);
+
+         /* The CR field consists of fl_flag || fg_flag || fe_flag || 0b0
+          * where fl_flag == 1 on ppc64.
+          */
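+         /* For example, fg_flag = 1 and fe_flag = 0 yields
+          * 8 | (1 << 2) | (0 << 1) = 0b1100.
+          */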
+         assign( flags0,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flag0), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flag0), mkU8( 1 ) ) ) );
+         assign( flags1,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flag1), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flag1), mkU8( 1 ) ) ) );
+         assign( flags2,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flag2), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flag2), mkU8( 1 ) ) ) );
+         assign( flags3,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flag3), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flag3), mkU8( 1 ) ) ) );
+         putGST_field( PPC_GST_CR,
+                       binop( Iop_Or32,
+                              mkexpr( flags0 ),
+                              binop( Iop_Or32,
+                                     mkexpr( flags1 ),
+                                     binop( Iop_Or32,
+                                            mkexpr( flags2 ),
+                                            mkexpr( flags3 ) ) ) ),
+                       crfD );
+
+         break;
+      }
+      case 0x174: // xvtdivsp (VSX Vector Test for software Divide Single-Precision)
+      {
+         IRTemp flags0 = newTemp(Ity_I32);
+         IRTemp flags1 = newTemp(Ity_I32);
+         IRTemp flags2 = newTemp(Ity_I32);
+         IRTemp flags3 = newTemp(Ity_I32);
+         UChar crfD     = toUChar( IFIELD( theInstr, 23, 3 ) );
+         IRTemp  fe_flag0, fg_flag0, fe_flag1, fg_flag1;
+         IRTemp  fe_flag2, fg_flag2, fe_flag3, fg_flag3;
+         fe_flag0 = fg_flag0 = fe_flag1 = fg_flag1 = IRTemp_INVALID;
+         fe_flag2 = fg_flag2 = fe_flag3 = fg_flag3 = IRTemp_INVALID;
+         DIP("xvtdivsp cr%d,v%d,v%d\n", (UInt)crfD, (UInt)XA, (UInt)XB);
+
+         breakV128to4x32( getVSReg( XA ), &a3, &a2, &a1, &a0 );
+         breakV128to4x32( getVSReg( XB ), &b3, &b2, &b1, &b0 );
+         _do_fp_tdiv(a0, b0, True /* single precision*/, &fe_flag0, &fg_flag0);
+         _do_fp_tdiv(a1, b1, True /* single precision*/, &fe_flag1, &fg_flag1);
+         _do_fp_tdiv(a2, b2, True /* single precision*/, &fe_flag2, &fg_flag2);
+         _do_fp_tdiv(a3, b3, True /* single precision*/, &fe_flag3, &fg_flag3);
+
+         /* The CR field consists of fl_flag || fg_flag || fe_flag || 0b0
+          * where fl_flag == 1 on ppc64.
+          */
+         assign( flags0,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flag0), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flag0), mkU8( 1 ) ) ) );
+         assign( flags1,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flag1), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flag1), mkU8( 1 ) ) ) );
+         assign( flags2,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flag2), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flag2), mkU8( 1 ) ) ) );
+         assign( flags3,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flag3), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flag3), mkU8( 1 ) ) ) );
+         putGST_field( PPC_GST_CR,
+                       binop( Iop_Or32,
+                              mkexpr( flags0 ),
+                              binop( Iop_Or32,
+                                     mkexpr( flags1 ),
+                                     binop( Iop_Or32,
+                                            mkexpr( flags2 ),
+                                            mkexpr( flags3 ) ) ) ),
+                       crfD );
+
+         break;
+      }
+
+      default:
+         vex_printf( "dis_vxv_sp_arith(ppc)(opc2)\n" );
+         return False;
+   }
+   return True;
+}
+
+typedef enum {
+   PPC_CMP_EQ = 2,
+   PPC_CMP_GT = 4,
+   PPC_CMP_GE = 6,
+   PPC_CMP_LT = 8
+} ppc_cmp_t;
+
+
+/*
+  This helper function takes as input the IRExpr returned
+  from a binop( Iop_CmpF64, fpA, fpB), whose result is returned
+  in IR form.  This helper function converts it to PPC form.
+
+  Map compare result from IR to PPC
+
+  FP cmp result | PPC | IR
+  --------------------------
+  UN            | 0x1 | 0x45
+  EQ            | 0x2 | 0x40
+  GT            | 0x4 | 0x00
+  LT            | 0x8 | 0x01
+
+ condcode = Shl(1, (~(ccIR>>5) & 2)
+                    | ((ccIR ^ (ccIR>>6)) & 1))
+*/
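+/* Worked example: for an EQ result, ccIR = 0x40, so (~(0x40>>5) & 2) = 0
+   and ((0x40 ^ (0x40>>6)) & 1) = 1, giving condcode = 1 << 1 = 0x2 (PPC EQ).
+   For LT, ccIR = 0x01 gives shift (2 | 1) = 3, so condcode = 1 << 3 = 0x8
+   (PPC LT). */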
+static IRTemp
+get_fp_cmp_CR_val (IRExpr * ccIR_expr)
+{
+   IRTemp condcode = newTemp( Ity_I32 );
+   IRTemp ccIR = newTemp( Ity_I32 );
+
+   assign(ccIR, ccIR_expr);
+   assign( condcode,
+           binop( Iop_Shl32,
+                  mkU32( 1 ),
+                  unop( Iop_32to8,
+                        binop( Iop_Or32,
+                               binop( Iop_And32,
+                                      unop( Iop_Not32,
+                                            binop( Iop_Shr32,
+                                                   mkexpr( ccIR ),
+                                                   mkU8( 5 ) ) ),
+                                      mkU32( 2 ) ),
+                               binop( Iop_And32,
+                                      binop( Iop_Xor32,
+                                             mkexpr( ccIR ),
+                                             binop( Iop_Shr32,
+                                                    mkexpr( ccIR ),
+                                                    mkU8( 6 ) ) ),
+                                      mkU32( 1 ) ) ) ) ) );
+   return condcode;
+}
+
+/*
+ * Helper function for get_max_min_fp, ascertaining the max or min of two
+ * doubles according to these special rules:
+ *   - The max/min of a QNaN and any value is that value
+ *     (When two QNaNs are being compared, the frA QNaN is the return value.)
+ *   - The max/min of any value and an SNaN is that SNaN converted to a QNaN
+ *     (When two SNaNs are being compared, the frA SNaN is converted to a QNaN.)
+ */
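+/*
+ * For example, max(QNaN, 3.0) = 3.0, whereas max(2.0, SNaN) = that SNaN
+ * quieted, i.e. with its QNaN bit set: 0x7ff4000000000000 becomes
+ * 0x7ffc000000000000.
+ */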
+static IRExpr * _get_maxmin_fp_NaN(IRTemp frA_I64, IRTemp frB_I64)
+{
+   IRTemp frA_isNaN = newTemp(Ity_I1);
+   IRTemp frB_isNaN = newTemp(Ity_I1);
+   IRTemp frA_isSNaN = newTemp(Ity_I1);
+   IRTemp frB_isSNaN = newTemp(Ity_I1);
+   IRTemp frA_isQNaN = newTemp(Ity_I1);
+   IRTemp frB_isQNaN = newTemp(Ity_I1);
+
+   assign( frA_isNaN, is_NaN( frA_I64 ) );
+   assign( frB_isNaN, is_NaN( frB_I64 ) );
+   // If the operand is a NaN and bit 12 is '0', then it's an SNaN
+   assign( frA_isSNaN,
+           mkAND1( mkexpr(frA_isNaN),
+                   binop( Iop_CmpEQ32,
+                          binop( Iop_And32,
+                                 unop( Iop_64HIto32, mkexpr( frA_I64 ) ),
+                                 mkU32( 0x00080000 ) ),
+                          mkU32( 0 ) ) ) );
+   assign( frB_isSNaN,
+           mkAND1( mkexpr(frB_isNaN),
+                   binop( Iop_CmpEQ32,
+                          binop( Iop_And32,
+                                 unop( Iop_64HIto32, mkexpr( frB_I64 ) ),
+                                 mkU32( 0x00080000 ) ),
+                          mkU32( 0 ) ) ) );
+   assign( frA_isQNaN,
+           mkAND1( mkexpr( frA_isNaN ), unop( Iop_Not1, mkexpr( frA_isSNaN ) ) ) );
+   assign( frB_isQNaN,
+           mkAND1( mkexpr( frB_isNaN ), unop( Iop_Not1, mkexpr( frB_isSNaN ) ) ) );
+
+   /* Based on the rules specified in the function prologue, the algorithm is as follows:
+    *  <<<<<<<<<>>>>>>>>>>>>>>>>>>
+    *   if frA is a SNaN
+    *     result = frA converted to QNaN
+    *   else if frB is a SNaN
+    *     result = frB converted to QNaN
+    *   else if frB is a QNaN
+    *     result = frA
+    *   // One of frA or frB was a NaN in order for this function to be called, so
+    *   // if we get to this point, we KNOW that frA must be a QNaN.
+    *   else // frA is a QNaN
+    *     result = frB
+    *  <<<<<<<<<>>>>>>>>>>>>>>>>>>
+    */
+
+#define SNAN_MASK 0x0008000000000000ULL
+   return
+   IRExpr_Mux0X(unop(Iop_1Uto8, mkexpr(frA_isSNaN)),
+                /* else:  if frB is a SNaN */
+                IRExpr_Mux0X(unop(Iop_1Uto8, mkexpr(frB_isSNaN)),
+                             /* else:  if frB is a QNaN */
+                             IRExpr_Mux0X(unop(Iop_1Uto8, mkexpr(frB_isQNaN)),
+                                          /* else:  frA is a QNaN, so result = frB */
+                                          mkexpr(frB_I64),
+                                          /* then: result = frA */
+                                          mkexpr(frA_I64)),
+                             /* then: result = frB converted to QNaN */
+                             binop(Iop_Or64, mkexpr(frB_I64), mkU64(SNAN_MASK))),
+                /* then: result = frA converted to QNaN */
+                binop(Iop_Or64, mkexpr(frA_I64), mkU64(SNAN_MASK)));
+}
+
+/*
+ * Helper function for get_max_min_fp.
+ */
+static IRExpr * _get_maxmin_fp_cmp(IRTemp src1, IRTemp src2, Bool isMin)
+{
+   IRTemp src1cmpsrc2 = get_fp_cmp_CR_val( binop( Iop_CmpF64,
+                                                  unop( Iop_ReinterpI64asF64,
+                                                        mkexpr( src1 ) ),
+                                                  unop( Iop_ReinterpI64asF64,
+                                                        mkexpr( src2 ) ) ) );
+
+   return IRExpr_Mux0X( unop( Iop_1Uto8,
+                              binop( Iop_CmpEQ32,
+                                     mkexpr( src1cmpsrc2 ),
+                                     mkU32( isMin ? PPC_CMP_LT : PPC_CMP_GT ) ) ),
+                        /* else: use src2 */
+                        mkexpr( src2 ),
+                        /* then: use src1 */
+                        mkexpr( src1 ) );
+}
+
+/*
+ * Helper function for "Maximum/Minimum Double Precision" operations.
+ * Arguments: frA and frB are Ity_I64
+ * Returns an Ity_I64 IRExpr that answers the "which is the Maximum/Minimum" question
+ */
+static IRExpr * get_max_min_fp(IRTemp frA_I64, IRTemp frB_I64, Bool isMin)
+{
+   /* There are three special cases where get_fp_cmp_CR_val is not helpful
+    * for ascertaining the maximum between two doubles:
+    *   1. The max of +0 and -0 is +0; the min of +0 and -0 is -0.
+    *   2. The max/min of a QNaN and any value is that value.
+    *   3. The max/min of any value and an SNaN is that SNaN converted to a QNaN.
+    * We perform the check for [+/-]0 here in this function and use the
+    * _get_maxmin_fp_NaN helper for the two NaN cases; otherwise we call _get_maxmin_fp_cmp
+    * to do the standard comparison function.
+    */
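+   /* E.g. for min(+0.0, -0.0), the both-zero branch below sees that frA's
+    * high word is 0 (not 0x80000000), so it falls through to frB and
+    * correctly returns -0.0.
+    */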
+   IRTemp anyNaN = newTemp(Ity_I1);
+   IRTemp frA_isZero = newTemp(Ity_I1);
+   IRTemp frB_isZero = newTemp(Ity_I1);
+   assign(frA_isZero, is_Zero(frA_I64, False /*not single precision*/ ));
+   assign(frB_isZero, is_Zero(frB_I64, False /*not single precision*/ ));
+   assign(anyNaN, mkOR1(is_NaN(frA_I64), is_NaN(frB_I64)));
+#define MINUS_ZERO 0x8000000000000000ULL
+
+   return IRExpr_Mux0X( unop( Iop_1Uto8,
+                              /* If both arguments are zero . . . */
+                              mkAND1( mkexpr( frA_isZero ), mkexpr( frB_isZero ) ) ),
+                        /* else: check if either input is a NaN*/
+                        IRExpr_Mux0X( unop( Iop_1Uto8, mkexpr( anyNaN ) ),
+                                      /* else: use "comparison helper" */
+                                      _get_maxmin_fp_cmp( frB_I64, frA_I64, isMin ),
+                                      /* then: use "NaN helper" */
+                                      _get_maxmin_fp_NaN( frA_I64, frB_I64 ) ),
+                        /* then: if frA is -0 and isMin==True, return -0;
+                         *     else if frA is +0 and isMin==False, return +0;
+                         *     otherwise, simply return frB. */
+                        IRExpr_Mux0X( unop( Iop_1Uto8,
+                                            binop( Iop_CmpEQ32,
+                                                   unop( Iop_64HIto32,
+                                                         mkexpr( frA_I64 ) ),
+                                                   mkU32( isMin ? 0x80000000 : 0 ) ) ),
+                                      mkexpr( frB_I64 ),
+                                      mkU64( isMin ? MINUS_ZERO : 0ULL ) ) );
+}
+
+/*
+ * Helper function for the vector/scalar double-precision fp round-to-integer
+ * instructions.  insn_suffix is an out-parameter: it receives the mnemonic
+ * suffix implied by the rounding mode that opc2 selects.
+ */
+static IRExpr * _do_vsx_fp_roundToInt(IRTemp frB_I64, UInt opc2, UChar ** insn_suffix)
+{
+
+   /* The same rules apply for x{s|v}rdpi{m|p|c|z} as for floating point round operations (fri{m|n|p|z}). */
+   IRTemp frB = newTemp(Ity_F64);
+   IRTemp frD = newTemp(Ity_F64);
+   IRTemp intermediateResult = newTemp(Ity_I64);
+   IRTemp is_SNAN = newTemp(Ity_I1);
+   IRExpr * hi32;
+   IRExpr * rxpi_rm;
+   switch (opc2 & 0x7F) {
+      case 0x72:
+         *insn_suffix = "m";
+         rxpi_rm = mkU32(Irrm_NegINF);
+         break;
+      case 0x52:
+         *insn_suffix = "p";
+         rxpi_rm = mkU32(Irrm_PosINF);
+         break;
+      case 0x56:
+         *insn_suffix = "c";
+         rxpi_rm = get_IR_roundingmode();
+         break;
+      case 0x32:
+         *insn_suffix = "z";
+         rxpi_rm = mkU32(Irrm_ZERO);
+         break;
+      case 0x12:
+         *insn_suffix = "";
+         rxpi_rm = mkU32(Irrm_NEAREST);
+         break;
+
+      default: // Impossible to get here
+         vex_printf( "_do_vsx_fp_roundToInt(ppc)(opc2)\n" );
+         return NULL;
+   }
+   assign(frB, unop(Iop_ReinterpI64asF64, mkexpr(frB_I64)));
+   assign( intermediateResult,
+           binop( Iop_F64toI64S, rxpi_rm,
+                  mkexpr( frB ) ) );
+
+   /* don't use the rounded integer if frB is outside -9e18..9e18 */
+   /* F64 has only log10(2**52) significant digits anyway */
+   /* need to preserve sign of zero */
+   /*   frD = (fabs(frB) > 9e18) ? frB :
+            (sign(frB)) ? -fabs((double)intermediateResult) : (double)intermediateResult  */
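+   /* E.g. for frB = -0.3 rounded to nearest, intermediateResult = 0 and
+    * converting back would give +0.0, so the sign test on frB's high word
+    * restores the sign: frD = -fabs(0.0) = -0.0.
+    */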
+   assign( frD,
+           IRExpr_Mux0X( unop( Iop_32to8,
+                               binop( Iop_CmpF64,
+                                      IRExpr_Const( IRConst_F64( 9e18 ) ),
+                                      unop( Iop_AbsF64, mkexpr( frB ) ) ) ),
+                         IRExpr_Mux0X( unop( Iop_32to8,
+                                             binop( Iop_Shr32,
+                                                    unop( Iop_64HIto32,
+                                                          mkexpr( frB_I64 ) ),
+                                                    mkU8( 31 ) ) ),
+                                       binop( Iop_I64StoF64,
+                                              mkU32( 0 ),
+                                              mkexpr( intermediateResult ) ),
+                                       unop( Iop_NegF64,
+                                             unop( Iop_AbsF64,
+                                                   binop( Iop_I64StoF64,
+                                                          mkU32( 0 ),
+                                                          mkexpr( intermediateResult ) ) ) ) ),
+                         mkexpr( frB ) ) );
+
+   /* See Appendix "Floating-Point Round to Integer Model" in ISA doc.
+    * If frB is a SNAN, then frD <- frB, with bit 12 set to '1'.
+    */
+#define SNAN_MASK 0x0008000000000000ULL
+   hi32 = unop( Iop_64HIto32, mkexpr(frB_I64) );
+   assign( is_SNAN,
+           mkAND1( is_NaN( frB_I64 ),
+                   binop( Iop_CmpEQ32,
+                          binop( Iop_And32, hi32, mkU32( 0x00080000 ) ),
+                          mkU32( 0 ) ) ) );
+
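+   /* Since is_SNAN guarantees that bit 12 (the QNaN bit) is clear, XORing
+    * with SNAN_MASK below simply sets that bit, quieting the SNaN.
+    */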
+   return IRExpr_Mux0X( unop( Iop_1Uto8, mkexpr( is_SNAN ) ),
+                        mkexpr( frD ),
+                        unop( Iop_ReinterpI64asF64,
+                              binop( Iop_Xor64,
+                                     mkU64( SNAN_MASK ),
+                                     mkexpr( frB_I64 ) ) ) );
+}
+
+/*
+ * Miscellaneous VSX vector instructions
+ */
+static Bool
+dis_vxv_misc ( UInt theInstr, UInt opc2 )
+{
+   /* XX3-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XT = ifieldRegXT( theInstr );
+   UChar XB = ifieldRegXB( theInstr );
+
+   if (opc1 != 0x3C) {
+      vex_printf( "dis_vxv_misc(ppc)(instr)\n" );
+      return False;
+   }
+
+   switch (opc2) {
+      case 0x1B4:  // xvredp (VSX Vector Reciprocal Estimate Double-Precision)
+      case 0x194:  // xvrsqrtedp (VSX Vector Reciprocal Square Root Estimate
+                   //             Double-Precision)
+      {
+         IRExpr* ieee_one = IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL));
+         IRExpr* rm  = get_IR_roundingmode();
+         IRTemp frB = newTemp(Ity_I64);
+         IRTemp frB2 = newTemp(Ity_I64);
+         Bool redp = opc2 == 0x1B4;
+         IRTemp sqrtHi = newTemp(Ity_F64);
+         IRTemp sqrtLo = newTemp(Ity_F64);
+         assign(frB,  unop(Iop_V128HIto64, getVSReg( XB )));
+         assign(frB2, unop(Iop_V128to64, getVSReg( XB )));
+
+         DIP("%s v%d,v%d\n", redp ? "xvredp" : "xvrsqrtedp", (UInt)XT, (UInt)XB);
+         if (!redp) {
+            assign( sqrtHi,
+                    binop( Iop_SqrtF64,
+                           rm,
+                           unop( Iop_ReinterpI64asF64, mkexpr( frB ) ) ) );
+            assign( sqrtLo,
+                    binop( Iop_SqrtF64,
+                           rm,
+                           unop( Iop_ReinterpI64asF64, mkexpr( frB2 ) ) ) );
+         }
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_ReinterpF64asI64,
+                                triop( Iop_DivF64,
+                                       rm,
+                                       ieee_one,
+                                       redp ? unop( Iop_ReinterpI64asF64,
+                                                    mkexpr( frB ) )
+                                            : mkexpr( sqrtHi ) ) ),
+                          unop( Iop_ReinterpF64asI64,
+                                triop( Iop_DivF64,
+                                       rm,
+                                       ieee_one,
+                                       redp ? unop( Iop_ReinterpI64asF64,
+                                                    mkexpr( frB2 ) )
+                                            : mkexpr( sqrtLo ) ) ) ) );
+         break;
+
+      }
+      case 0x134: // xvresp (VSX Vector Reciprocal Estimate Single-Precision)
+      case 0x114: // xvrsqrtesp (VSX Vector Reciprocal Square Root Estimate Single-Precision)
+      {
+         IRTemp b3, b2, b1, b0;
+         IRTemp res0 = newTemp(Ity_I32);
+         IRTemp res1 = newTemp(Ity_I32);
+         IRTemp res2 = newTemp(Ity_I32);
+         IRTemp res3 = newTemp(Ity_I32);
+         IRTemp sqrt3 = newTemp(Ity_F64);
+         IRTemp sqrt2 = newTemp(Ity_F64);
+         IRTemp sqrt1 = newTemp(Ity_F64);
+         IRTemp sqrt0 = newTemp(Ity_F64);
+         IRExpr* rm  = get_IR_roundingmode();
+         Bool resp = opc2 == 0x134;
+
+         IRExpr* ieee_one = IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL));
+
+         b3 = b2 = b1 = b0 = IRTemp_INVALID;
+         DIP("%s v%d,v%d\n", resp ? "xvresp" : "xvrsqrtesp", (UInt)XT, (UInt)XB);
+         breakV128to4xF64( getVSReg( XB ), &b3, &b2, &b1, &b0 );
+
+         if (!resp) {
+            assign( sqrt3, binop( Iop_SqrtF64, rm, mkexpr( b3 ) ) );
+            assign( sqrt2, binop( Iop_SqrtF64, rm, mkexpr( b2 ) ) );
+            assign( sqrt1, binop( Iop_SqrtF64, rm, mkexpr( b1 ) ) );
+            assign( sqrt0, binop( Iop_SqrtF64, rm, mkexpr( b0 ) ) );
+         }
+
+         assign( res0,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             triop( Iop_DivF64r32,
+                                    rm,
+                                    ieee_one,
+                                    resp ? mkexpr( b0 ) : mkexpr( sqrt0 ) ) ) ) );
+         assign( res1,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             triop( Iop_DivF64r32,
+                                    rm,
+                                    ieee_one,
+                                    resp ? mkexpr( b1 ) : mkexpr( sqrt1 ) ) ) ) );
+         assign( res2,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             triop( Iop_DivF64r32,
+                                    rm,
+                                    ieee_one,
+                                    resp ? mkexpr( b2 ) : mkexpr( sqrt2 ) ) ) ) );
+         assign( res3,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             triop( Iop_DivF64r32,
+                                    rm,
+                                    ieee_one,
+                                    resp ? mkexpr( b3 ) : mkexpr( sqrt3 ) ) ) ) );
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64, mkexpr( res3 ), mkexpr( res2 ) ),
+                          binop( Iop_32HLto64, mkexpr( res1 ), mkexpr( res0 ) ) ) );
+         break;
+      }
+      case 0x300: // xvmaxsp (VSX Vector Maximum Single-Precision)
+      case 0x320: // xvminsp (VSX Vector Minimum Single-Precision)
+      {
+         UChar XA = ifieldRegXA( theInstr );
+         IRTemp a3, a2, a1, a0;
+         IRTemp b3, b2, b1, b0;
+         IRTemp res0 = newTemp( Ity_I32 );
+         IRTemp res1 = newTemp( Ity_I32 );
+         IRTemp res2 = newTemp( Ity_I32 );
+         IRTemp res3 = newTemp( Ity_I32 );
+         IRTemp a0_I64 = newTemp( Ity_I64 );
+         IRTemp a1_I64 = newTemp( Ity_I64 );
+         IRTemp a2_I64 = newTemp( Ity_I64 );
+         IRTemp a3_I64 = newTemp( Ity_I64 );
+         IRTemp b0_I64 = newTemp( Ity_I64 );
+         IRTemp b1_I64 = newTemp( Ity_I64 );
+         IRTemp b2_I64 = newTemp( Ity_I64 );
+         IRTemp b3_I64 = newTemp( Ity_I64 );
+
+         Bool isMin = opc2 == 0x320 ? True : False;
+
+         a3 = a2 = a1 = a0 = IRTemp_INVALID;
+         b3 = b2 = b1 = b0 = IRTemp_INVALID;
+         DIP("%s v%d,v%d v%d\n", isMin ? "xvminsp" : "xvmaxsp", (UInt)XT, (UInt)XA, (UInt)XB);
+         breakV128to4xF64( getVSReg( XA ), &a3, &a2, &a1, &a0 );
+         breakV128to4xF64( getVSReg( XB ), &b3, &b2, &b1, &b0 );
+         assign( a0_I64, unop( Iop_ReinterpF64asI64, mkexpr( a0 ) ) );
+         assign( b0_I64, unop( Iop_ReinterpF64asI64, mkexpr( b0 ) ) );
+         assign( a1_I64, unop( Iop_ReinterpF64asI64, mkexpr( a1 ) ) );
+         assign( b1_I64, unop( Iop_ReinterpF64asI64, mkexpr( b1 ) ) );
+         assign( a2_I64, unop( Iop_ReinterpF64asI64, mkexpr( a2 ) ) );
+         assign( b2_I64, unop( Iop_ReinterpF64asI64, mkexpr( b2 ) ) );
+         assign( a3_I64, unop( Iop_ReinterpF64asI64, mkexpr( a3 ) ) );
+         assign( b3_I64, unop( Iop_ReinterpF64asI64, mkexpr( b3 ) ) );
+         assign( res0,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             unop( Iop_ReinterpI64asF64,
+                                   get_max_min_fp( a0_I64, b0_I64, isMin ) ) ) ) );
+         assign( res1,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             unop( Iop_ReinterpI64asF64,
+                                   get_max_min_fp( a1_I64, b1_I64, isMin ) ) ) ) );
+         assign( res2,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             unop( Iop_ReinterpI64asF64,
+                                   get_max_min_fp( a2_I64, b2_I64, isMin ) ) ) ) );
+         assign( res3,
+                 unop( Iop_ReinterpF32asI32,
+                       unop( Iop_TruncF64asF32,
+                             unop( Iop_ReinterpI64asF64,
+                                   get_max_min_fp( a3_I64, b3_I64, isMin ) ) ) ) );
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_32HLto64, mkexpr( res3 ), mkexpr( res2 ) ),
+                          binop( Iop_32HLto64, mkexpr( res1 ), mkexpr( res0 ) ) ) );
+         break;
+      }
+      case 0x380: // xvmaxdp (VSX Vector Maximum Double-Precision)
+      case 0x3A0: // xvmindp (VSX Vector Minimum Double-Precision)
+      {
+         UChar XA = ifieldRegXA( theInstr );
+         IRTemp frA = newTemp(Ity_I64);
+         IRTemp frB = newTemp(Ity_I64);
+         IRTemp frA2 = newTemp(Ity_I64);
+         IRTemp frB2 = newTemp(Ity_I64);
+         Bool isMin = opc2 == 0x3A0 ? True : False;
+
+         assign(frA,  unop(Iop_V128HIto64, getVSReg( XA )));
+         assign(frB,  unop(Iop_V128HIto64, getVSReg( XB )));
+         assign(frA2, unop(Iop_V128to64, getVSReg( XA )));
+         assign(frB2, unop(Iop_V128to64, getVSReg( XB )));
+         DIP("%s v%d,v%d v%d\n", isMin ? "xvmindp" : "xvmaxdp", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop( Iop_64HLtoV128, get_max_min_fp(frA, frB, isMin), get_max_min_fp(frA2, frB2, isMin) ) );
+
+         break;
+      }
+      case 0x3c0: // xvcpsgndp (VSX Vector Copy Sign Double-Precision)
+      {
+         UChar XA = ifieldRegXA( theInstr );
+         IRTemp frA = newTemp(Ity_I64);
+         IRTemp frB = newTemp(Ity_I64);
+         IRTemp frA2 = newTemp(Ity_I64);
+         IRTemp frB2 = newTemp(Ity_I64);
+         assign(frA,  unop(Iop_V128HIto64, getVSReg( XA )));
+         assign(frB,  unop(Iop_V128HIto64, getVSReg( XB )));
+         assign(frA2, unop(Iop_V128to64, getVSReg( XA )));
+         assign(frB2, unop(Iop_V128to64, getVSReg( XB )));
+
+         DIP("xvcpsgndp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          binop( Iop_Or64,
+                                 binop( Iop_And64,
+                                        mkexpr( frA ),
+                                        mkU64( SIGN_BIT ) ),
+                                 binop( Iop_And64,
+                                        mkexpr( frB ),
+                                        mkU64( SIGN_MASK ) ) ),
+                          binop( Iop_Or64,
+                                 binop( Iop_And64,
+                                        mkexpr( frA2 ),
+                                        mkU64( SIGN_BIT ) ),
+                                 binop( Iop_And64,
+                                        mkexpr( frB2 ),
+                                        mkU64( SIGN_MASK ) ) ) ) );
+         break;
+      }
+      case 0x340: // xvcpsgnsp
+      {
+         UChar XA = ifieldRegXA( theInstr );
+         IRTemp a3_I64, a2_I64, a1_I64, a0_I64;
+         IRTemp b3_I64, b2_I64, b1_I64, b0_I64;
+         IRTemp resHi = newTemp(Ity_I64);
+         IRTemp resLo = newTemp(Ity_I64);
+
+         a3_I64 = a2_I64 = a1_I64 = a0_I64 = IRTemp_INVALID;
+         b3_I64 = b2_I64 = b1_I64 = b0_I64 = IRTemp_INVALID;
+         DIP("xvcpsgnsp v%d,v%d v%d\n",(UInt)XT, (UInt)XA, (UInt)XB);
+         breakV128to4x64U( getVSReg( XA ), &a3_I64, &a2_I64, &a1_I64, &a0_I64 );
+         breakV128to4x64U( getVSReg( XB ), &b3_I64, &b2_I64, &b1_I64, &b0_I64 );
+
+         assign( resHi,
+                 binop( Iop_32HLto64,
+                        binop( Iop_Or32,
+                               binop( Iop_And32,
+                                      unop(Iop_64to32, mkexpr( a3_I64 ) ),
+                                      mkU32( SIGN_BIT32 ) ),
+                               binop( Iop_And32,
+                                      unop(Iop_64to32, mkexpr( b3_I64 ) ),
+                                      mkU32( SIGN_MASK32) ) ),
+
+                        binop( Iop_Or32,
+                               binop( Iop_And32,
+                                      unop(Iop_64to32, mkexpr( a2_I64 ) ),
+                                      mkU32( SIGN_BIT32 ) ),
+                               binop( Iop_And32,
+                                      unop(Iop_64to32, mkexpr( b2_I64 ) ),
+                                      mkU32( SIGN_MASK32 ) ) ) ) );
+         assign( resLo,
+                 binop( Iop_32HLto64,
+                        binop( Iop_Or32,
+                               binop( Iop_And32,
+                                      unop(Iop_64to32, mkexpr( a1_I64 ) ),
+                                      mkU32( SIGN_BIT32 ) ),
+                               binop( Iop_And32,
+                                      unop(Iop_64to32, mkexpr( b1_I64 ) ),
+                                      mkU32( SIGN_MASK32 ) ) ),
+
+                        binop( Iop_Or32,
+                               binop( Iop_And32,
+                                      unop(Iop_64to32, mkexpr( a0_I64 ) ),
+                                      mkU32( SIGN_BIT32 ) ),
+                               binop( Iop_And32,
+                                      unop(Iop_64to32, mkexpr( b0_I64 ) ),
+                                      mkU32( SIGN_MASK32 ) ) ) ) );
+         putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( resHi ), mkexpr( resLo ) ) );
+         break;
+      }
+      case 0x3B2: // xvabsdp (VSX Vector Absolute Value Double-Precision)
+      case 0x3D2: // xvnabsdp (VSX Vector Negative Absolute Value Double-Precision)
+      {
+         IRTemp frB = newTemp(Ity_F64);
+         IRTemp frB2 = newTemp(Ity_F64);
+         IRTemp abs_resultHi = newTemp(Ity_F64);
+         IRTemp abs_resultLo = newTemp(Ity_F64);
+         Bool make_negative = (opc2 == 0x3D2) ? True : False;
+         assign(frB,  unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XB ))));
+         assign(frB2, unop(Iop_ReinterpI64asF64, unop(Iop_V128to64, getVSReg(XB))));
+
+         DIP("xv%sabsdp v%d,v%d\n", make_negative ? "n" : "", (UInt)XT, (UInt)XB);
+         if (make_negative) {
+            assign(abs_resultHi, unop( Iop_NegF64, unop( Iop_AbsF64, mkexpr( frB ) ) ) );
+            assign(abs_resultLo, unop( Iop_NegF64, unop( Iop_AbsF64, mkexpr( frB2 ) ) ) );
+
+         } else {
+            assign(abs_resultHi, unop( Iop_AbsF64, mkexpr( frB ) ) );
+            assign(abs_resultLo, unop( Iop_AbsF64, mkexpr( frB2 ) ) );
+         }
+         putVSReg( XT, binop( Iop_64HLtoV128,
+                              unop( Iop_ReinterpF64asI64, mkexpr( abs_resultHi ) ),
+                              unop( Iop_ReinterpF64asI64, mkexpr( abs_resultLo ) ) ) );
+         break;
+      }
+      case 0x332: // xvabssp (VSX Vector Absolute Value Single-Precision)
+      case 0x352: // xvnabssp (VSX Vector Negative Absolute Value Single-Precision)
+      {
+         /*
+          * The Iop_AbsF32 IROp is not implemented for ppc64 since, prior to the
+          * introduction of xvabssp, there was no abs(sp)-style instruction.  Emulating
+          * the operation with shifts is straightforward (shifting each 32-bit lane
+          * left then logically right by one clears its sign bit), so I emulate the
+          * instruction that way rather than via a native implementation.
+          */
+         Bool make_negative = (opc2 == 0x352) ? True : False;
+         IRTemp shiftVector = newTemp(Ity_V128);
+         IRTemp absVal_vector = newTemp(Ity_V128);
+         assign( shiftVector,
+                 binop( Iop_64HLtoV128,
+                        binop( Iop_32HLto64, mkU32( 1 ), mkU32( 1 ) ),
+                        binop( Iop_32HLto64, mkU32( 1 ), mkU32( 1 ) ) ) );
+         assign( absVal_vector,
+                   binop( Iop_Shr32x4,
+                          binop( Iop_Shl32x4,
+                                 getVSReg( XB ),
+                                 mkexpr( shiftVector ) ),
+                          mkexpr( shiftVector ) ) );
+         if (make_negative) {
+            IRTemp signBit_vector = newTemp(Ity_V128);
+            assign( signBit_vector,
+                    binop( Iop_64HLtoV128,
+                           binop( Iop_32HLto64,
+                                  mkU32( 0x80000000 ),
+                                  mkU32( 0x80000000 ) ),
+                           binop( Iop_32HLto64,
+                                  mkU32( 0x80000000 ),
+                                  mkU32( 0x80000000 ) ) ) );
+            putVSReg( XT,
+                      binop( Iop_OrV128,
+                             mkexpr( absVal_vector ),
+                             mkexpr( signBit_vector ) ) );
+         } else {
+            putVSReg( XT, mkexpr( absVal_vector ) );
+         }
+         break;
+      }
+      case 0x3F2: // xvnegdp (VSX Vector Negate Double-Precision)
+      {
+         IRTemp frB = newTemp(Ity_F64);
+         IRTemp frB2 = newTemp(Ity_F64);
+         assign(frB,  unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XB ))));
+         assign(frB2, unop(Iop_ReinterpI64asF64, unop(Iop_V128to64, getVSReg(XB))));
+         DIP("xvnegdp v%d,v%d\n",  (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_ReinterpF64asI64,
+                                unop( Iop_NegF64, mkexpr( frB ) ) ),
+                          unop( Iop_ReinterpF64asI64,
+                                unop( Iop_NegF64, mkexpr( frB2 ) ) ) ) );
+         break;
+      }
+      case 0x192: // xvrdpi  (VSX Vector Round to Double-Precision Integer using round toward Nearest Away)
+      case 0x1D6: // xvrdpic (VSX Vector Round to Double-Precision Integer using Current rounding mode)
+      case 0x1F2: // xvrdpim (VSX Vector Round to Double-Precision Integer using round toward -Infinity)
+      case 0x1D2: // xvrdpip (VSX Vector Round to Double-Precision Integer using round toward +Infinity)
+      case 0x1B2: // xvrdpiz (VSX Vector Round to Double-Precision Integer using round toward Zero)
+      {
+         IRTemp frBHi_I64 = newTemp(Ity_I64);
+         IRTemp frBLo_I64 = newTemp(Ity_I64);
+         IRExpr * frD_fp_roundHi = NULL;
+         IRExpr * frD_fp_roundLo = NULL;
+         UChar * insn_suffix = NULL;
+
+         assign( frBHi_I64, unop( Iop_V128HIto64, getVSReg( XB ) ) );
+         frD_fp_roundHi = _do_vsx_fp_roundToInt(frBHi_I64, opc2, &insn_suffix);
+         assign( frBLo_I64, unop( Iop_V128to64, getVSReg( XB ) ) );
+         frD_fp_roundLo = _do_vsx_fp_roundToInt(frBLo_I64, opc2, &insn_suffix);
+
+         DIP("xvrdpi%s v%d,v%d\n", insn_suffix, (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_ReinterpF64asI64, frD_fp_roundHi ),
+                          unop( Iop_ReinterpF64asI64, frD_fp_roundLo ) ) );
+         break;
+      }
+      case 0x112: // xvrspi  (VSX Vector Round to Single-Precision Integer using round toward Nearest Away)
+      case 0x156: // xvrspic (VSX Vector Round to Single-Precision Integer using Current rounding mode)
+      case 0x172: // xvrspim (VSX Vector Round to Single-Precision Integer using round toward -Infinity)
+      case 0x152: // xvrspip (VSX Vector Round to Single-Precision Integer using round toward +Infinity)
+      case 0x132: // xvrspiz (VSX Vector Round to Single-Precision Integer using round toward Zero)
+      {
+         UChar * insn_suffix = NULL;
+         IROp op;
+         if (opc2 != 0x156) {
+            // Use pre-defined IRop's for vrfi{m|n|p|z}
+            switch (opc2) {
+               case 0x112:
+                  insn_suffix = "";
+                  op = Iop_RoundF32x4_RN;
+                  break;
+               case 0x172:
+                  insn_suffix = "m";
+                  op = Iop_RoundF32x4_RM;
+                  break;
+               case 0x152:
+                  insn_suffix = "p";
+                  op = Iop_RoundF32x4_RP;
+                  break;
+               case 0x132:
+                  insn_suffix = "z";
+                  op = Iop_RoundF32x4_RZ;
+                  break;
+
+               default:
+                  vex_printf( "dis_vxv_misc(ppc)(vrspi<x>)(opc2)\n" );
+                  return False;
+            }
+            DIP("xvrspi%s v%d,v%d\n", insn_suffix, (UInt)XT, (UInt)XB);
+            putVSReg( XT, unop( op, getVSReg(XB) ) );
+         } else {
+            // Handle xvrspic.  Unfortunately there is no corresponding "vfric" instruction.
+            IRExpr * frD_fp_roundb3, * frD_fp_roundb2, * frD_fp_roundb1, * frD_fp_roundb0;
+            IRTemp b3_F64, b2_F64, b1_F64, b0_F64;
+            IRTemp b3_I64 = newTemp(Ity_I64);
+            IRTemp b2_I64 = newTemp(Ity_I64);
+            IRTemp b1_I64 = newTemp(Ity_I64);
+            IRTemp b0_I64 = newTemp(Ity_I64);
+
+            b3_F64 = b2_F64 = b1_F64 = b0_F64 = IRTemp_INVALID;
+            frD_fp_roundb3 = frD_fp_roundb2 = frD_fp_roundb1 = frD_fp_roundb0 = NULL;
+            breakV128to4xF64( getVSReg(XB), &b3_F64, &b2_F64, &b1_F64, &b0_F64);
+            assign(b3_I64, unop(Iop_ReinterpF64asI64, mkexpr(b3_F64)));
+            assign(b2_I64, unop(Iop_ReinterpF64asI64, mkexpr(b2_F64)));
+            assign(b1_I64, unop(Iop_ReinterpF64asI64, mkexpr(b1_F64)));
+            assign(b0_I64, unop(Iop_ReinterpF64asI64, mkexpr(b0_F64)));
+            frD_fp_roundb3 = unop(Iop_TruncF64asF32,
+                                  _do_vsx_fp_roundToInt(b3_I64, opc2, &insn_suffix));
+            frD_fp_roundb2 = unop(Iop_TruncF64asF32,
+                                  _do_vsx_fp_roundToInt(b2_I64, opc2, &insn_suffix));
+            frD_fp_roundb1 = unop(Iop_TruncF64asF32,
+                                  _do_vsx_fp_roundToInt(b1_I64, opc2, &insn_suffix));
+            frD_fp_roundb0 = unop(Iop_TruncF64asF32,
+                                  _do_vsx_fp_roundToInt(b0_I64, opc2, &insn_suffix));
+            DIP("xvrspic v%d,v%d\n", (UInt)XT, (UInt)XB);
+            putVSReg( XT,
+                      binop( Iop_64HLtoV128,
+                             binop( Iop_32HLto64,
+                                    unop( Iop_ReinterpF32asI32, frD_fp_roundb3 ),
+                                    unop( Iop_ReinterpF32asI32, frD_fp_roundb2 ) ),
+                             binop( Iop_32HLto64,
+                                    unop( Iop_ReinterpF32asI32, frD_fp_roundb1 ),
+                                    unop( Iop_ReinterpF32asI32, frD_fp_roundb0 ) ) ) );
+         }
+         break;
+      }
+
+      default:
+         vex_printf( "dis_vxv_misc(ppc)(opc2)\n" );
+         return False;
+   }
+   return True;
+}
+
+
+/*
+ * VSX Scalar Floating Point Arithmetic Instructions
+ */
+static Bool
+dis_vxs_arith ( UInt theInstr, UInt opc2 )
+{
+   /* XX3-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XT = ifieldRegXT( theInstr );
+   UChar XA = ifieldRegXA( theInstr );
+   UChar XB = ifieldRegXB( theInstr );
+   IRExpr* rm = get_IR_roundingmode();
+   IRTemp frA = newTemp(Ity_F64);
+   IRTemp frB = newTemp(Ity_F64);
+
+   if (opc1 != 0x3C) {
+      vex_printf( "dis_vxs_arith(ppc)(instr)\n" );
+      return False;
+   }
+
+   assign(frA, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XA ))));
+   assign(frB, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XB ))));
+
+   /* For all the VSX scalar arithmetic instructions, the contents of doubleword
+    * element 1 of VSX[XT] are undefined after the operation; therefore, we can
+    * simply set that element to zero where it makes sense to do so.
+    */
+   switch (opc2) {
+      case 0x080: // xsadddp (VSX scalar add double-precision)
+         DIP("xsadddp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64,
+                                                    triop( Iop_AddF64, rm,
+                                                           mkexpr( frA ),
+                                                           mkexpr( frB ) ) ),
+                              mkU64( 0 ) ) );
+         break;
+      case 0x0E0: // xsdivdp (VSX scalar divide double-precision)
+         DIP("xsdivdp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64,
+                                                    triop( Iop_DivF64, rm,
+                                                           mkexpr( frA ),
+                                                           mkexpr( frB ) ) ),
+                              mkU64( 0 ) ) );
+         break;
+      case 0x084: case 0x0A4: // xsmaddadp, xsmaddmdp (VSX scalar multiply-add double-precision)
+      {
+         IRTemp frT = newTemp(Ity_F64);
+         Bool mdp = opc2 == 0x0A4;
+         DIP("xsmadd%sdp v%d,v%d,v%d\n", mdp ? "m" : "a", (UInt)XT, (UInt)XA, (UInt)XB);
+         assign( frT, unop( Iop_ReinterpI64asF64, unop( Iop_V128HIto64,
+                                                        getVSReg( XT ) ) ) );
+         putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64,
+                                                    qop( Iop_MAddF64, rm,
+                                                         mkexpr( frA ),
+                                                         mkexpr( mdp ? frT : frB ),
+                                                         mkexpr( mdp ? frB : frT ) ) ),
+                              mkU64( 0 ) ) );
+         break;
+      }
+      case 0x0C4: case 0x0E4: // xsmsubadp, xsmsubmdp (VSX scalar multiply-subtract double-precision)
+      {
+         IRTemp frT = newTemp(Ity_F64);
+         Bool mdp = opc2 == 0x0E4;
+         DIP("xsmsub%sdp v%d,v%d,v%d\n", mdp ? "m" : "a", (UInt)XT, (UInt)XA, (UInt)XB);
+         assign( frT, unop( Iop_ReinterpI64asF64, unop( Iop_V128HIto64,
+                                                        getVSReg( XT ) ) ) );
+         putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64,
+                                                    qop( Iop_MSubF64, rm,
+                                                         mkexpr( frA ),
+                                                         mkexpr( mdp ? frT : frB ),
+                                                         mkexpr( mdp ? frB : frT ) ) ),
+                              mkU64( 0 ) ) );
+         break;
+      }
+      case 0x284: case 0x2A4: // xsnmaddadp, xsnmaddmdp (VSX scalar multiply-add double-precision)
+      {
+         /* TODO: mpj -- Naturally, I expected to be able to leverage the implementation
+          * of fnmadd and use pretty much the same code. However, that code has a bug in the
+          * way it blindly negates the signbit, even if the floating point result is a NaN.
+          * So, the TODO is to fix fnmadd (which I'll do in a different patch).
+          */
+         Bool mdp = opc2 == 0x2A4;
+         IRTemp frT = newTemp(Ity_F64);
+         IRTemp maddResult = newTemp(Ity_I64);
+
+         DIP("xsnmadd%sdp v%d,v%d,v%d\n", mdp ? "m" : "a", (UInt)XT, (UInt)XA, (UInt)XB);
+         assign( frT, unop( Iop_ReinterpI64asF64, unop( Iop_V128HIto64,
+                                                        getVSReg( XT ) ) ) );
+         assign( maddResult, unop( Iop_ReinterpF64asI64, qop( Iop_MAddF64, rm,
+                                                              mkexpr( frA ),
+                                                              mkexpr( mdp ? frT : frB ),
+                                                              mkexpr( mdp ? frB : frT ) ) ) );
+
+         putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( getNegatedResult(maddResult) ),
+                              mkU64( 0 ) ) );
+         break;
+      }
+      case 0x2C4: case 0x2E4: // xsnmsubadp, xsnmsubmdp (VSX Scalar Negative Multiply-Subtract Double-Precision)
+      {
+         IRTemp frT = newTemp(Ity_F64);
+         Bool mdp = opc2 == 0x2E4;
+         IRTemp msubResult = newTemp(Ity_I64);
+
+         DIP("xsnmsub%sdp v%d,v%d,v%d\n", mdp ? "m" : "a", (UInt)XT, (UInt)XA, (UInt)XB);
+         assign( frT, unop( Iop_ReinterpI64asF64, unop( Iop_V128HIto64,
+                                                        getVSReg( XT ) ) ) );
+         assign(msubResult, unop( Iop_ReinterpF64asI64,
+                                      qop( Iop_MSubF64,
+                                           rm,
+                                           mkexpr( frA ),
+                                           mkexpr( mdp ? frT : frB ),
+                                           mkexpr( mdp ? frB : frT ) ) ));
+
+         putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( getNegatedResult(msubResult) ), mkU64( 0 ) ) );
+
+         break;
+      }
+
+      case 0x0C0: // xsmuldp (VSX Scalar Multiply Double-Precision)
+         DIP("xsmuldp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64,
+                                                    triop( Iop_MulF64, rm,
+                                                           mkexpr( frA ),
+                                                           mkexpr( frB ) ) ),
+                              mkU64( 0 ) ) );
+         break;
+      case 0x0A0: // xssubdp (VSX Scalar Subtract Double-Precision)
+         DIP("xssubdp v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64,
+                                                    triop( Iop_SubF64, rm,
+                                                           mkexpr( frA ),
+                                                           mkexpr( frB ) ) ),
+                              mkU64( 0 ) ) );
+         break;
+
+      case 0x096: // xssqrtdp (VSX Scalar Square Root Double-Precision)
+         DIP("xssqrtdp v%d,v%d\n", (UInt)XT, (UInt)XB);
+         putVSReg( XT,  binop( Iop_64HLtoV128, unop( Iop_ReinterpF64asI64,
+                                                     binop( Iop_SqrtF64, rm,
+                                                            mkexpr( frB ) ) ),
+                               mkU64( 0 ) ) );
+         break;
+
+      case 0x0F4: // xstdivdp (VSX Scalar Test for software Divide Double-Precision)
+      {
+         UChar crfD     = toUChar( IFIELD( theInstr, 23, 3 ) );
+         IRTemp frA_I64 = newTemp(Ity_I64);
+         IRTemp frB_I64 = newTemp(Ity_I64);
+         DIP("xstdivdp crf%d,v%d,v%d\n", crfD, (UInt)XA, (UInt)XB);
+         assign( frA_I64, unop( Iop_ReinterpF64asI64, mkexpr( frA ) ) );
+         assign( frB_I64, unop( Iop_ReinterpF64asI64, mkexpr( frB ) ) );
+         putGST_field( PPC_GST_CR, do_fp_tdiv(frA_I64, frB_I64), crfD );
+         break;
+      }
+      case 0x0D4: // xstsqrtdp (VSX Scalar Test for software Square Root Double-Precision)
+      {
+         IRTemp frB_I64 = newTemp(Ity_I64);
+         UChar crfD     = toUChar( IFIELD( theInstr, 23, 3 ) );
+         IRTemp flags = newTemp(Ity_I32);
+         IRTemp  fe_flag, fg_flag;
+         fe_flag = fg_flag = IRTemp_INVALID;
+         DIP("xstsqrtdp v%d,v%d\n", (UInt)XT, (UInt)XB);
+         assign( frB_I64, unop(Iop_V128HIto64, getVSReg( XB )) );
+         do_fp_tsqrt(frB_I64, False /*not single precision*/, &fe_flag, &fg_flag);
+         /* The CR field consists of fl_flag || fg_flag || fe_flag || 0b0
+          * where fl_flag == 1 on ppc64.
+          */
+         assign( flags,
+                 binop( Iop_Or32,
+                        binop( Iop_Or32, mkU32( 8 ), // fl_flag
+                               binop( Iop_Shl32, mkexpr(fg_flag), mkU8( 2 ) ) ),
+                        binop( Iop_Shl32, mkexpr(fe_flag), mkU8( 1 ) ) ) );
+         putGST_field( PPC_GST_CR, mkexpr(flags), crfD );
+         break;
+      }
+
+      default:
+         vex_printf( "dis_vxs_arith(ppc)(opc2)\n" );
+         return False;
+   }
+
+   return True;
+}
+
+
+/*
+ * VSX Floating Point Compare Instructions
+ */
+static Bool
+dis_vx_cmp( UInt theInstr, UInt opc2 )
+{
+   /* XX3-Form and XX2-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar crfD     = toUChar( IFIELD( theInstr, 23, 3 ) );
+   IRTemp ccPPC32;
+   UChar XA       = ifieldRegXA ( theInstr );
+   UChar XB       = ifieldRegXB ( theInstr );
+   IRTemp frA     = newTemp(Ity_F64);
+   IRTemp frB     = newTemp(Ity_F64);
+
+   if (opc1 != 0x3C) {
+      vex_printf( "dis_vx_cmp(ppc)(instr)\n" );
+      return False;
+   }
+
+   assign(frA, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XA ))));
+   assign(frB, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, getVSReg( XB ))));
+   switch (opc2) {
+      case 0x08C: case 0x0AC: // xscmpudp, xscmpodp
+         /* Note: Differences between xscmpudp and xscmpodp are only in
+          * exception flag settings, which aren't supported anyway. */
+         DIP("xscmp%sdp crf%d,fr%u,fr%u\n", opc2 == 0x08c ? "u" : "o",
+                                           crfD, (UInt)XA, (UInt)XB);
+         ccPPC32 = get_fp_cmp_CR_val( binop(Iop_CmpF64, mkexpr(frA), mkexpr(frB)));
+         putGST_field( PPC_GST_CR, mkexpr(ccPPC32), crfD );
+         break;
+
+      default:
+         vex_printf( "dis_vx_cmp(ppc)(opc2)\n" );
+         return False;
+   }
+   return True;
+}
+
+static void
+do_vvec_fp_cmp ( IRTemp vA, IRTemp vB, UChar XT, UChar flag_rC,
+                 ppc_cmp_t cmp_type )
+{
+   IRTemp frA_hi     = newTemp(Ity_F64);
+   IRTemp frB_hi     = newTemp(Ity_F64);
+   IRTemp frA_lo     = newTemp(Ity_F64);
+   IRTemp frB_lo     = newTemp(Ity_F64);
+   IRTemp ccPPC32    = newTemp(Ity_I32);
+   IRTemp ccIR_hi;
+   IRTemp ccIR_lo;
+
+   IRTemp hiResult = newTemp(Ity_I64);
+   IRTemp loResult = newTemp(Ity_I64);
+   IRTemp hiEQlo = newTemp(Ity_I1);
+   IRTemp all_elem_true = newTemp(Ity_I32);
+   IRTemp all_elem_false = newTemp(Ity_I32);
+
+   assign(frA_hi, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, mkexpr( vA ))));
+   assign(frB_hi, unop(Iop_ReinterpI64asF64, unop(Iop_V128HIto64, mkexpr( vB ))));
+   assign(frA_lo, unop(Iop_ReinterpI64asF64, unop(Iop_V128to64, mkexpr( vA ))));
+   assign(frB_lo, unop(Iop_ReinterpI64asF64, unop(Iop_V128to64, mkexpr( vB ))));
+
+   ccIR_hi = get_fp_cmp_CR_val( binop( Iop_CmpF64,
+                                       mkexpr( frA_hi ),
+                                       mkexpr( frB_hi ) ) );
+   ccIR_lo = get_fp_cmp_CR_val( binop( Iop_CmpF64,
+                                       mkexpr( frA_lo ),
+                                       mkexpr( frB_lo ) ) );
+
+   if (cmp_type != PPC_CMP_GE) {
+      assign( hiResult,
+              unop( Iop_1Sto64,
+                    binop( Iop_CmpEQ32, mkexpr( ccIR_hi ), mkU32( cmp_type ) ) ) );
+      assign( loResult,
+              unop( Iop_1Sto64,
+                    binop( Iop_CmpEQ32, mkexpr( ccIR_lo ), mkU32( cmp_type ) ) ) );
+   } else {
+      // For PPC_CMP_GE, one element compare may return "4" (for "greater than") and
+      // the other element compare may return "2" (for "equal to").
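+      // (This assumes get_fp_cmp_CR_val returns the usual PowerPC FPCC
+      // encoding -- 8 = FL (less), 4 = FG (greater), 2 = FE (equal),
+      // 1 = FU (unordered) -- so GE holds when the value is 2 or 4.)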
+      IRTemp lo_GE = newTemp(Ity_I1);
+      IRTemp hi_GE = newTemp(Ity_I1);
+
+      assign(hi_GE, mkOR1( binop( Iop_CmpEQ32, mkexpr( ccIR_hi ), mkU32( 2 ) ),
+                           binop( Iop_CmpEQ32, mkexpr( ccIR_hi ), mkU32( 4 ) ) ) );
+         assign( hiResult, unop( Iop_1Sto64, mkexpr( hi_GE ) ) );
+
+      assign(lo_GE, mkOR1( binop( Iop_CmpEQ32, mkexpr( ccIR_lo ), mkU32( 2 ) ),
+                           binop( Iop_CmpEQ32, mkexpr( ccIR_lo ), mkU32( 4 ) ) ) );
+      assign( loResult, unop( Iop_1Sto64, mkexpr( lo_GE ) ) );
+   }
+
+   // The [hi/lo]Result will be all 1's or all 0's.  We just look at the lower word.
+   assign( hiEQlo,
+           binop( Iop_CmpEQ32,
+                  unop( Iop_64to32, mkexpr( hiResult ) ),
+                  unop( Iop_64to32, mkexpr( loResult ) ) ) );
+   putVSReg( XT,
+             binop( Iop_64HLtoV128, mkexpr( hiResult ), mkexpr( loResult ) ) );
+
+   assign( all_elem_true,
+           unop( Iop_1Uto32,
+                 mkAND1( mkexpr( hiEQlo ),
+                         binop( Iop_CmpEQ32,
+                                mkU32( 0xffffffff ),
+                                unop( Iop_64to32,
+                                mkexpr( hiResult ) ) ) ) ) );
+
+   assign( all_elem_false,
+           unop( Iop_1Uto32,
+                 mkAND1( mkexpr( hiEQlo ),
+                         binop( Iop_CmpEQ32,
+                                mkU32( 0 ),
+                                unop( Iop_64to32,
+                                mkexpr( hiResult ) ) ) ) ) );
+   assign( ccPPC32,
+           binop( Iop_Or32,
+                  binop( Iop_Shl32, mkexpr( all_elem_false ), mkU8( 1 ) ),
+                  binop( Iop_Shl32, mkexpr( all_elem_true ), mkU8( 3 ) ) ) );
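+   /* With this layout the CR field reads 0b1000 when every element
+    * compared true and 0b0010 when every element compared false,
+    * matching the AltiVec record-form (vcmp*.) convention. */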
+
+   if (flag_rC) {
+      putGST_field( PPC_GST_CR, mkexpr(ccPPC32), 6 );
+   }
+}
+
+/*
+ * VSX Vector Compare Instructions
+ */
+static Bool
+dis_vvec_cmp( UInt theInstr, UInt opc2 )
+{
+   /* XX3-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XT = ifieldRegXT ( theInstr );
+   UChar XA = ifieldRegXA ( theInstr );
+   UChar XB = ifieldRegXB ( theInstr );
+   UChar flag_rC  = ifieldBIT10(theInstr);
+   IRTemp vA = newTemp( Ity_V128 );
+   IRTemp vB = newTemp( Ity_V128 );
+
+   if (opc1 != 0x3C) {
+      vex_printf( "dis_vvec_cmp(ppc)(instr)\n" );
+      return False;
+   }
+
+   assign( vA, getVSReg( XA ) );
+   assign( vB, getVSReg( XB ) );
+
+   switch (opc2) {
+      case 0x18C: case 0x38C:  // xvcmpeqdp[.] (VSX Vector Compare Equal To Double-Precision [ & Record ])
+      {
+         DIP("xvcmpeqdp%s crf%d,fr%u,fr%u\n", (flag_rC ? ".":""),
+             (UInt)XT, (UInt)XA, (UInt)XB);
+         do_vvec_fp_cmp(vA, vB, XT, flag_rC, PPC_CMP_EQ);
+         break;
+      }
+
+      case 0x1CC: case 0x3CC: // xvcmpgedp[.] (VSX Vector Compare Greater Than or Equal To Double-Precision [ & Record ])
+      {
+         DIP("xvcmpgedp%s crf%d,fr%u,fr%u\n", (flag_rC ? ".":""),
+             (UInt)XT, (UInt)XA, (UInt)XB);
+         do_vvec_fp_cmp(vA, vB, XT, flag_rC, PPC_CMP_GE);
+         break;
+      }
+
+      case 0x1AC: case 0x3AC: // xvcmpgtdp[.] (VSX Vector Compare Greater Than Double-Precision [ & Record ])
+      {
+         DIP("xvcmpgtdp%s crf%d,fr%u,fr%u\n", (flag_rC ? ".":""),
+             (UInt)XT, (UInt)XA, (UInt)XB);
+         do_vvec_fp_cmp(vA, vB, XT, flag_rC, PPC_CMP_GT);
+         break;
+      }
+
+      case 0x10C: case 0x30C: // xvcmpeqsp[.] (VSX Vector Compare Equal To Single-Precision [ & Record ])
+      {
+         IRTemp vD = newTemp(Ity_V128);
+
+         DIP("xvcmpeqsp%s crf%d,fr%u,fr%u\n", (flag_rC ? ".":""),
+             (UInt)XT, (UInt)XA, (UInt)XB);
+         assign( vD, binop(Iop_CmpEQ32Fx4, mkexpr(vA), mkexpr(vB)) );
+         putVSReg( XT, mkexpr(vD) );
+         if (flag_rC) {
+            set_AV_CR6( mkexpr(vD), True );
+         }
+         break;
+      }
+
+      case 0x14C: case 0x34C: // xvcmpgesp[.] (VSX Vector Compare Greater Than or Equal To Single-Precision [ & Record ])
+      {
+         IRTemp vD = newTemp(Ity_V128);
+
+         DIP("xvcmpgesp%s crf%d,fr%u,fr%u\n", (flag_rC ? ".":""),
+             (UInt)XT, (UInt)XA, (UInt)XB);
+         assign( vD, binop(Iop_CmpGE32Fx4, mkexpr(vA), mkexpr(vB)) );
+         putVSReg( XT, mkexpr(vD) );
+         if (flag_rC) {
+            set_AV_CR6( mkexpr(vD), True );
+         }
+         break;
+      }
+
+      case 0x12C: case 0x32C: //xvcmpgtsp[.] (VSX Vector Compare Greater Than Single-Precision [ & Record ])
+      {
+         IRTemp vD = newTemp(Ity_V128);
+
+         DIP("xvcmpgtsp%s crf%d,fr%u,fr%u\n", (flag_rC ? ".":""),
+             (UInt)XT, (UInt)XA, (UInt)XB);
+         assign( vD, binop(Iop_CmpGT32Fx4, mkexpr(vA), mkexpr(vB)) );
+         putVSReg( XT, mkexpr(vD) );
+         if (flag_rC) {
+            set_AV_CR6( mkexpr(vD), True );
+         }
+         break;
+      }
+
+      default:
+         vex_printf( "dis_vvec_cmp(ppc)(opc2)\n" );
+         return False;
+   }
+   return True;
+}
+/*
+ * Miscellaneous VSX Scalar Instructions
+ */
+static Bool
+dis_vxs_misc( UInt theInstr, UInt opc2 )
+{
+   /* XX3-Form and XX2-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XT = ifieldRegXT ( theInstr );
+   UChar XA = ifieldRegXA ( theInstr );
+   UChar XB = ifieldRegXB ( theInstr );
+   IRTemp vA = newTemp( Ity_V128 );
+   IRTemp vB = newTemp( Ity_V128 );
+
+   if (opc1 != 0x3C) {
+      vex_printf( "dis_vxs_misc(ppc)(instr)\n" );
+      return False;
+   }
+
+   assign( vA, getVSReg( XA ) );
+   assign( vB, getVSReg( XB ) );
+
+   /* For all the VSX move instructions, the contents of doubleword element 1
+    * of VSX[XT] are undefined after the operation; therefore, we can simply
+    * operate on the entire 128-bit register where it makes sense to do so.
+    */
+
+   switch (opc2) {
+      case 0x2B2: // xsabsdp (VSX Scalar Absolute Value Double-Precision)
+      {
+         /* Move abs val of dw 0 of VSX[XB] to dw 0 of VSX[XT]. */
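+         /* Shifting the 128-bit value left by one bit and then logically
+          * right by one bit clears its MSB, i.e. the sign bit of
+          * doubleword element 0. */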
+         IRTemp absVal = newTemp(Ity_V128);
+         assign(absVal, binop(Iop_ShrV128, binop(Iop_ShlV128, mkexpr(vB), mkU8(1)), mkU8(1)));
+         DIP("xsabsdp v%d,v%d\n", (UInt)XT, (UInt)XB);
+         putVSReg(XT, mkexpr(absVal));
+         break;
+      }
+      case 0x2C0: // xscpsgndp
+      {
+         /* Scalar copy sign double-precision */
+         IRTemp vecA_signbit = newTemp(Ity_V128);
+         IRTemp vecB_no_signbit = newTemp(Ity_V128);
+         IRTemp vec_result = newTemp(Ity_V128);
+         DIP("xscpsgndp v%d,v%d v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         assign( vecB_no_signbit, binop( Iop_ShrV128, binop( Iop_ShlV128,
+                                                             mkexpr( vB ),
+                                                             mkU8( 1 ) ),
+                                         mkU8( 1 ) ) );
+         assign( vecA_signbit, binop( Iop_ShlV128, binop( Iop_ShrV128,
+                                                          mkexpr( vA ),
+                                                          mkU8( 127 ) ),
+                                      mkU8( 127 ) ) );
+         assign( vec_result, binop( Iop_OrV128, mkexpr(vecA_signbit), mkexpr( vecB_no_signbit ) ) );
+         putVSReg(XT, mkexpr(vec_result));
+         break;
+      }
+      case 0x2D2: // xsnabsdp
+      {
+         /* Scalar negative absolute value double-precision */
+         IRTemp vec_neg_signbit = newTemp(Ity_V128);
+         DIP("xsnabsdp v%d,v%d\n", (UInt)XT, (UInt)XB);
+         assign( vec_neg_signbit, unop( Iop_NotV128, binop( Iop_ShrV128,
+                                                            mkV128( 0xffff ),
+                                                            mkU8( 1 ) ) ) );
+         putVSReg(XT, binop(Iop_OrV128, mkexpr(vec_neg_signbit), mkexpr(vB)));
+         break;
+      }
+      case 0x2F2: // xsnegdp
+      {
+         /* Scalar negate double-precision */
+         IRTemp vecB_no_signbit = newTemp(Ity_V128);
+         IRTemp vecB_signbit_comp = newTemp(Ity_V128);
+         DIP("xsnabsdp v%d,v%d\n", (UInt)XT, (UInt)XB);
+         assign( vecB_no_signbit, binop( Iop_ShrV128, binop( Iop_ShlV128,
+                                                             mkexpr( vB ),
+                                                             mkU8( 1 ) ),
+                                         mkU8( 1 ) ) );
+         assign( vecB_signbit_comp, binop( Iop_ShlV128,
+                                           unop( Iop_NotV128,
+                                                 binop( Iop_ShrV128,
+                                                        mkexpr( vB ),
+                                                        mkU8( 127 ) ) ),
+                                           mkU8( 127 ) ) );
+         putVSReg( XT, binop( Iop_OrV128, mkexpr( vecB_no_signbit ),
+                              mkexpr( vecB_signbit_comp ) ) );
+         break;
+      }
+      case 0x280: // xsmaxdp (VSX Scalar Maximum Double-Precision)
+      case 0x2A0: // xsmindp (VSX Scalar Minimum Double-Precision)
+      {
+         IRTemp frA     = newTemp(Ity_I64);
+         IRTemp frB     = newTemp(Ity_I64);
+         Bool isMin = opc2 == 0x2A0 ? True : False;
+         DIP("%s v%d,v%d,v%d\n", isMin ? "xsmindp" : "xsmaxdp", (UInt)XT, (UInt)XA, (UInt)XB);
+
+         assign(frA, unop(Iop_V128HIto64, mkexpr( vA )));
+         assign(frB, unop(Iop_V128HIto64, mkexpr( vB )));
+         putVSReg( XT, binop( Iop_64HLtoV128, get_max_min_fp(frA, frB, isMin), mkU64( 0 ) ) );
+
+         break;
+      }
+      case 0x0F2: // xsrdpim (VSX Scalar Round to Double-Precision Integer using round toward -Infinity)
+      case 0x0D2: // xsrdpip (VSX Scalar Round to Double-Precision Integer using round toward +Infinity)
+      case 0x0D6: // xsrdpic (VSX Scalar Round to Double-Precision Integer using Current rounding mode)
+      case 0x0B2: // xsrdpiz (VSX Scalar Round to Double-Precision Integer using round toward Zero)
+      case 0x092: // xsrdpi  (VSX Scalar Round to Double-Precision Integer using round toward Nearest Away)
+      {
+         IRTemp frB_I64 = newTemp(Ity_I64);
+         IRExpr * frD_fp_round = NULL;
+         UChar * insn_suffix = NULL;
+
+         assign(frB_I64, unop(Iop_V128HIto64, mkexpr( vB )));
+         frD_fp_round = _do_vsx_fp_roundToInt(frB_I64, opc2, insn_suffix);
+
+         DIP("xsrdpi%s v%d,v%d\n", insn_suffix, (UInt)XT, (UInt)XB);
+         putVSReg( XT,
+                   binop( Iop_64HLtoV128,
+                          unop( Iop_ReinterpF64asI64, frD_fp_round),
+                          mkU64( 0 ) ) );
+         break;
+      }
+      case 0x0B4: // xsredp (VSX Scalar Reciprocal Estimate Double-Precision)
+      case 0x094: // xsrsqrtedp (VSX Scalar Reciprocal Square Root Estimate Double-Precision)
+
+      {
+         IRTemp frB = newTemp(Ity_F64);
+         IRTemp sqrt = newTemp(Ity_F64);
+         IRExpr* ieee_one = IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL));
+         IRExpr* rm  = get_IR_roundingmode();
+         Bool redp = opc2 == 0x0B4;
+         DIP("%s v%d,v%d\n", redp ? "xsredp" : "xsrsqrtedp", (UInt)XT, (UInt)XB);
+         assign( frB,
+                 unop( Iop_ReinterpI64asF64,
+                       unop( Iop_V128HIto64, mkexpr( vB ) ) ) );
+
+         if (!redp)
+            assign( sqrt,
+                    binop( Iop_SqrtF64,
+                           rm,
+                           mkexpr(frB) ) );
+         putVSReg( XT,
+                      binop( Iop_64HLtoV128,
+                             unop( Iop_ReinterpF64asI64,
+                                   triop( Iop_DivF64,
+                                          rm,
+                                          ieee_one,
+                                          redp ? mkexpr( frB ) : mkexpr( sqrt ) ) ),
+                             mkU64( 0 ) ) );
+         break;
+      }
+
+      default:
+         vex_printf( "dis_vxs_misc(ppc)(opc2)\n" );
+         return False;
+   }
+   return True;
+}
+
+/*
+ * VSX Logical Instructions
+ */
+static Bool
+dis_vx_logic ( UInt theInstr, UInt opc2 )
+{
+   /* XX3-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XT = ifieldRegXT ( theInstr );
+   UChar XA = ifieldRegXA ( theInstr );
+   UChar XB = ifieldRegXB ( theInstr );
+   IRTemp vA = newTemp( Ity_V128 );
+   IRTemp vB = newTemp( Ity_V128 );
+
+   if (opc1 != 0x3C) {
+      vex_printf( "dis_vx_logic(ppc)(instr)\n" );
+      return False;
+   }
+
+   assign( vA, getVSReg( XA ) );
+   assign( vB, getVSReg( XB ) );
+
+   switch (opc2) {
+      case 0x268: // xxlxor
+         DIP("xxlxor v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop( Iop_XorV128, mkexpr( vA ), mkexpr( vB ) ) );
+         break;
+      case 0x248: // xxlor
+         DIP("xxlor v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop( Iop_OrV128, mkexpr( vA ), mkexpr( vB ) ) );
+         break;
+      case 0x288: // xxlnor
+         DIP("xxlnor v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, unop( Iop_NotV128, binop( Iop_OrV128, mkexpr( vA ),
+                                                 mkexpr( vB ) ) ) );
+         break;
+      case 0x208: // xxland
+         DIP("xxland v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop( Iop_AndV128, mkexpr( vA ), mkexpr( vB ) ) );
+         break;
+      case 0x228: //xxlandc
+         DIP("xxlandc v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, binop( Iop_AndV128, mkexpr( vA ), unop( Iop_NotV128,
+                                                               mkexpr( vB ) ) ) );
+         break;
+      default:
+         vex_printf( "dis_vx_logic(ppc)(opc2)\n" );
+         return False;
+   }
+   return True;
+}
+
+/*
+ * VSX Load Instructions
+ * NOTE: VSX supports word-aligned storage access.
+ */
+static Bool
+dis_vx_load ( UInt theInstr )
+{
+   /* XX1-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XT = ifieldRegXT ( theInstr );
+   UChar rA_addr = ifieldRegA( theInstr );
+   UChar rB_addr = ifieldRegB( theInstr );
+   UInt opc2 = ifieldOPClo10( theInstr );
+
+   IRType ty = mode64 ? Ity_I64 : Ity_I32;
+   IRTemp EA = newTemp( ty );
+
+   if (opc1 != 0x1F) {
+      vex_printf( "dis_vx_load(ppc)(instr)\n" );
+      return False;
+   }
+
+   assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
+
+   switch (opc2) {
+   case 0x24C: // lxsdx
+   {
+      IRExpr * exp;
+      DIP("lxsdx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
+      exp = loadBE( Ity_I64, mkexpr( EA ) );
+      // putVSReg expects an expression of type Ity_V128, but the load we just
+      // performed is only a doubleword.  Since the contents of VSR[XT] element 1
+      // are undefined after this operation, we can simply splat the loaded
+      // doubleword across both halves.
+      putVSReg( XT, binop( Iop_64HLtoV128, exp, exp ) );
+      break;
+   }
+   case 0x34C: // lxvd2x
+   {
+      IROp addOp = ty == Ity_I64 ? Iop_Add64 : Iop_Add32;
+      IRExpr * high, *low;
+      ULong ea_off = 8;
+      IRExpr* high_addr;
+      DIP("lxvd2x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
+      high = loadBE( Ity_I64, mkexpr( EA ) );
+      high_addr = binop( addOp, mkexpr( EA ), ty == Ity_I64 ? mkU64( ea_off )
+            : mkU32( ea_off ) );
+      low = loadBE( Ity_I64, high_addr );
+      putVSReg( XT, binop( Iop_64HLtoV128, high, low ) );
+      break;
+   }
+   case 0x14C: // lxvdsx
+   {
+      IRTemp data = newTemp(Ity_I64);
+      DIP("lxvdsx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
+      assign( data, loadBE( Ity_I64, mkexpr( EA ) ) );
+      putVSReg( XT, binop( Iop_64HLtoV128, mkexpr( data ), mkexpr( data ) ) );
+      break;
+   }
+   case 0x30C: // lxvw4x
+   {
+      IRExpr * t3, *t2, *t1, *t0;
+      UInt ea_off = 0;
+      IRExpr* irx_addr;
+
+      DIP("lxvw4x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
+      t3 = loadBE( Ity_I32,  mkexpr( EA ) );
+      ea_off += 4;
+      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
+                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
+      t2 = loadBE( Ity_I32, irx_addr );
+      ea_off += 4;
+      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
+                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
+      t1 = loadBE( Ity_I32, irx_addr );
+      ea_off += 4;
+      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
+                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
+      t0 = loadBE( Ity_I32, irx_addr );
+      putVSReg( XT, binop( Iop_64HLtoV128, binop( Iop_32HLto64, t3, t2 ),
+                           binop( Iop_32HLto64, t1, t0 ) ) );
+      break;
+   }
+   default:
+      vex_printf( "dis_vx_load(ppc)(opc2)\n" );
+      return False;
+   }
+   return True;
+}
+
+/*
+ * VSX Store Instructions
+ * NOTE: VSX supports word-aligned storage access.
+ */
+static Bool
+dis_vx_store ( UInt theInstr )
+{
+   /* XX1-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XS = ifieldRegXS( theInstr );
+   UChar rA_addr = ifieldRegA( theInstr );
+   UChar rB_addr = ifieldRegB( theInstr );
+   IRTemp vS = newTemp( Ity_V128 );
+   UInt opc2 = ifieldOPClo10( theInstr );
+
+   IRType ty = mode64 ? Ity_I64 : Ity_I32;
+   IRTemp EA = newTemp( ty );
+
+   if (opc1 != 0x1F) {
+      vex_printf( "dis_vx_store(ppc)(instr)\n" );
+      return False;
+   }
+
+   assign( EA, ea_rAor0_idxd( rA_addr, rB_addr ) );
+   assign( vS, getVSReg( XS ) );
+
+   switch (opc2) {
+   case 0x2CC: // stxsdx
+   {
+      IRExpr * high64;
+      DIP("stxsdx %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr);
+      high64 = unop( Iop_V128HIto64, mkexpr( vS ) );
+      storeBE( mkexpr( EA ), high64 );
+      break;
+   }
+   case 0x3CC: // stxvd2x
+   {
+      IRExpr * high64, *low64;
+      DIP("stxvd2x %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr);
+      high64 = unop( Iop_V128HIto64, mkexpr( vS ) );
+      low64 = unop( Iop_V128to64, mkexpr( vS ) );
+      storeBE( mkexpr( EA ), high64 );
+      storeBE( binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ), ty == Ity_I64 ? mkU64( 8 )
+            : mkU32( 8 ) ), low64 );
+      break;
+   }
+   case 0x38C: // stxvw4x
+   {
+      UInt ea_off = 0;
+      IRExpr* irx_addr;
+      IRTemp hi64 = newTemp( Ity_I64 );
+      IRTemp lo64 = newTemp( Ity_I64 );
+
+      DIP("stxvw4x %d,r%u,r%u\n", (UInt)XS, rA_addr, rB_addr);
+
+      // This instruction supports word-aligned stores, so EA may not be
+      // quad-word aligned.  Therefore, do 4 individual word-size stores.
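+      // That is, the four words land at EA+0, EA+4, EA+8 and EA+12,
+      // most-significant word first (big-endian element order).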
+      assign( hi64, unop( Iop_V128HIto64, mkexpr( vS ) ) );
+      assign( lo64, unop( Iop_V128to64, mkexpr( vS ) ) );
+
+      storeBE( mkexpr( EA ), unop( Iop_64HIto32, mkexpr( hi64 ) ) );
+      ea_off += 4;
+      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
+                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
+      storeBE( irx_addr, unop( Iop_64to32, mkexpr( hi64 ) ) );
+      ea_off += 4;
+      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
+                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
+      storeBE( irx_addr, unop( Iop_64HIto32, mkexpr( lo64 ) ) );
+      ea_off += 4;
+      irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
+                        ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
+      storeBE( irx_addr, unop( Iop_64to32, mkexpr( lo64 ) ) );
+
+      break;
+   }
+   default:
+      vex_printf( "dis_vx_store(ppc)(opc2)\n" );
+      return False;
+   }
+   return True;
+}
+
+/*
+ * VSX permute and other miscellaneous instructions
+ */
+static Bool
+dis_vx_permute_misc( UInt theInstr, UInt opc2 )
+{
+   /* XX3-Form */
+   UChar opc1 = ifieldOPC( theInstr );
+   UChar XT = ifieldRegXT ( theInstr );
+   UChar XA = ifieldRegXA ( theInstr );
+   UChar XB = ifieldRegXB ( theInstr );
+   IRTemp vT = newTemp( Ity_V128 );
+   IRTemp vA = newTemp( Ity_V128 );
+   IRTemp vB = newTemp( Ity_V128 );
+
+   if (opc1 != 0x3C) {
+      vex_printf( "dis_vx_permute_misc(ppc)(instr)\n" );
+      return False;
+   }
+
+   assign( vA, getVSReg( XA ) );
+   assign( vB, getVSReg( XB ) );
+
+   switch (opc2) {
+      case 0x8: // xxsldwi (VSX Shift Left Double by Word Immediate)
+      {
+         UChar SHW = ifieldSHW ( theInstr );
+         IRTemp result = newTemp(Ity_V128);
+         if ( SHW != 0 ) {
+             IRTemp hi = newTemp(Ity_V128);
+             IRTemp lo = newTemp(Ity_V128);
+             assign( hi, binop(Iop_ShlV128, mkexpr(vA), mkU8(SHW*32)) );
+             assign( lo, binop(Iop_ShrV128, mkexpr(vB), mkU8(128-SHW*32)) );
+             assign ( result, binop(Iop_OrV128, mkexpr(hi), mkexpr(lo)) );
+         } else
+             assign ( result, mkexpr(vA) );
+         DIP("xxsldwi v%d,v%d,v%d,%d\n", (UInt)XT, (UInt)XA, (UInt)XB, (UInt)SHW);
+         putVSReg( XT, mkexpr(result) );
+         break;
+      }
+      case 0x28: // xxpermdi (VSX Permute Doubleword Immediate)
+      {
+         UChar DM = ifieldDM ( theInstr );
+         IRTemp hi = newTemp(Ity_I64);
+         IRTemp lo = newTemp(Ity_I64);
+
+         if (DM & 0x2)
+           assign( hi, unop(Iop_V128to64, mkexpr(vA)) );
+         else
+           assign( hi, unop(Iop_V128HIto64, mkexpr(vA)) );
+
+         if (DM & 0x1)
+           assign( lo, unop(Iop_V128to64, mkexpr(vB)) );
+         else
+           assign( lo, unop(Iop_V128HIto64, mkexpr(vB)) );
+
+         assign( vT, binop(Iop_64HLtoV128, mkexpr(hi), mkexpr(lo)) );
+
+         DIP("xxpermdi v%d,v%d,v%d,0x%x\n", (UInt)XT, (UInt)XA, (UInt)XB, (UInt)DM);
+         putVSReg( XT, mkexpr( vT ) );
+         break;
+      }
+      case 0x48: // xxmrghw (VSX Merge High Word)
+      case 0xc8: // xxmrglw (VSX Merge Low Word)
+      {
+         char type = (opc2 == 0x48) ? 'h' : 'l';
+         IROp word_op = (opc2 == 0x48) ? Iop_V128HIto64 : Iop_V128to64;
+         IRTemp a64 = newTemp(Ity_I64);
+         IRTemp ahi32 = newTemp(Ity_I32);
+         IRTemp alo32 = newTemp(Ity_I32);
+         IRTemp b64 = newTemp(Ity_I64);
+         IRTemp bhi32 = newTemp(Ity_I32);
+         IRTemp blo32 = newTemp(Ity_I32);
+
+         assign( a64, unop(word_op, mkexpr(vA)) );
+         assign( ahi32, unop(Iop_64HIto32, mkexpr(a64)) );
+         assign( alo32, unop(Iop_64to32, mkexpr(a64)) );
+
+         assign( b64, unop(word_op, mkexpr(vB)) );
+         assign( bhi32, unop(Iop_64HIto32, mkexpr(b64)) );
+         assign( blo32, unop(Iop_64to32, mkexpr(b64)) );
+
+         assign( vT, binop(Iop_64HLtoV128,
+                           binop(Iop_32HLto64, mkexpr(ahi32), mkexpr(bhi32)),
+                           binop(Iop_32HLto64, mkexpr(alo32), mkexpr(blo32))) );
+
+         DIP("xxmrg%cw v%d,v%d,v%d\n", type, (UInt)XT, (UInt)XA, (UInt)XB);
+         putVSReg( XT, mkexpr( vT ) );
+         break;
+      }
+      case 0x018: // xxsel (VSX Select)
+      {
+         UChar XC = ifieldRegXC(theInstr);
+         IRTemp vC = newTemp( Ity_V128 );
+         assign( vC, getVSReg( XC ) );
+         DIP("xxsel v%d,v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB, (UInt)XC);
+         /* vD = (vA & ~vC) | (vB & vC) */
+         putVSReg( XT, binop(Iop_OrV128,
+            binop(Iop_AndV128, mkexpr(vA), unop(Iop_NotV128, mkexpr(vC))),
+            binop(Iop_AndV128, mkexpr(vB), mkexpr(vC))) );
+         break;
+      }
+      case 0x148: // xxspltw (VSX Splat Word)
+      {
+         UChar UIM   = ifieldRegA(theInstr) & 3;
+         UChar sh_uim = (3 - (UIM)) * 32;
+         DIP("xxspltw v%d,v%d,%d\n", (UInt)XT, (UInt)XB, UIM);
+         putVSReg( XT,
+                   unop( Iop_Dup32x4,
+                         unop( Iop_V128to32,
+                               binop( Iop_ShrV128, mkexpr( vB ), mkU8( sh_uim ) ) ) ) );
+         break;
+      }
+
+      default:
+         vex_printf( "dis_vx_permute_misc(ppc)(opc2)\n" );
+         return False;
+   }
+   return True;
+}
+
+/*
   AltiVec Load Instructions
 */
 static Bool dis_av_load ( VexAbiInfo* vbi, UInt theInstr )
@@ -7400,7 +11249,6 @@
    return True;
 }
 
-
 /*
   AltiVec Store Instructions
 */
@@ -8164,7 +12012,7 @@
                                mkU8(15))) );
 
       putVReg( vD_addr,
-               binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) );
+               binop(Iop_QNarrowBin32Sto16Sx8, mkexpr(zHi), mkexpr(zLo)) );
       break;
    }
    case 0x21: { // vmhraddshs (Mult High Round, Add Signed HW Saturate, AV p186)
@@ -8198,7 +12046,8 @@
                                            mkexpr(aHi), mkexpr(bHi))),
                                mkU8(15))) );
 
-      putVReg( vD_addr, binop(Iop_QNarrow32Sx4, mkexpr(zHi), mkexpr(zLo)) );
+      putVReg( vD_addr,
+               binop(Iop_QNarrowBin32Sto16Sx8, mkexpr(zHi), mkexpr(zLo)) );
       break;
    }
    case 0x22: { // vmladduhm (Mult Low, Add Unsigned HW Modulo, AV p194)
@@ -8216,7 +12065,8 @@
       assign(zHi, binop(Iop_Add32x4,
                      binop(Iop_MullEven16Ux8, mkexpr(aHi), mkexpr(bHi)),
                      mkexpr(cHi)));
-      putVReg(vD_addr, binop(Iop_Narrow32x4, mkexpr(zHi), mkexpr(zLo)));
+      putVReg( vD_addr,
+               binop(Iop_NarrowBin32to16x8, mkexpr(zHi), mkexpr(zLo)) );
       break;
    }
 
@@ -8700,25 +12550,27 @@
    /* Packing */
    case 0x00E: // vpkuhum (Pack Unsigned HW Unsigned Modulo, AV p224)
       DIP("vpkuhum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      putVReg( vD_addr, binop(Iop_Narrow16x8, mkexpr(vA), mkexpr(vB)) );
+      putVReg( vD_addr,
+               binop(Iop_NarrowBin16to8x16, mkexpr(vA), mkexpr(vB)) );
       return True;
 
    case 0x04E: // vpkuwum (Pack Unsigned W Unsigned Modulo, AV p226)
       DIP("vpkuwum v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
-      putVReg( vD_addr, binop(Iop_Narrow32x4, mkexpr(vA), mkexpr(vB)) );
+      putVReg( vD_addr,
+               binop(Iop_NarrowBin32to16x8, mkexpr(vA), mkexpr(vB)) );
       return True;
 
    case 0x08E: // vpkuhus (Pack Unsigned HW Unsigned Saturate, AV p225)
       DIP("vpkuhus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
       putVReg( vD_addr,
-               binop(Iop_QNarrow16Ux8, mkexpr(vA), mkexpr(vB)) );
+               binop(Iop_QNarrowBin16Uto8Ux16, mkexpr(vA), mkexpr(vB)) );
       // TODO: set VSCR[SAT]
       return True;
 
    case 0x0CE: // vpkuwus (Pack Unsigned W Unsigned Saturate, AV p227)
       DIP("vpkuwus v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
       putVReg( vD_addr,
-               binop(Iop_QNarrow32Ux4, mkexpr(vA), mkexpr(vB)) );
+               binop(Iop_QNarrowBin32Uto16Ux8, mkexpr(vA), mkexpr(vB)) );
       // TODO: set VSCR[SAT]
       return True;
 
@@ -8737,7 +12589,7 @@
                             unop(Iop_NotV128,
                                  binop(Iop_SarN16x8,
                                        mkexpr(vB), mkU8(15)))) );
-      putVReg( vD_addr, binop(Iop_QNarrow16Ux8,
+      putVReg( vD_addr, binop(Iop_QNarrowBin16Uto8Ux16,
                               mkexpr(vA_tmp), mkexpr(vB_tmp)) );
       // TODO: set VSCR[SAT]
       return True;
@@ -8757,7 +12609,7 @@
                             unop(Iop_NotV128,
                                  binop(Iop_SarN32x4,
                                        mkexpr(vB), mkU8(31)))) );
-      putVReg( vD_addr, binop(Iop_QNarrow32Ux4,
+      putVReg( vD_addr, binop(Iop_QNarrowBin32Uto16Ux8,
                               mkexpr(vA_tmp), mkexpr(vB_tmp)) );
       // TODO: set VSCR[SAT]
       return True;
@@ -8765,14 +12617,14 @@
    case 0x18E: // vpkshss (Pack Signed HW Signed Saturate, AV p220)
       DIP("vpkshss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
       putVReg( vD_addr,
-               binop(Iop_QNarrow16Sx8, mkexpr(vA), mkexpr(vB)) );
+               binop(Iop_QNarrowBin16Sto8Sx16, mkexpr(vA), mkexpr(vB)) );
       // TODO: set VSCR[SAT]
       return True;
 
    case 0x1CE: // vpkswss (Pack Signed W Signed Saturate, AV p222)
       DIP("vpkswss v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr);
       putVReg( vD_addr,
-               binop(Iop_QNarrow32Sx4, mkexpr(vA), mkexpr(vB)) );
+               binop(Iop_QNarrowBin32Sto16Sx8, mkexpr(vA), mkexpr(vB)) );
       // TODO: set VSCR[SAT]
       return True;
 
@@ -8812,7 +12664,7 @@
       assign( b_tmp, binop(Iop_OrV128, mkexpr(b1),
                            binop(Iop_OrV128, mkexpr(b2), mkexpr(b3))) );
 
-      putVReg( vD_addr, binop(Iop_Narrow32x4,
+      putVReg( vD_addr, binop(Iop_NarrowBin32to16x8,
                               mkexpr(a_tmp), mkexpr(b_tmp)) );
       return True;
    }
@@ -9238,8 +13090,221 @@
 }
 
 
+/* The 0x3C primary opcode (VSX category) uses several different forms of
+ * extended opcodes:
+ *   o XX2-form:
+ *      - [10:2] (IBM notation [21:29])
+ *   o XX3-form variants:
+ *       - variant 1: [10:3] (IBM notation [21:28])
+ *       - variant 2: [9:3] (IBM notation [22:28])
+ *       - variant 3: [7:3] (IBM notation [24:28])
+ *   o XX4-form:
+ *      - [10:6] (IBM notation [21:25])
+ *
+ * The XX2-form needs bit 0 masked from the standard extended opcode
+ * as returned by ifieldOPClo10; the XX3-form needs bits 0 and 1 masked;
+ * and the XX4-form needs bits 0, 1, and 2 masked.  Additionally, the
+ * XX4 and XX3 (variants 2 and 3) forms need certain bits masked on the
+ * front end since their encoding does not begin at bit 21 like the standard
+ * format.
+ *
+ * The get_VSX60_opc2() function uses the vsx_insn array below to obtain the
+ * secondary opcode for such VSX instructions.
+ *
+ */
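+
+/* Worked example (field values illustrative only): xsadddp is an
+ * XX3-form (variant 1) instruction whose normalized opcode is 0x080.
+ * A raw ifieldOPClo10() value of 0x083 -- the low two bits being the
+ * AX and BX register-extension bits -- masked with XX3_1_MASK (0x3FC)
+ * therefore recovers 0x080.
+ */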
 
 
+struct vsx_insn {
+   UInt opcode;
+   Char * name;
+};
+
+//  ATTENTION:  Keep this array sorted on the opcode!!!
+static struct vsx_insn vsx_all[] = {
+      { 0x8, "xxsldwi" },
+      { 0x18, "xxsel" },
+      { 0x28, "xxpermdi" },
+      { 0x48, "xxmrghw" },
+      { 0x80, "xsadddp" },
+      { 0x84, "xsmaddadp" },
+      { 0x8c, "xscmpudp" },
+      { 0x90, "xscvdpuxws" },
+      { 0x92, "xsrdpi" },
+      { 0x94, "xsrsqrtedp" },
+      { 0x96, "xssqrtdp" },
+      { 0xa0, "xssubdp" },
+      { 0xa4, "xsmaddmdp" },
+      { 0xac, "xscmpodp" },
+      { 0xb0, "xscvdpsxws" },
+      { 0xb2, "xsrdpiz" },
+      { 0xb4, "xsredp" },
+      { 0xc0, "xsmuldp" },
+      { 0xc4, "xsmsubadp" },
+      { 0xc8, "xxmrglw" },
+      { 0xd2, "xsrdpip" },
+      { 0xd4, "xstsqrtdp" },
+      { 0xd6, "xsrdpic" },
+      { 0xe0, "xsdivdp" },
+      { 0xe4, "xsmsubmdp" },
+      { 0xf2, "xsrdpim" },
+      { 0xf4, "xstdivdp" },
+      { 0x100, "xvaddsp" },
+      { 0x104, "xvmaddasp" },
+      { 0x10c, "xvcmpeqsp" },
+      { 0x110, "xvcvspuxws" },
+      { 0x112, "xvrspi" },
+      { 0x114, "xvrsqrtesp" },
+      { 0x116, "xvsqrtsp" },
+      { 0x120, "xvsubsp" },
+      { 0x124, "xvmaddmsp" },
+      { 0x12c, "xvcmpgtsp" },
+      { 0x130, "xvcvspsxws" },
+      { 0x132, "xvrspiz" },
+      { 0x134, "xvresp" },
+      { 0x140, "xvmulsp" },
+      { 0x144, "xvmsubasp" },
+      { 0x148, "xxspltw" },
+      { 0x14c, "xvcmpgesp" },
+      { 0x150, "xvcvuxwsp" },
+      { 0x152, "xvrspip" },
+      { 0x154, "xvtsqrtsp" },
+      { 0x156, "xvrspic" },
+      { 0x160, "xvdivsp" },
+      { 0x164, "xvmsubmsp" },
+      { 0x170, "xvcvsxwsp" },
+      { 0x172, "xvrspim" },
+      { 0x174, "xvtdivsp" },
+      { 0x180, "xvadddp" },
+      { 0x184, "xvmaddadp" },
+      { 0x18c, "xvcmpeqdp" },
+      { 0x190, "xvcvdpuxws" },
+      { 0x192, "xvrdpi" },
+      { 0x194, "xvrsqrtedp" },
+      { 0x196, "xvsqrtdp" },
+      { 0x1a0, "xvsubdp" },
+      { 0x1a4, "xvmaddmdp" },
+      { 0x1ac, "xvcmpgtdp" },
+      { 0x1b0, "xvcvdpsxws" },
+      { 0x1b2, "xvrdpiz" },
+      { 0x1b4, "xvredp" },
+      { 0x1c0, "xvmuldp" },
+      { 0x1c4, "xvmsubadp" },
+      { 0x1cc, "xvcmpgedp" },
+      { 0x1d0, "xvcvuxwdp" },
+      { 0x1d2, "xvrdpip" },
+      { 0x1d4, "xvtsqrtdp" },
+      { 0x1d6, "xvrdpic" },
+      { 0x1e0, "xvdivdp" },
+      { 0x1e4, "xvmsubmdp" },
+      { 0x1f0, "xvcvsxwdp" },
+      { 0x1f2, "xvrdpim" },
+      { 0x1f4, "xvtdivdp" },
+      { 0x208, "xxland" },
+      { 0x212, "xscvdpsp" },
+      { 0x228, "xxlandc" },
+      { 0x248, "xxlor" },
+      { 0x268, "xxlxor" },
+      { 0x280, "xsmaxdp" },
+      { 0x284, "xsnmaddadp" },
+      { 0x288, "xxlnor" },
+      { 0x290, "xscvdpuxds" },
+      { 0x292, "xscvspdp" },
+      { 0x2a0, "xsmindp" },
+      { 0x2a4, "xsnmaddmdp" },
+      { 0x2b0, "xscvdpsxds" },
+      { 0x2b2, "xsabsdp" },
+      { 0x2c0, "xscpsgndp" },
+      { 0x2c4, "xsnmsubadp" },
+      { 0x2d0, "xscvuxddp" },
+      { 0x2d2, "xsnabsdp" },
+      { 0x2e4, "xsnmsubmdp" },
+      { 0x2f0, "xscvsxddp" },
+      { 0x2f2, "xsnegdp" },
+      { 0x300, "xvmaxsp" },
+      { 0x304, "xvnmaddasp" },
+      { 0x30c, "xvcmpeqsp." },
+      { 0x310, "xvcvspuxds" },
+      { 0x312, "xvcvdpsp" },
+      { 0x320, "xvminsp" },
+      { 0x324, "xvnmaddmsp" },
+      { 0x32c, "xvcmpgtsp." },
+      { 0x330, "xvcvspsxds" },
+      { 0x332, "xvabssp" },
+      { 0x340, "xvcpsgnsp" },
+      { 0x344, "xvnmsubasp" },
+      { 0x34c, "xvcmpgesp." },
+      { 0x350, "xvcvuxdsp" },
+      { 0x352, "xvnabssp" },
+      { 0x364, "xvnmsubmsp" },
+      { 0x370, "xvcvsxdsp" },
+      { 0x372, "xvnegsp" },
+      { 0x380, "xvmaxdp" },
+      { 0x384, "xvnmaddadp" },
+      { 0x38c, "xvcmpeqdp." },
+      { 0x390, "xvcvdpuxds" },
+      { 0x392, "xvcvspdp" },
+      { 0x3a0, "xvmindp" },
+      { 0x3a4, "xvnmaddmdp" },
+      { 0x3ac, "xvcmpgtdp." },
+      { 0x3b0, "xvcvdpsxds" },
+      { 0x3b2, "xvabsdp" },
+      { 0x3c0, "xvcpsgndp" },
+      { 0x3c4, "xvnmsubadp" },
+      { 0x3cc, "xvcmpgedp." },
+      { 0x3d0, "xvcvuxddp" },
+      { 0x3d2, "xvnabsdp" },
+      { 0x3e4, "xvnmsubmdp" },
+      { 0x3f0, "xvcvsxddp" },
+      { 0x3f2, "xvnegdp" }
+};
+#define VSX_ALL_LEN (sizeof(vsx_all) / sizeof(vsx_all[0]))
+
+// ATTENTION: This search function assumes vsx_all array is sorted.
+static Int findVSXextOpCode(UInt opcode)
+{
+   Int low, mid, high;
+   low = 0;
+   high = VSX_ALL_LEN - 1;
+   while (low <= high) {
+      mid = (low + high)/2;
+      if (opcode < vsx_all[mid].opcode)
+         high = mid - 1;
+      else if (opcode > vsx_all[mid].opcode)
+         low = mid + 1;
+      else
+         return mid;
+   }
+   return -1;
+}
+
+
+/* The full 10-bit extended opcode retrieved via ifieldOPClo10 is
+ * passed, and we then try to match it up with one of the VSX forms
+ * below.
+ */
+static UInt get_VSX60_opc2(UInt opc2_full)
+{
+#define XX2_MASK 0x000003FE
+#define XX3_1_MASK 0x000003FC
+#define XX3_2_MASK 0x000001FC
+#define XX3_3_MASK 0x0000007C
+#define XX4_MASK 0x00000018
+   Int ret;
+   UInt vsxExtOpcode = 0;
+
+   if (( ret = findVSXextOpCode(opc2_full & XX2_MASK)) >= 0)
+      vsxExtOpcode = vsx_all[ret].opcode;
+   else if (( ret = findVSXextOpCode(opc2_full & XX3_1_MASK)) >= 0)
+      vsxExtOpcode = vsx_all[ret].opcode;
+   else if (( ret = findVSXextOpCode(opc2_full & XX3_2_MASK)) >= 0)
+      vsxExtOpcode = vsx_all[ret].opcode;
+   else if (( ret = findVSXextOpCode(opc2_full & XX3_3_MASK)) >= 0)
+      vsxExtOpcode = vsx_all[ret].opcode;
+   else if (( ret = findVSXextOpCode(opc2_full & XX4_MASK)) >= 0)
+      vsxExtOpcode = vsx_all[ret].opcode;
+
+   return vsxExtOpcode;
+}
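+
+/* For instance (illustrative only): xsabsdp is XX2-form, so when its BX
+ * register-extension bit is set, ifieldOPClo10() yields 0x2B3 and the
+ * XX2_MASK pass above normalizes it back to 0x2B2. */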
 
 /*------------------------------------------------------------*/
 /*--- Disassemble a single instruction                     ---*/
@@ -9268,6 +13333,7 @@
    Bool      allow_V  = False;
    Bool      allow_FX = False;
    Bool      allow_GX = False;
+   Bool      allow_VX = False;  // Equates to "supports Power ISA 2.06"
    UInt      hwcaps = archinfo->hwcaps;
    Long      delta;
 
@@ -9277,11 +13343,13 @@
       allow_V  = (0 != (hwcaps & VEX_HWCAPS_PPC64_V));
       allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC64_FX));
       allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC64_GX));
+      allow_VX = (0 != (hwcaps & VEX_HWCAPS_PPC64_VX));
    } else {
       allow_F  = (0 != (hwcaps & VEX_HWCAPS_PPC32_F));
       allow_V  = (0 != (hwcaps & VEX_HWCAPS_PPC32_V));
       allow_FX = (0 != (hwcaps & VEX_HWCAPS_PPC32_FX));
       allow_GX = (0 != (hwcaps & VEX_HWCAPS_PPC32_GX));
+      allow_VX = (0 != (hwcaps & VEX_HWCAPS_PPC32_VX));
    }
 
    /* The running delta */
@@ -9473,6 +13541,20 @@
 
    case 0x3B:
       if (!allow_F) goto decode_noF;
+      opc2 = ifieldOPClo10(theInstr);
+      switch (opc2) {
+         case 0x3CE: // fcfidus (implemented as native insn)
+            if (!allow_VX)
+               goto decode_noVX;
+            if (dis_fp_round( theInstr ))
+               goto decode_success;
+            goto decode_failure;
+         case 0x34E: // fcfids
+            if (dis_fp_round( theInstr ))
+               goto decode_success;
+            goto decode_failure;
+      }
+
       opc2 = IFIELD(theInstr, 1, 5);
       switch (opc2) {
       /* Floating Point Arith Instructions */
@@ -9499,12 +13581,127 @@
          if (!allow_GX) goto decode_noGX;
          if (dis_fp_arith(theInstr)) goto decode_success;
          goto decode_failure;
-         
+
       default:
          goto decode_failure;
       }
       break;
 
+   case 0x3C: // VSX instructions (except load/store)
+   {
+      UInt vsxOpc2 = get_VSX60_opc2(opc2);
+      /* The vsxOpc2 returned is the "normalized" value, representing the
+       * instruction's secondary opcode as taken from the standard secondary
+       * opcode field [21:30] (IBM notation), even if the actual field
+       * is non-standard.  These normalized values are given in the opcode
+       * appendices of the ISA 2.06 document.
+       */
+      if (vsxOpc2 == 0)
+         goto decode_failure;
+
+      switch (vsxOpc2) {
+         case 0x8: case 0x28: case 0x48: case 0xc8: // xxsldwi, xxpermdi, xxmrghw, xxmrglw
+         case 0x018: case 0x148: // xxsel, xxspltw
+            if (dis_vx_permute_misc(theInstr, vsxOpc2)) goto decode_success;
+            goto decode_failure;
+         case 0x268: case 0x248: case 0x288: case 0x208: case 0x228: // xxlxor, xxlor, xxlnor, xxland, xxlandc
+            if (dis_vx_logic(theInstr, vsxOpc2)) goto decode_success;
+            goto decode_failure;
+         case 0x2B2: case 0x2C0: // xsabsdp, xscpsgndp
+         case 0x2D2: case 0x2F2: // xsnabsdp, xsnegdp
+         case 0x280: case 0x2A0: // xsmaxdp, xsmindp
+         case 0x0F2: case 0x0D2: // xsrdpim, xsrdpip
+         case 0x0B4: case 0x094: // xsredp, xsrsqrtedp
+         case 0x0D6: case 0x0B2: // xsrdpic, xsrdpiz
+         case 0x092: // xsrdpi
+            if (dis_vxs_misc(theInstr, vsxOpc2)) goto decode_success;
+            goto decode_failure;
+         case 0x08C: case 0x0AC: // xscmpudp, xscmpodp
+            if (dis_vx_cmp(theInstr, vsxOpc2)) goto decode_success;
+            goto decode_failure;
+         case 0x080: case 0x0E0: // xsadddp, xsdivdp
+         case 0x084: case 0x0A4: // xsmaddadp, xsmaddmdp
+         case 0x0C4: case 0x0E4: // xsmsubadp, xsmsubmdp
+         case 0x284: case 0x2A4: // xsnmaddadp, xsnmaddmdp
+         case 0x2C4: case 0x2E4: // xsnmsubadp, xsnmsubmdp
+         case 0x0C0: case 0x0A0: // xsmuldp, xssubdp
+         case 0x096: case 0x0F4: // xssqrtdp, xstdivdp
+         case 0x0D4: // xstsqrtdp
+            if (dis_vxs_arith(theInstr, vsxOpc2)) goto decode_success;
+            goto decode_failure;
+         case 0x180: // xvadddp
+         case 0x1E0: // xvdivdp
+         case 0x1C0: // xvmuldp
+         case 0x1A0: // xvsubdp
+         case 0x184: case 0x1A4: // xvmaddadp, xvmaddmdp
+         case 0x1C4: case 0x1E4: // xvmsubadp, xvmsubmdp
+         case 0x384: case 0x3A4: // xvnmaddadp, xvnmaddmdp
+         case 0x3C4: case 0x3E4: // xvnmsubadp, xvnmsubmdp
+         case 0x1D4: case 0x1F4: // xvtsqrtdp, xvtdivdp
+         case 0x196: // xvsqrtdp
+            if (dis_vxv_dp_arith(theInstr, vsxOpc2)) goto decode_success;
+            goto decode_failure;
+         case 0x100: // xvaddsp
+         case 0x160: // xvdivsp
+         case 0x140: // xvmulsp
+         case 0x120: // xvsubsp
+         case 0x104: case 0x124: // xvmaddasp, xvmaddmsp
+         case 0x144: case 0x164: // xvmsubasp, xvmsubmsp
+         case 0x304: case 0x324: // xvnmaddasp, xvnmaddmsp
+         case 0x344: case 0x364: // xvnmsubasp, xvnmsubmsp
+         case 0x154: case 0x174: // xvtsqrtsp, xvtdivsp
+         case 0x116: // xvsqrtsp
+            if (dis_vxv_sp_arith(theInstr, vsxOpc2)) goto decode_success;
+            goto decode_failure;
+
+         case 0x2B0: case 0x2F0: case 0x2D0: // xscvdpsxds, xscvsxddp, xscvuxddp
+         case 0x1b0: case 0x130: // xvcvdpsxws, xvcvspsxws
+         case 0x0b0: case 0x290: // xscvdpsxws, xscvdpuxds
+         case 0x212: case 0x090: // xscvdpsp, xscvdpuxws
+         case 0x292: case 0x312: // xscvspdp, xvcvdpsp
+         case 0x390: case 0x190: // xvcvdpuxds, xvcvdpuxws
+         case 0x3B0: case 0x310: // xvcvdpsxds, xvcvspuxds
+         case 0x392: case 0x330: // xvcvspdp, xvcvspsxds
+         case 0x110: case 0x3f0: // xvcvspuxws, xvcvsxddp
+         case 0x370: case 0x1f0: // xvcvsxdsp, xvcvsxwdp
+         case 0x170: case 0x150: // xvcvsxwsp, xvcvuxwsp
+         case 0x3d0: case 0x350: // xvcvuxddp, xvcvuxdsp
+         case 0x1d0: // xvcvuxwdp
+            if (dis_vx_conv(theInstr, vsxOpc2)) goto decode_success;
+            goto decode_failure;
+
+         case 0x18C: case 0x38C: // xvcmpeqdp[.]
+         case 0x10C: case 0x30C: // xvcmpeqsp[.]
+         case 0x14C: case 0x34C: // xvcmpgesp[.]
+         case 0x12C: case 0x32C: // xvcmpgtsp[.]
+         case 0x1CC: case 0x3CC: // xvcmpgedp[.]
+         case 0x1AC: case 0x3AC: // xvcmpgtdp[.]
+            if (dis_vvec_cmp(theInstr, vsxOpc2)) goto decode_success;
+            goto decode_failure;
+
+         case 0x134:  // xvresp
+         case 0x1B4:  // xvredp
+         case 0x194: case 0x114: // xvrsqrtedp, xvrsqrtesp
+         case 0x380: case 0x3A0: // xvmaxdp, xvmindp
+         case 0x300: case 0x320: // xvmaxsp, xvminsp
+         case 0x3C0: case 0x340: // xvcpsgndp, xvcpsgnsp
+         case 0x3B2: case 0x332: // xvabsdp, xvabssp
+         case 0x3D2: case 0x352: // xvnabsdp, xvnabssp
+         case 0x192: case 0x1D6: // xvrdpi, xvrdpic
+         case 0x1F2: case 0x1D2: // xvrdpim, xvrdpip
+         case 0x1B2: case 0x3F2: // xvrdpiz, xvnegdp
+         case 0x112: case 0x156: // xvrspi, xvrspic
+         case 0x172: case 0x152: // xvrspim, xvrspip
+         case 0x132: // xvrspiz
+            if (dis_vxv_misc(theInstr, vsxOpc2)) goto decode_success;
+            goto decode_failure;
+
+         default:
+            goto decode_failure;
+      }
+      break;
+   }
+
    /* 64bit Integer Stores */
    case 0x3E:  // std, stdu
       if (!mode64) goto decode_failure;
@@ -9555,6 +13752,11 @@
          if (dis_fp_cmp(theInstr)) goto decode_success;
          goto decode_failure;
          
+      case 0x080: // ftdiv
+      case 0x0A0: // ftsqrt
+         if (dis_fp_tests(theInstr)) goto decode_success;
+         goto decode_failure;
+
       /* Floating Point Rounding/Conversion Instructions */         
       case 0x00C: // frsp
       case 0x00E: // fctiw
@@ -9564,6 +13766,11 @@
       case 0x34E: // fcfid
          if (dis_fp_round(theInstr)) goto decode_success;
          goto decode_failure;
+      case 0x3CE: case 0x3AE: case 0x3AF: // fcfidu, fctidu[z] (implemented as native insns)
+      case 0x08F: case 0x08E: // fctiwu[z] (implemented as native insns)
+         if (!allow_VX) goto decode_noVX;
+         if (dis_fp_round(theInstr)) goto decode_success;
+         goto decode_failure;
 
       /* Power6 rounding stuff */
       case 0x1E8: // frim
@@ -9640,13 +13847,26 @@
       case 0x1CB: case 0x04B: case 0x00B: // divwu, mulhw, mulhwu
       case 0x0EB: case 0x068: case 0x028: // mullw, neg,   subf
       case 0x008: case 0x088: case 0x0E8: // subfc, subfe, subfme
-      case 0x0C8:                         // subfze
+      case 0x0C8: // subfze
+         if (dis_int_arith( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      case 0x18B: // divweu (implemented as native insn)
+      case 0x1AB: // divwe (implemented as native insn)
+         if (!allow_VX) goto decode_noVX;
          if (dis_int_arith( theInstr )) goto decode_success;
          goto decode_failure;
 
       /* 64bit Integer Arithmetic */
       case 0x009: case 0x049: case 0x0E9: // mulhdu, mulhd, mulld
-      case 0x1C9: case 0x1E9:             // divdu, divd
+      case 0x1C9: case 0x1E9: // divdu, divd
+         if (!mode64) goto decode_failure;
+         if (dis_int_arith( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      case 0x1A9: //  divde (implemented as native insn)
+      case 0x189: //  divdeuo (implemented as native insn)
+         if (!allow_VX) goto decode_noVX;
          if (!mode64) goto decode_failure;
          if (dis_int_arith( theInstr )) goto decode_success;
          goto decode_failure;
@@ -9730,7 +13950,8 @@
 
       /* Integer Load and Store with Byte Reverse Instructions */
       case 0x316: case 0x216: case 0x396: // lhbrx, lwbrx, sthbrx
-      case 0x296:                         // stwbrx
+      case 0x296: case 0x214:             // stwbrx, ldbrx
+      case 0x294:                         // stdbrx
          if (dis_int_ldst_rev( theInstr )) goto decode_success;
          goto decode_failure;
          
@@ -9815,6 +14036,11 @@
          if (dis_fp_load( theInstr )) goto decode_success;
          goto decode_failure;
 
+      case 0x377:                         // lfiwzx
+         if (!allow_F) goto decode_noF;
+         if (dis_fp_load( theInstr )) goto decode_success;
+         goto decode_failure;
+
       /* AltiVec instructions */
 
       /* AV Cache Control - Data streams */
@@ -9838,6 +14064,31 @@
          if (dis_av_store( theInstr )) goto decode_success;
          goto decode_failure;
 
+      /* VSX Load */
+      case 0x24C: // lxsdx
+      case 0x34C: // lxvd2x
+      case 0x14C: // lxvdsx
+      case 0x30C: // lxvw4x
+         if (dis_vx_load( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      /* VSX Store */
+      case 0x2CC: // stxsdx
+      case 0x3CC: // stxvd2x
+      case 0x38C: // stxvw4x
+         if (dis_vx_store( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      /* Miscellaneous ISA 2.06 instructions */
+      case 0x1FA: // popcntd
+      case 0x17A: // popcntw
+         if (dis_int_logic( theInstr )) goto decode_success;
+         goto decode_failure;
+
+      case 0x0FC: // bpermd
+         if (dis_int_logic( theInstr )) goto decode_success;
+         goto decode_failure;
+
       default:
          /* Deal with some other cases that we would otherwise have
             punted on. */
@@ -10023,6 +14274,10 @@
       vassert(!allow_V);
       vex_printf("disInstr(ppc): declined to decode an AltiVec insn.\n");
       goto decode_failure;
+   decode_noVX:
+      vassert(!allow_VX);
+      vex_printf("disInstr(ppc): declined to decode a Power ISA 2.06 insn.\n");
+      goto decode_failure;
    decode_noFX:
       vassert(!allow_FX);
       vex_printf("disInstr(ppc): "
@@ -10105,10 +14360,10 @@
 
    /* do some sanity checks */
    mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
-            | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX;
+            | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX;
 
-   mask64 = VEX_HWCAPS_PPC64_V
-            | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX;
+   mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
+            | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX;
 
    if (mode64) {
       vassert((hwcaps_guest & mask32) == 0);
diff --git a/main/VEX/priv/guest_s390_defs.h b/main/VEX/priv/guest_s390_defs.h
new file mode 100644
index 0000000..3c38955
--- /dev/null
+++ b/main/VEX/priv/guest_s390_defs.h
@@ -0,0 +1,199 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
+
+/*---------------------------------------------------------------*/
+/*--- begin                                 guest_s390_defs.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Florian Krohm */
+
+#ifndef __VEX_GUEST_S390_DEFS_H
+#define __VEX_GUEST_S390_DEFS_H
+
+#include "libvex_basictypes.h"        // offsetof
+#include "libvex_ir.h"                // IRSB  (needed by bb_to_IR.h)
+#include "libvex.h"                   // VexArch  (needed by bb_to_IR.h)
+#include "guest_generic_bb_to_IR.h"   // DisResult
+
+
+/* Convert one s390 insn to IR.  See the type DisOneInstrFn in
+   bb_to_IR.h. */
+DisResult disInstr_S390 ( IRSB*        irbb,
+                          Bool         put_IP,
+                          Bool         (*resteerOkFn) ( void*, Addr64 ),
+                          Bool         resteerCisOk,
+                          void*        callback_opaque,
+                          UChar*       guest_code,
+                          Long         delta,
+                          Addr64       guest_IP,
+                          VexArch      guest_arch,
+                          VexArchInfo* archinfo,
+                          VexAbiInfo*  abiinfo,
+                          Bool         host_bigendian );
+
+/* Used by the optimiser to specialise calls to helpers. */
+IRExpr* guest_s390x_spechelper ( HChar   *function_name,
+                                 IRExpr **args,
+                                 IRStmt **precedingStmts,
+                                 Int n_precedingStmts);
+
+
+/* Describes to the optimiser which parts of the guest state require
+   precise memory exceptions.  This is logically part of the guest
+   state description. */
+Bool guest_s390x_state_requires_precise_mem_exns ( Int, Int );
+
+extern VexGuestLayout s390xGuest_layout;
+
+
+#define S390X_GUEST_OFFSET(x)  offsetof(VexGuestS390XState, x)
+
+/*------------------------------------------------------------*/
+/*--- Dirty Helper functions.                              ---*/
+/*------------------------------------------------------------*/
+void s390x_dirtyhelper_00(VexGuestS390XState *guest_state);
+void s390x_dirtyhelper_EX(ULong torun);
+ULong s390x_dirtyhelper_STCK(ULong *addr);
+ULong s390x_dirtyhelper_STCKF(ULong *addr);
+ULong s390x_dirtyhelper_STCKE(ULong *addr);
+ULong s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, HWord addr);
+
+/* The various ways to compute the condition code. */
+enum {
+   S390_CC_OP_BITWISE = 0,
+   S390_CC_OP_SIGNED_COMPARE = 1,
+   S390_CC_OP_UNSIGNED_COMPARE = 2,
+   S390_CC_OP_SIGNED_ADD_32 = 3,
+   S390_CC_OP_SIGNED_ADD_64 = 4,
+   S390_CC_OP_UNSIGNED_ADD_32 = 5,
+   S390_CC_OP_UNSIGNED_ADD_64 = 6,
+   S390_CC_OP_UNSIGNED_ADDC_32 = 7,
+   S390_CC_OP_UNSIGNED_ADDC_64 = 8,
+   S390_CC_OP_SIGNED_SUB_32 = 9,
+   S390_CC_OP_SIGNED_SUB_64 = 10,
+   S390_CC_OP_UNSIGNED_SUB_32 = 11,
+   S390_CC_OP_UNSIGNED_SUB_64 = 12,
+   S390_CC_OP_UNSIGNED_SUBB_32 = 13,
+   S390_CC_OP_UNSIGNED_SUBB_64 = 14,
+   S390_CC_OP_LOAD_AND_TEST = 15,
+   S390_CC_OP_LOAD_POSITIVE_32 = 16,
+   S390_CC_OP_LOAD_POSITIVE_64 = 17,
+   S390_CC_OP_TEST_AND_SET = 18,
+   S390_CC_OP_TEST_UNDER_MASK_8 = 19,
+   S390_CC_OP_TEST_UNDER_MASK_16 = 20,
+   S390_CC_OP_SHIFT_LEFT_32 = 21,
+   S390_CC_OP_SHIFT_LEFT_64 = 22,
+   S390_CC_OP_INSERT_CHAR_MASK_32 = 23,
+   S390_CC_OP_BFP_RESULT_32 = 24,
+   S390_CC_OP_BFP_RESULT_64 = 25,
+   S390_CC_OP_BFP_RESULT_128 = 26,
+   S390_CC_OP_BFP_32_TO_INT_32 = 27,
+   S390_CC_OP_BFP_64_TO_INT_32 = 28,
+   S390_CC_OP_BFP_128_TO_INT_32 = 29,
+   S390_CC_OP_BFP_32_TO_INT_64 = 30,
+   S390_CC_OP_BFP_64_TO_INT_64 = 31,
+   S390_CC_OP_BFP_128_TO_INT_64 = 32,
+   S390_CC_OP_BFP_TDC_32 = 33,
+   S390_CC_OP_BFP_TDC_64 = 34,
+   S390_CC_OP_BFP_TDC_128 = 35,
+   S390_CC_OP_SET = 36
+};
+
+/*------------------------------------------------------------*/
+/*--- Thunk layout                                         ---*/
+/*------------------------------------------------------------*/
+
+/*
+   Z -- value is zero extended to 32 / 64 bit
+   S -- value is sign extended to 32 / 64 bit
+   F -- a binary floating point value
+
+   +--------------------------------+-----------------------+----------------------+-------------+
+   | op                             |   cc_dep1             |   cc_dep2            |   cc_ndep   |
+   +--------------------------------+-----------------------+----------------------+-------------+
+   | S390_CC_OP_BITWISE             | Z result              |                      |             |
+   | S390_CC_OP_SIGNED_COMPARE      | S 1st operand         | S 2nd operand        |             |
+   | S390_CC_OP_UNSIGNED_COMPARE    | Z 1st operand         | Z 2nd operand        |             |
+   | S390_CC_OP_SIGNED_ADD_32       | S 1st operand         | S 2nd operand        |             |
+   | S390_CC_OP_SIGNED_ADD_64       | S 1st operand         | S 2nd operand        |             |
+   | S390_CC_OP_UNSIGNED_ADD_32     | Z 1st operand         | Z 2nd operand        |             |
+   | S390_CC_OP_UNSIGNED_ADD_64     | Z 1st operand         | Z 2nd operand        |             |
+   | S390_CC_OP_UNSIGNED_ADDC_32    | Z 1st operand         | Z 2nd operand        | Z carry in  |
+   | S390_CC_OP_UNSIGNED_ADDC_64    | Z 1st operand         | Z 2nd operand        | Z carry in  |
+   | S390_CC_OP_SIGNED_SUB_32       | S left operand        | S right operand      |             |
+   | S390_CC_OP_SIGNED_SUB_64       | S left operand        | S right operand      |             |
+   | S390_CC_OP_UNSIGNED_SUB_32     | Z left operand        | Z right operand      |             |
+   | S390_CC_OP_UNSIGNED_SUB_64     | Z left operand        | Z right operand      |             |
+   | S390_CC_OP_UNSIGNED_SUBB_32    | Z left operand        | Z right operand      | Z borrow in |
+   | S390_CC_OP_UNSIGNED_SUBB_64    | Z left operand        | Z right operand      | Z borrow in |
+   | S390_CC_OP_LOAD_AND_TEST       | S loaded value        |                      |             |
+   | S390_CC_OP_LOAD_POSITIVE_32    | S loaded value        |                      |             |
+   | S390_CC_OP_LOAD_POSITIVE_64    | S loaded value        |                      |             |
+   | S390_CC_OP_TEST_AND_SET        | Z tested value        |                      |             |
+   | S390_CC_OP_TEST_UNDER_MASK_8   | Z tested value        | Z mask               |             |
+   | S390_CC_OP_TEST_UNDER_MASK_16  | Z tested value        | Z mask               |             |
+   | S390_CC_OP_SHIFT_LEFT_32       | Z value to be shifted | Z shift amount       |             |
+   | S390_CC_OP_SHIFT_LEFT_64       | Z value to be shifted | Z shift amount       |             |
+   | S390_CC_OP_INSERT_CHAR_MASK_32 | Z result              | Z mask               |             |
+   | S390_CC_OP_BFP_RESULT_32       | F result              |                      |             |
+   | S390_CC_OP_BFP_RESULT_64       | F result              |                      |             |
+   | S390_CC_OP_BFP_RESULT_128      | F result hi 64 bits   | F result low 64 bits |             |
+   | S390_CC_OP_BFP_32_TO_INT_32    | F source              |                      |             |
+   | S390_CC_OP_BFP_64_TO_INT_32    | F source              |                      |             |
+   | S390_CC_OP_BFP_128_TO_INT_32   | F source hi 64 bits   |                      |             |
+   | S390_CC_OP_BFP_32_TO_INT_64    | F source              |                      |             |
+   | S390_CC_OP_BFP_64_TO_INT_64    | F source              |                      |             |
+   | S390_CC_OP_BFP_128_TO_INT_64   | F source hi 64 bits   |                      |             |
+   | S390_CC_OP_BFP_TDC_32          | F value               | Z class              |             |
+   | S390_CC_OP_BFP_TDC_64          | F value               | Z class              |             |
+   | S390_CC_OP_BFP_TDC_128         | F value hi 64 bits    | F value low 64 bits  | Z class     |
+   | S390_CC_OP_SET                 | Z condition code      |                      |             |
+   +--------------------------------+-----------------------+----------------------+-------------+
+*/
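+
+/* Illustrative sketch (hedged; op1/op2 are hypothetical operand values):
+   for a 32-bit signed addition, the layout above implies the translator
+   fills the thunk roughly as
+
+      guest_CC_OP   = S390_CC_OP_SIGNED_ADD_32;
+      guest_CC_DEP1 = (Long)(Int)op1;     S: sign extended to 64 bit
+      guest_CC_DEP2 = (Long)(Int)op2;     S: sign extended to 64 bit
+      guest_CC_NDEP = 0;                  unused for this op
+
+   s390_calculate_cc() below then recomputes the 2-bit condition code
+   from these four values on demand. */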
+
+/*------------------------------------------------------------*/
+/*--- Condition code helpers.                             ---*/
+/*------------------------------------------------------------*/
+UInt s390_calculate_cc(ULong cc_op, ULong cc_dep1, ULong cc_dep2,
+                       ULong cc_ndep);
+UInt s390_calculate_icc(ULong op, ULong dep1, ULong dep2);
+UInt s390_calculate_cond(ULong mask, ULong op, ULong dep1, ULong dep2,
+                         ULong ndep);
+
+/* Size of special instruction preamble */
+#define S390_SPECIAL_OP_PREAMBLE_SIZE 8
+
+/* Size of special instructions */
+#define S390_SPECIAL_OP_SIZE 2
+
+/* Last target instruction for the EX helper */
+extern ULong last_execute_target;
+
+/*---------------------------------------------------------------*/
+/*--- end                                   guest_s390_defs.h ---*/
+/*---------------------------------------------------------------*/
+
+#endif /* __VEX_GUEST_S390_DEFS_H */
diff --git a/main/VEX/priv/guest_s390_helpers.c b/main/VEX/priv/guest_s390_helpers.c
new file mode 100644
index 0000000..60149f0
--- /dev/null
+++ b/main/VEX/priv/guest_s390_helpers.c
@@ -0,0 +1,1263 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
+
+/*---------------------------------------------------------------*/
+/*--- begin                              guest_s390_helpers.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Florian Krohm */
+
+#include "libvex_basictypes.h"
+#include "libvex_emwarn.h"
+#include "libvex_guest_s390x.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+#include "libvex_s390x_common.h"
+
+#include "main_util.h"
+#include "guest_generic_bb_to_IR.h"
+#include "guest_s390_defs.h"
+
+void
+LibVEX_GuestS390X_initialise(VexGuestS390XState *state)
+{
+/*------------------------------------------------------------*/
+/*--- Initialise ar registers                              ---*/
+/*------------------------------------------------------------*/
+
+   state->guest_a0 = 0;
+   state->guest_a1 = 0;
+   state->guest_a2 = 0;
+   state->guest_a3 = 0;
+   state->guest_a4 = 0;
+   state->guest_a5 = 0;
+   state->guest_a6 = 0;
+   state->guest_a7 = 0;
+   state->guest_a8 = 0;
+   state->guest_a9 = 0;
+   state->guest_a10 = 0;
+   state->guest_a11 = 0;
+   state->guest_a12 = 0;
+   state->guest_a13 = 0;
+   state->guest_a14 = 0;
+   state->guest_a15 = 0;
+
+/*------------------------------------------------------------*/
+/*--- Initialise fpr registers                             ---*/
+/*------------------------------------------------------------*/
+
+   state->guest_f0 = 0;
+   state->guest_f1 = 0;
+   state->guest_f2 = 0;
+   state->guest_f3 = 0;
+   state->guest_f4 = 0;
+   state->guest_f5 = 0;
+   state->guest_f6 = 0;
+   state->guest_f7 = 0;
+   state->guest_f8 = 0;
+   state->guest_f9 = 0;
+   state->guest_f10 = 0;
+   state->guest_f11 = 0;
+   state->guest_f12 = 0;
+   state->guest_f13 = 0;
+   state->guest_f14 = 0;
+   state->guest_f15 = 0;
+
+/*------------------------------------------------------------*/
+/*--- Initialise gpr registers                             ---*/
+/*------------------------------------------------------------*/
+
+   state->guest_r0 = 0;
+   state->guest_r1 = 0;
+   state->guest_r2 = 0;
+   state->guest_r3 = 0;
+   state->guest_r4 = 0;
+   state->guest_r5 = 0;
+   state->guest_r6 = 0;
+   state->guest_r7 = 0;
+   state->guest_r8 = 0;
+   state->guest_r9 = 0;
+   state->guest_r10 = 0;
+   state->guest_r11 = 0;
+   state->guest_r12 = 0;
+   state->guest_r13 = 0;
+   state->guest_r14 = 0;
+   state->guest_r15 = 0;
+
+/*------------------------------------------------------------*/
+/*--- Initialise S390 miscellaneous registers              ---*/
+/*------------------------------------------------------------*/
+
+   state->guest_counter = 0;
+   state->guest_fpc = 0;
+   state->guest_IA = 0;
+
+/*------------------------------------------------------------*/
+/*--- Initialise S390 pseudo registers                     ---*/
+/*------------------------------------------------------------*/
+
+   state->guest_SYSNO = 0;
+
+/*------------------------------------------------------------*/
+/*--- Initialise generic pseudo registers                  ---*/
+/*------------------------------------------------------------*/
+
+   state->guest_NRADDR = 0;
+   state->guest_TISTART = 0;
+   state->guest_TILEN = 0;
+   state->guest_IP_AT_SYSCALL = 0;
+   state->guest_EMWARN = EmWarn_NONE;
+
+/*------------------------------------------------------------*/
+/*--- Initialise thunk                                     ---*/
+/*------------------------------------------------------------*/
+
+   state->guest_CC_OP = 0;
+   state->guest_CC_DEP1 = 0;
+   state->guest_CC_DEP2 = 0;
+   state->guest_CC_NDEP = 0;
+}
+
+
+/* Figure out if any part of the guest state contained in minoff
+   .. maxoff requires precise memory exceptions.  If in doubt, return
+   True (but this generates significantly slower code).  */
+Bool
+guest_s390x_state_requires_precise_mem_exns(Int minoff, Int maxoff)
+{
+   Int lr_min = S390X_GUEST_OFFSET(guest_LR);
+   Int lr_max = lr_min + 8 - 1;
+   Int sp_min = S390X_GUEST_OFFSET(guest_SP);
+   Int sp_max = sp_min + 8 - 1;
+   Int fp_min = S390X_GUEST_OFFSET(guest_FP);
+   Int fp_max = fp_min + 8 - 1;
+   Int ia_min = S390X_GUEST_OFFSET(guest_IA);
+   Int ia_max = ia_min + 8 - 1;
+
+   if (maxoff < lr_min || minoff > lr_max) {
+      /* No overlap with LR */
+   } else {
+      return True;
+   }
+
+   if (maxoff < sp_min || minoff > sp_max) {
+      /* No overlap with SP */
+   } else {
+      return True;
+   }
+
+   if (maxoff < fp_min || minoff > fp_max) {
+      /* No overlap with FP */
+   } else {
+      return True;
+   }
+
+   if (maxoff < ia_min || minoff > ia_max) {
+      /* No overlap with IA */
+   } else {
+      return True;
+   }
+
+   return False;
+}
+
+
+#define ALWAYSDEFD(field)                             \
+    { S390X_GUEST_OFFSET(field),            \
+      (sizeof ((VexGuestS390XState*)0)->field) }
+
+VexGuestLayout s390xGuest_layout = {
+
+   /* Total size of the guest state, in bytes. */
+   .total_sizeB = sizeof(VexGuestS390XState),
+
+   /* Describe the stack pointer. */
+   .offset_SP = S390X_GUEST_OFFSET(guest_SP),
+   .sizeof_SP = 8,
+
+   /* Describe the frame pointer. */
+   .offset_FP = S390X_GUEST_OFFSET(guest_FP),
+   .sizeof_FP = 8,
+
+   /* Describe the instruction pointer. */
+   .offset_IP = S390X_GUEST_OFFSET(guest_IA),
+   .sizeof_IP = 8,
+
+   /* Describe any sections to be regarded by Memcheck as
+      'always-defined'. */
+   .n_alwaysDefd = 9,
+
+   /* Flags thunk: OP and NDEP are always defined, whereas DEP1
+      and DEP2 have to be tracked.  See detailed comment in
+      gdefs.h on meaning of thunk fields. */
+   .alwaysDefd = {
+      /*  0 */ ALWAYSDEFD(guest_CC_OP),     /* generic */
+      /*  1 */ ALWAYSDEFD(guest_CC_NDEP),   /* generic */
+      /*  2 */ ALWAYSDEFD(guest_EMWARN),    /* generic */
+      /*  3 */ ALWAYSDEFD(guest_TISTART),   /* generic */
+      /*  4 */ ALWAYSDEFD(guest_TILEN),     /* generic */
+      /*  5 */ ALWAYSDEFD(guest_IP_AT_SYSCALL), /* generic */
+      /*  6 */ ALWAYSDEFD(guest_IA),        /* control reg */
+      /*  7 */ ALWAYSDEFD(guest_fpc),       /* control reg */
+      /*  8 */ ALWAYSDEFD(guest_counter),   /* internal usage register */
+   }
+};
+
+/*------------------------------------------------------------*/
+/*--- Dirty helper for invalid opcode 00                   ---*/
+/*------------------------------------------------------------*/
+#if defined(VGA_s390x)
+void
+s390x_dirtyhelper_00(VexGuestS390XState *guest_state)
+{
+   /* Avoid infinite loop in case SIGILL is caught. See also
+      none/tests/s390x/op_exception.c */
+   guest_state->guest_IA += 2;
+
+   asm volatile(".hword 0\n");
+}
+#else
+void s390x_dirtyhelper_00(VexGuestS390XState *guest_state) { }
+#endif
+
+/*------------------------------------------------------------*/
+/*--- Dirty helper for EXecute                             ---*/
+/*------------------------------------------------------------*/
+void
+s390x_dirtyhelper_EX(ULong torun)
+{
+   last_execute_target = torun;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Dirty helper for Clock instructions                  ---*/
+/*------------------------------------------------------------*/
+#if defined(VGA_s390x)
+ULong s390x_dirtyhelper_STCK(ULong *addr)
+{
+   int cc;
+
+   asm volatile("stck %0\n"
+                "ipm %1\n"
+                "srl %1,28\n"
+                : "+Q" (*addr), "=d" (cc) : : "cc");
+   return cc;
+}
+
+ULong s390x_dirtyhelper_STCKE(ULong *addr)
+{
+   int cc;
+
+   asm volatile("stcke %0\n"
+                "ipm %1\n"
+                "srl %1,28\n"
+                : "+Q" (*addr), "=d" (cc) : : "cc");
+   return cc;
+}
+
+ULong s390x_dirtyhelper_STCKF(ULong *addr)
+{
+   int cc;
+
+   asm volatile(".insn s,0xb27c0000,%0\n"
+                "ipm %1\n"
+                "srl %1,28\n"
+                : "+Q" (*addr), "=d" (cc) : : "cc");
+   return cc;
+}
+#else
+ULong s390x_dirtyhelper_STCK(ULong *addr)  {return 3;}
+ULong s390x_dirtyhelper_STCKF(ULong *addr) {return 3;}
+ULong s390x_dirtyhelper_STCKE(ULong *addr) {return 3;}
+#endif /* VGA_s390x */
+
+/*------------------------------------------------------------*/
+/*--- Dirty helper for Store Facility instruction          ---*/
+/*------------------------------------------------------------*/
+#if defined(VGA_s390x)
+ULong
+s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, HWord addr)
+{
+   ULong hoststfle[S390_NUM_FACILITY_DW], cc, num_dw, i;
+   register ULong reg0 asm("0") = guest_state->guest_r0 & 0xF;  /* r0[56:63] */
+
+   /* We cannot store more than S390_NUM_FACILITY_DW
+      (and it makes little sense to do so anyway) */
+   if (reg0 > S390_NUM_FACILITY_DW - 1)
+      reg0 = S390_NUM_FACILITY_DW - 1;
+
+   num_dw = reg0 + 1;  /* number of double words written */
+
+   asm volatile(" .insn s,0xb2b00000,%0\n"   /* stfle */
+                "ipm    %2\n"
+                "srl    %2,28\n"
+                : "=m" (hoststfle), "+d"(reg0), "=d"(cc) : : "cc", "memory");
+
+   /* Update guest register 0  with what STFLE set r0 to */
+   guest_state->guest_r0 = reg0;
+
+   for (i = 0; i < num_dw; ++i)
+      ((ULong *)addr)[i] = hoststfle[i];
+
+   return cc;
+}
+
+#else
+
+ULong
+s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, HWord addr)
+{
+   return 3;
+}
+#endif /* VGA_s390x */
+
+/*------------------------------------------------------------*/
+/*--- Helper for condition code.                           ---*/
+/*------------------------------------------------------------*/
+
+#define S390_CC_FOR_BINARY(opcode,cc_dep1,cc_dep2) \
+({ \
+   __asm__ volatile ( \
+        opcode " %[op1],%[op2]\n\t" \
+        "ipm %[psw]\n\t"           : [psw] "=d"(psw), [op1] "+d"(cc_dep1) \
+                                   : [op2] "d"(cc_dep2) \
+                                   : "cc");\
+   psw >> 28;   /* cc */ \
+})
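+
+/* A hedged reading of the macro above: it redoes the original operation
+   on the host with the saved operands, captures the PSW byte with IPM,
+   and shifts the 2-bit condition code down into the low bits. For
+   example (hypothetical values): with cc_dep1 == 1 and cc_dep2 == 2,
+   S390_CC_FOR_BINARY("agr", ...) computes 1 + 2 == 3 > 0 and yields
+   cc == 2. */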
+
+#define S390_CC_FOR_TERNARY_SUBB(opcode,cc_dep1,cc_dep2,cc_ndep) \
+({ \
+   /* Recover the original DEP2 value. See comment near s390_cc_thunk_put3 \
+      for rationale. */ \
+   cc_dep2 = cc_dep2 ^ cc_ndep; \
+   __asm__ volatile ( \
+	"lghi 0,1\n\t" \
+	"sr 0,%[op3]\n\t" /* borrow to cc */ \
+        opcode " %[op1],%[op2]\n\t" /* then redo the op */\
+        "ipm %[psw]\n\t"           : [psw] "=d"(psw), [op1] "+&d"(cc_dep1) \
+                                   : [op2] "d"(cc_dep2), [op3] "d"(cc_ndep) \
+                                   : "0", "cc");\
+   psw >> 28;   /* cc */ \
+})
+
+#define S390_CC_FOR_TERNARY_ADDC(opcode,cc_dep1,cc_dep2,cc_ndep) \
+({ \
+   /* Recover the original DEP2 value. See comment near s390_cc_thunk_put3 \
+      for rationale. */ \
+   cc_dep2 = cc_dep2 ^ cc_ndep; \
+   __asm__ volatile ( \
+	"lgfr 0,%[op3]\n\t" /* first load cc_ndep */ \
+	"aghi 0,0\n\t" /* and convert it into a cc */ \
+        opcode " %[op1],%[op2]\n\t" /* then redo the op */\
+        "ipm %[psw]\n\t"           : [psw] "=d"(psw), [op1] "+&d"(cc_dep1) \
+                                   : [op2] "d"(cc_dep2), [op3] "d"(cc_ndep) \
+                                   : "0", "cc");\
+   psw >> 28;   /* cc */ \
+})
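+
+/* Note on the "cc_dep2 = cc_dep2 ^ cc_ndep" recovery above (sketch; the
+   full rationale is near s390_cc_thunk_put3): the thunk's DEP2 slot
+   holds the original DEP2 xor-ed with NDEP, so xor-ing with NDEP once
+   more restores it, because XOR is an involution:
+
+      stored = dep2 ^ ndep;
+      stored ^ ndep == dep2     because (x ^ y) ^ y == x
+*/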
+
+
+#define S390_CC_FOR_BFP_RESULT(opcode,cc_dep1) \
+({ \
+   __asm__ volatile ( \
+        opcode " 0,%[op]\n\t" \
+        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
+                                   : [op]  "f"(cc_dep1) \
+                                   : "cc", "f0");\
+   psw >> 28;   /* cc */ \
+})
+
+#define S390_CC_FOR_BFP128_RESULT(hi,lo) \
+({ \
+   __asm__ volatile ( \
+        "ldr   4,%[high]\n\t" \
+        "ldr   6,%[low]\n\t" \
+        "ltxbr 0,4\n\t" \
+        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
+                                   : [high] "f"(hi), [low] "f"(lo) \
+                                   : "cc", "f0", "f2", "f4", "f6");\
+   psw >> 28;   /* cc */ \
+})
+
+#define S390_CC_FOR_BFP_CONVERT(opcode,cc_dep1) \
+({ \
+   __asm__ volatile ( \
+        opcode " 0,0,%[op]\n\t" \
+        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
+                                   : [op]  "f"(cc_dep1) \
+                                   : "cc", "r0");\
+   psw >> 28;   /* cc */ \
+})
+
+#define S390_CC_FOR_BFP128_CONVERT(opcode,hi,lo) \
+({ \
+   __asm__ volatile ( \
+        "ldr   4,%[high]\n\t" \
+        "ldr   6,%[low]\n\t" \
+        opcode " 0,0,4\n\t" \
+        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
+                                   : [high] "f"(hi), [low] "f"(lo) \
+                                   : "cc", "r0", "f4", "f6");\
+   psw >> 28;   /* cc */ \
+})
+
+#define S390_CC_FOR_BFP_TDC(opcode,cc_dep1,cc_dep2) \
+({ \
+   __asm__ volatile ( \
+        opcode " %[value],0(%[class])\n\t" \
+        "ipm %[psw]\n\t"           : [psw] "=d"(psw) \
+                                   : [value] "f"(cc_dep1), \
+                                     [class] "a"(cc_dep2)  \
+                                   : "cc");\
+   psw >> 28;   /* cc */ \
+})
+
+#define S390_CC_FOR_BFP128_TDC(cc_dep1,cc_dep2,cc_ndep) \
+({ \
+   /* Recover the original DEP2 value. See comment near s390_cc_thunk_put1f128Z \
+      for rationale. */ \
+   cc_dep2 = cc_dep2 ^ cc_ndep; \
+   __asm__ volatile ( \
+        "ldr  4,%[high]\n\t" \
+        "ldr  6,%[low]\n\t" \
+        "tcxb 4,0(%[class])\n\t" \
+        "ipm  %[psw]\n\t"          : [psw] "=d"(psw) \
+                                   : [high] "f"(cc_dep1), [low] "f"(cc_dep2), \
+                                     [class] "a"(cc_ndep)  \
+                                   : "cc", "f4", "f6");\
+   psw >> 28;   /* cc */ \
+})
+
+
+/* Return the value of the condition code from the supplied thunk parameters.
+   This is not the value of the PSW. It is the value of the 2 CC bits within
+   the PSW. The returned value is thus in the interval [0:3]. */
+UInt
+s390_calculate_cc(ULong cc_op, ULong cc_dep1, ULong cc_dep2, ULong cc_ndep)
+{
+#if defined(VGA_s390x)
+   UInt psw;
+
+   switch (cc_op) {
+
+   case S390_CC_OP_BITWISE:
+      return S390_CC_FOR_BINARY("ogr", cc_dep1, (ULong)0);
+
+   case S390_CC_OP_SIGNED_COMPARE:
+      return S390_CC_FOR_BINARY("cgr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_UNSIGNED_COMPARE:
+      return S390_CC_FOR_BINARY("clgr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_SIGNED_ADD_64:
+      return S390_CC_FOR_BINARY("agr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_SIGNED_ADD_32:
+      return S390_CC_FOR_BINARY("ar", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_SIGNED_SUB_64:
+      return S390_CC_FOR_BINARY("sgr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_SIGNED_SUB_32:
+      return S390_CC_FOR_BINARY("sr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_UNSIGNED_ADD_64:
+      return S390_CC_FOR_BINARY("algr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_UNSIGNED_ADD_32:
+      return S390_CC_FOR_BINARY("alr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_UNSIGNED_ADDC_64:
+      return S390_CC_FOR_TERNARY_ADDC("alcgr", cc_dep1, cc_dep2, cc_ndep);
+
+   case S390_CC_OP_UNSIGNED_ADDC_32:
+      return S390_CC_FOR_TERNARY_ADDC("alcr", cc_dep1, cc_dep2, cc_ndep);
+
+   case S390_CC_OP_UNSIGNED_SUB_64:
+      return S390_CC_FOR_BINARY("slgr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_UNSIGNED_SUB_32:
+      return S390_CC_FOR_BINARY("slr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_UNSIGNED_SUBB_64:
+      return S390_CC_FOR_TERNARY_SUBB("slbgr", cc_dep1, cc_dep2, cc_ndep);
+
+   case S390_CC_OP_UNSIGNED_SUBB_32:
+      return S390_CC_FOR_TERNARY_SUBB("slbr", cc_dep1, cc_dep2, cc_ndep);
+
+   case S390_CC_OP_LOAD_AND_TEST:
+      /* Like signed comparison with 0 */
+      return S390_CC_FOR_BINARY("cgr", cc_dep1, (Long)0);
+
+   case S390_CC_OP_TEST_AND_SET:
+      /* Shift the sign bit into the LSB. Note that the tested value is an
+         8-bit value which has been zero-extended to 32/64 bit. */
+      return cc_dep1 >> 7;
+
+   case S390_CC_OP_LOAD_POSITIVE_32:
+      __asm__ volatile (
+           "lpr  %[result],%[op]\n\t"
+           "ipm  %[psw]\n\t"            : [psw] "=d"(psw), [result] "=d"(cc_dep1)
+                                        : [op] "d"(cc_dep1)
+                                        : "cc");
+      return psw >> 28;   /* cc */
+
+   case S390_CC_OP_LOAD_POSITIVE_64:
+      __asm__ volatile (
+           "lpgr %[result],%[op]\n\t"
+           "ipm  %[psw]\n\t"            : [psw] "=d"(psw), [result] "=d"(cc_dep1)
+                                        : [op] "d"(cc_dep1)
+                                        : "cc");
+      return psw >> 28;   /* cc */
+
+   case S390_CC_OP_TEST_UNDER_MASK_8: {
+      UChar value  = cc_dep1;
+      UChar mask   = cc_dep2;
+
+      __asm__ volatile (
+           "bras %%r2,1f\n\t"             /* %r2 = address of next insn */
+           "tm %[value],0\n\t"            /* this is skipped, then EXecuted */
+           "1: ex %[mask],0(%%r2)\n\t"    /* EXecute TM after modifying mask */
+           "ipm %[psw]\n\t"             : [psw] "=d"(psw)
+                                        : [value] "m"(value), [mask] "a"(mask)
+                                        : "r2", "cc");
+      return psw >> 28;   /* cc */
+   }
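+
+   /* For reference (as defined by the architecture): TM sets cc 0 when
+      the selected bits are all zeros (or the mask is zero), cc 1 when
+      they are a mix of zeros and ones, and cc 3 when they are all ones;
+      cc 2 does not occur for TM. */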
+
+   case S390_CC_OP_TEST_UNDER_MASK_16: {
+      /* Create a TMLL insn with the mask as given by cc_dep2 */
+      UInt insn  = (0xA701 << 16) | cc_dep2;
+      UInt value = cc_dep1;
+
+      __asm__ volatile (
+           "lr   1,%[value]\n\t"
+           "lhi  2,0x10\n\t"
+           "ex   2,%[insn]\n\t"
+           "ipm  %[psw]\n\t"       : [psw] "=d"(psw)
+                                   : [value] "d"(value), [insn] "m"(insn)
+                                   : "r1", "r2", "cc");
+      return psw >> 28;   /* cc */
+   }
+
+   case S390_CC_OP_SHIFT_LEFT_32:
+      __asm__ volatile (
+           "sla  %[op],0(%[amount])\n\t"
+           "ipm  %[psw]\n\t"            : [psw] "=d"(psw), [op] "+d"(cc_dep1)
+                                        : [amount] "a"(cc_dep2)
+                                        : "cc");
+      return psw >> 28;   /* cc */
+
+   case S390_CC_OP_SHIFT_LEFT_64: {
+      Int high = (Int)(cc_dep1 >> 32);
+      Int low  = (Int)(cc_dep1 & 0xFFFFFFFF);
+
+      __asm__ volatile (
+           "lr   2,%[high]\n\t"
+           "lr   3,%[low]\n\t"
+           "slda 2,0(%[amount])\n\t"
+           "ipm %[psw]\n\t"             : [psw] "=d"(psw), [high] "+d"(high), [low] "+d"(low)
+                                        : [amount] "a"(cc_dep2)
+                                        : "cc", "r2", "r3");
+      return psw >> 28;   /* cc */
+   }
+
+   case S390_CC_OP_INSERT_CHAR_MASK_32: {
+      Int inserted = 0;
+      Int msb = 0;
+
+      if (cc_dep2 & 1) {
+         inserted |= cc_dep1 & 0xff;
+         msb = 0x80;
+      }
+      if (cc_dep2 & 2) {
+         inserted |= cc_dep1 & 0xff00;
+         msb = 0x8000;
+      }
+      if (cc_dep2 & 4) {
+         inserted |= cc_dep1 & 0xff0000;
+         msb = 0x800000;
+      }
+      if (cc_dep2 & 8) {
+         inserted |= cc_dep1 & 0xff000000;
+         msb = 0x80000000;
+      }
+
+      if (inserted & msb)  // MSB is 1
+         return 1;
+      if (inserted > 0)
+         return 2;
+      return 0;
+   }
+
+   case S390_CC_OP_BFP_RESULT_32:
+      return S390_CC_FOR_BFP_RESULT("ltebr", cc_dep1);
+
+   case S390_CC_OP_BFP_RESULT_64:
+      return S390_CC_FOR_BFP_RESULT("ltdbr", cc_dep1);
+
+   case S390_CC_OP_BFP_RESULT_128:
+      return S390_CC_FOR_BFP128_RESULT(cc_dep1, cc_dep2);
+
+   case S390_CC_OP_BFP_32_TO_INT_32:
+      return S390_CC_FOR_BFP_CONVERT("cfebr", cc_dep1);
+
+   case S390_CC_OP_BFP_64_TO_INT_32:
+      return S390_CC_FOR_BFP_CONVERT("cfdbr", cc_dep1);
+
+   case S390_CC_OP_BFP_128_TO_INT_32:
+      return S390_CC_FOR_BFP128_CONVERT("cfxbr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_BFP_32_TO_INT_64:
+      return S390_CC_FOR_BFP_CONVERT("cgebr", cc_dep1);
+
+   case S390_CC_OP_BFP_64_TO_INT_64:
+      return S390_CC_FOR_BFP_CONVERT("cgdbr", cc_dep1);
+
+   case S390_CC_OP_BFP_128_TO_INT_64:
+      return S390_CC_FOR_BFP128_CONVERT("cgxbr", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_BFP_TDC_32:
+      return S390_CC_FOR_BFP_TDC("tceb", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_BFP_TDC_64:
+      return S390_CC_FOR_BFP_TDC("tcdb", cc_dep1, cc_dep2);
+
+   case S390_CC_OP_BFP_TDC_128:
+      return S390_CC_FOR_BFP128_TDC(cc_dep1, cc_dep2, cc_ndep);
+
+   case S390_CC_OP_SET:
+      return cc_dep1;
+
+   default:
+      break;
+   }
+#endif
+   vpanic("s390_calculate_cc");
+}
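+
+#  if 0
+/* Usage sketch, for illustration only: recompute the cc for a 64-bit
+   unsigned comparison of the hypothetical values 1 and 2. The first
+   operand is lower, so the expected result is cc == 1. */
+static UInt
+example_unsigned_compare_cc(void)
+{
+   return s390_calculate_cc(S390_CC_OP_UNSIGNED_COMPARE, 1, 2, 0);
+}
+#  endif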
+
+
+UInt
+s390_calculate_icc(ULong op, ULong dep1, ULong dep2)
+{
+   return s390_calculate_cc(op, dep1, dep2, 0 /* unused */);
+}
+
+
+/* Note that this does *not* return a Boolean value. The result needs to be
+   explicitly tested against zero. */
+UInt
+s390_calculate_cond(ULong mask, ULong op, ULong dep1, ULong dep2, ULong ndep)
+{
+   UInt cc = s390_calculate_cc(op, dep1, dep2, ndep);
+
+   return ((mask << cc) & 0x8);
+}
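+
+/* Worked example (illustrative): a "branch on equal" uses mask == 8,
+   which selects cc == 0. With cc == 0 the expression is
+   (8 << 0) & 0x8 == 8 (non-zero: condition holds); with cc == 1 it is
+   (8 << 1) & 0x8 == 0 (condition does not hold). Hence the result must
+   be tested against zero, not against 1. */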
+
+/*------------------------------------------------------------*/
+/*--- spechelper for performance                           ---*/
+/*------------------------------------------------------------*/
+
+
+/* Convenience macros */
+#define unop(op,a1) IRExpr_Unop((op),(a1))
+#define binop(op,a1,a2) IRExpr_Binop((op),(a1),(a2))
+#define mkU64(v) IRExpr_Const(IRConst_U64(v))
+#define mkU32(v) IRExpr_Const(IRConst_U32(v))
+#define mkU8(v)  IRExpr_Const(IRConst_U8(v))
+
+
+static inline Bool
+isC64(IRExpr *expr)
+{
+   return expr->tag == Iex_Const && expr->Iex.Const.con->tag == Ico_U64;
+}
+
+
+/* The returned expression is NULL if no specialization was found. In that
+   case the helper function will be called. Otherwise, the expression has
+   type Ity_I32 and a Boolean value. */
+IRExpr *
+guest_s390x_spechelper(HChar *function_name, IRExpr **args,
+                       IRStmt **precedingStmts, Int n_precedingStmts)
+{
+   UInt i, arity = 0;
+
+   for (i = 0; args[i]; i++)
+      arity++;
+
+#  if 0
+   vex_printf("spec request:\n");
+   vex_printf("   %s  ", function_name);
+   for (i = 0; i < arity; i++) {
+      vex_printf("  ");
+      ppIRExpr(args[i]);
+   }
+   vex_printf("\n");
+#  endif
+
+   /* --------- Specialising "s390_calculate_cond" --------- */
+
+   if (vex_streq(function_name, "s390_calculate_cond")) {
+      IRExpr *cond_expr, *cc_op_expr, *cc_dep1, *cc_dep2;
+      ULong cond, cc_op;
+
+      vassert(arity == 5);
+
+      cond_expr  = args[0];
+      cc_op_expr = args[1];
+
+      /* All optimizations here require that the condition and the cc_op
+         are constant. So check that upfront. */
+      if (! isC64(cond_expr))  return NULL;
+      if (! isC64(cc_op_expr)) return NULL;
+
+      cond    = cond_expr->Iex.Const.con->Ico.U64;
+      cc_op   = cc_op_expr->Iex.Const.con->Ico.U64;
+
+      vassert(cond <= 15);
+
+      /*
+        +------+---+---+---+---+
+        | cc   | 0 | 1 | 2 | 3 |
+        | cond | 8 | 4 | 2 | 1 |
+        +------+---+---+---+---+
+      */
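+      /* For example, cond == 8 + 2 selects cc == 0 or cc == 2, and
+         cond == 15 selects any cc, i.e. "always true" (illustration
+         only). */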
+      cc_dep1 = args[2];
+      cc_dep2 = args[3];
+
+      /* S390_CC_OP_SIGNED_COMPARE */
+      if (cc_op == S390_CC_OP_SIGNED_COMPARE) {
+         /*
+            cc == 0  --> cc_dep1 == cc_dep2   (cond == 8)
+            cc == 1  --> cc_dep1 <  cc_dep2   (cond == 4)
+            cc == 2  --> cc_dep1 >  cc_dep2   (cond == 2)
+
+            Because cc == 3 cannot occur the rightmost bit of cond is
+            a don't care.
+         */
+         if (cond == 8 || cond == 8 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
+         }
+         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
+         }
+         if (cond == 4 || cond == 4 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
+         }
+         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
+         }
+         /* cc_dep1 > cc_dep2  ---->  cc_dep2 < cc_dep1 */
+         if (cond == 2 || cond == 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
+         }
+         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
+         }
+         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
+            return mkU32(1);
+         }
+         /* Remaining case */
+         return mkU32(0);
+      }
+
+      /* S390_CC_OP_UNSIGNED_COMPARE */
+      if (cc_op == S390_CC_OP_UNSIGNED_COMPARE) {
+         /*
+            cc == 0  --> cc_dep1 == cc_dep2   (cond == 8)
+            cc == 1  --> cc_dep1 <  cc_dep2   (cond == 4)
+            cc == 2  --> cc_dep1 >  cc_dep2   (cond == 2)
+
+            Because cc == 3 cannot occur the rightmost bit of cond is
+            a don't care.
+         */
+         if (cond == 8 || cond == 8 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
+         }
+         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
+         }
+         if (cond == 4 || cond == 4 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
+         }
+         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
+         }
+         /* cc_dep1 > cc_dep2  ---->  cc_dep2 < cc_dep1 */
+         if (cond == 2 || cond == 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
+         }
+         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
+         }
+         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
+            return mkU32(1);
+         }
+         /* Remaining case */
+         return mkU32(0);
+      }
+
+      /* S390_CC_OP_LOAD_AND_TEST */
+      if (cc_op == S390_CC_OP_LOAD_AND_TEST) {
+         /*
+            cc == 0  --> cc_dep1 == 0   (cond == 8)
+            cc == 1  --> cc_dep1 <  0   (cond == 4)
+            cc == 2  --> cc_dep1 >  0   (cond == 2)
+
+            Because cc == 3 cannot occur the rightmost bit of cond is
+            a don't care.
+         */
+         if (cond == 8 || cond == 8 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
+         }
+         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
+         }
+         if (cond == 4 || cond == 4 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLT64S, cc_dep1, mkU64(0)));
+         }
+         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep1, mkU64(0)));
+         }
+         /* cc_dep1 > 0  ---->  0 < cc_dep1 */
+         if (cond == 2 || cond == 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLT64S, mkU64(0), cc_dep1));
+         }
+         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLE64S, mkU64(0), cc_dep1));
+         }
+         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
+            return mkU32(1);
+         }
+         /* Remaining case */
+         return mkU32(0);
+      }
+
+      /* S390_CC_OP_BITWISE */
+      if (cc_op == S390_CC_OP_BITWISE) {
+         /*
+            cc_dep1 is the result of the boolean operation.
+
+            cc == 0  --> cc_dep1 == 0   (cond == 8)
+            cc == 1  --> cc_dep1 != 0   (cond == 4)
+
+            Because cc == 2 and cc == 3 cannot occur the two rightmost bits of
+            cond are don't cares. Therefore:
+
+            cond == 00xx  -> always false
+            cond == 01xx  -> not equal
+            cond == 10xx  -> equal
+            cond == 11xx  -> always true
+         */
+         if ((cond & (8 + 4)) == 8 + 4) {
+            return mkU32(1);
+         }
+         if (cond & 8) {
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
+         }
+         if (cond & 4) {
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
+         }
+         /* Remaining case */
+         return mkU32(0);
+      }
+
+      /* S390_CC_OP_INSERT_CHAR_MASK_32
+         Since the mask comes from an immediate field in the opcode, we
+         expect the mask to be a constant here. That simplifies matters. */
+      if (cc_op == S390_CC_OP_INSERT_CHAR_MASK_32) {
+         ULong mask;
+         UInt imask = 0, shift = 0;
+         IRExpr *word;
+
+         if (! isC64(cc_dep2)) goto missed;
+
+         mask = cc_dep2->Iex.Const.con->Ico.U64;
+
+         /* Extract the 32-bit value from the thunk */
+
+         word = unop(Iop_64to32, cc_dep1);
+
+         switch (mask) {
+         case 0:  shift =  0; imask = 0x00000000; break;
+         case 1:  shift = 24; imask = 0x000000FF; break;
+         case 2:  shift = 16; imask = 0x0000FF00; break;
+         case 3:  shift = 16; imask = 0x0000FFFF; break;
+         case 4:  shift =  8; imask = 0x00FF0000; break;
+         case 5:  shift =  8; imask = 0x00FF00FF; break;
+         case 6:  shift =  8; imask = 0x00FFFF00; break;
+         case 7:  shift =  8; imask = 0x00FFFFFF; break;
+         case 8:  shift =  0; imask = 0xFF000000; break;
+         case 9:  shift =  0; imask = 0xFF0000FF; break;
+         case 10: shift =  0; imask = 0xFF00FF00; break;
+         case 11: shift =  0; imask = 0xFF00FFFF; break;
+         case 12: shift =  0; imask = 0xFFFF0000; break;
+         case 13: shift =  0; imask = 0xFFFF00FF; break;
+         case 14: shift =  0; imask = 0xFFFFFF00; break;
+         case 15: shift =  0; imask = 0xFFFFFFFF; break;
+         }
+
+         /* Select the bits that were inserted */
+         word = binop(Iop_And32, word, mkU32(imask));
+
+         /* cc == 0  --> all inserted bits zero or mask == 0   (cond == 8)
+            cc == 1  --> leftmost inserted bit is one          (cond == 4)
+            cc == 2  --> leftmost inserted bit is zero, but    (cond == 2)
+                         not all inserted bits are zero
+
+            Because cc == 3 cannot occur, the rightmost bit of cond is
+            a don't care */
+         if (cond == 8 || cond == 8 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ32, word, mkU32(0)));
+         }
+         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpNE32, word, mkU32(0)));
+         }
+
+         /* Sign extend */
+         if (shift != 0) {
+            word = binop(Iop_Sar32, binop(Iop_Shl32, word, mkU8(shift)),
+                         mkU8(shift));
+         }
+
+         if (cond == 4 || cond == 4 + 1) {  /* word < 0 */
+            return unop(Iop_1Uto32, binop(Iop_CmpLT32S, word, mkU32(0)));
+         }
+         if (cond == 2 || cond == 2 + 1) {  /* word > 0 */
+            return unop(Iop_1Uto32, binop(Iop_CmpLT32S, mkU32(0), word));
+         }
+         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLE32S, word, mkU32(0)));
+         }
+         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpLE32S, mkU32(0), word));
+         }
+         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
+            return mkU32(1);
+         }
+         /* Remaining case */
+         return mkU32(0);
+      }
+
+      /* S390_CC_OP_TEST_UNDER_MASK_8
+         Since the mask comes from an immediate field in the opcode, we
+         expect the mask to be a constant here. That simplifies matters. */
+      if (cc_op == S390_CC_OP_TEST_UNDER_MASK_8) {
+         ULong mask16;
+
+         if (! isC64(cc_dep2)) goto missed;
+
+         mask16 = cc_dep2->Iex.Const.con->Ico.U64;
+
+         /* Get rid of the mask16 == 0 case first. Some of the simplifications
+            below (e.g. for OVFL) only hold if mask16 is non-zero.  */
+         if (mask16 == 0) {   /* cc == 0 */
+            if (cond & 0x8) return mkU32(1);
+            return mkU32(0);
+         }
+
+         /* cc == 2 is a don't care */
+         if (cond == 8 || cond == 8 + 2) {
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
+                                          binop(Iop_And64, cc_dep1, cc_dep2),
+                                          mkU64(0)));
+         }
+         if (cond == 7 || cond == 7 - 2) {
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
+                                          binop(Iop_And64, cc_dep1, cc_dep2),
+                                          mkU64(0)));
+         }
+         if (cond == 1 || cond == 1 + 2) {
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
+                                          binop(Iop_And64, cc_dep1, cc_dep2),
+                                          cc_dep2));
+         }
+         if (cond == 14 || cond == 14 - 2) {  /* ! OVFL */
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
+                                          binop(Iop_And64, cc_dep1, cc_dep2),
+                                          cc_dep2));
+         }
+         goto missed;
+      }
+
+      /* S390_CC_OP_TEST_UNDER_MASK_16
+         Since the mask comes from an immediate field in the opcode, we
+         expect the mask to be a constant here. That simplifies matters. */
+      if (cc_op == S390_CC_OP_TEST_UNDER_MASK_16) {
+         ULong mask16;
+         UInt msb;
+
+         if (! isC64(cc_dep2)) goto missed;
+
+         mask16 = cc_dep2->Iex.Const.con->Ico.U64;
+
+         /* Get rid of the mask16 == 0 case first. Some of the simplifications
+            below (e.g. for OVFL) only hold if mask16 is non-zero.  */
+         if (mask16 == 0) {   /* cc == 0 */
+            if (cond & 0x8) return mkU32(1);
+            return mkU32(0);
+         }
+
+         if (cond == 8) {
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
+                                          binop(Iop_And64, cc_dep1, cc_dep2),
+                                          mkU64(0)));
+         }
+         if (cond == 7) {
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
+                                          binop(Iop_And64, cc_dep1, cc_dep2),
+                                          mkU64(0)));
+         }
+         if (cond == 1) {
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
+                                          binop(Iop_And64, cc_dep1, cc_dep2),
+                                          mkU64(mask16)));
+         }
+         if (cond == 14) {  /* ! OVFL */
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
+                                          binop(Iop_And64, cc_dep1, cc_dep2),
+                                          mkU64(mask16)));
+         }
+
+         /* Find MSB in mask */
+         msb = 0x8000;
+         while (msb > mask16)
+            msb >>= 1;
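+         /* For a hypothetical mask16 == 0x0030 this yields msb == 0x0020,
+            the highest bit set in the mask (mask16 != 0 is ensured
+            above). */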
+
+         if (cond == 2) {  /* cc == 2 */
+            IRExpr *c1, *c2;
+
+            /* (cc_dep & msb) != 0 && (cc_dep & mask16) != mask16 */
+            c1 = binop(Iop_CmpNE64,
+                       binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
+            c2 = binop(Iop_CmpNE64,
+                       binop(Iop_And64, cc_dep1, cc_dep2),
+                       mkU64(mask16));
+            return binop(Iop_And32, unop(Iop_1Uto32, c1),
+                         unop(Iop_1Uto32, c2));
+         }
+
+         if (cond == 4) {  /* cc == 1 */
+            IRExpr *c1, *c2;
+
+            /* (cc_dep & msb) == 0 && (cc_dep & mask16) != 0 */
+            c1 = binop(Iop_CmpEQ64,
+                       binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
+            c2 = binop(Iop_CmpNE64,
+                       binop(Iop_And64, cc_dep1, cc_dep2),
+                       mkU64(0));
+            return binop(Iop_And32, unop(Iop_1Uto32, c1),
+                         unop(Iop_1Uto32, c2));
+         }
+
+         if (cond == 11) {  /* cc == 0,2,3 */
+            IRExpr *c1, *c2;
+
+            c1 = binop(Iop_CmpNE64,
+                       binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
+            c2 = binop(Iop_CmpEQ64,
+                       binop(Iop_And64, cc_dep1, cc_dep2),
+                       mkU64(0));
+            return binop(Iop_Or32, unop(Iop_1Uto32, c1),
+                         unop(Iop_1Uto32, c2));
+         }
+
+         if (cond == 3) {  /* cc == 2 || cc == 3 */
+            return unop(Iop_1Uto32,
+                        binop(Iop_CmpNE64,
+                              binop(Iop_And64, cc_dep1, mkU64(msb)),
+                              mkU64(0)));
+         }
+         if (cond == 12) { /* cc == 0 || cc == 1 */
+            return unop(Iop_1Uto32,
+                        binop(Iop_CmpEQ64,
+                              binop(Iop_And64, cc_dep1, mkU64(msb)),
+                              mkU64(0)));
+         }
+         // vex_printf("TUM mask = 0x%llx\n", mask16);
+         goto missed;
+      }
+
+      /* S390_CC_OP_UNSIGNED_SUB_64/32 */
+      if (cc_op == S390_CC_OP_UNSIGNED_SUB_64 ||
+          cc_op == S390_CC_OP_UNSIGNED_SUB_32) {
+         /*
+            cc_dep1, cc_dep2 are the zero extended left and right operands
+
+            cc == 1  --> result != 0, borrow    (cond == 4)
+            cc == 2  --> result == 0, no borrow (cond == 2)
+            cc == 3  --> result != 0, no borrow (cond == 1)
+
+            cc = (cc_dep1 == cc_dep2) ? 2
+                                      : (cc_dep1 > cc_dep2) ? 3 : 1;
+
+            Because cc == 0 cannot occur the leftmost bit of cond is
+            a don't care.
+         */
+         if (cond == 1 || cond == 1 + 8) {  /* cc == 3   op2 < op1 */
+            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
+         }
+         if (cond == 2 || cond == 2 + 8) {  /* cc == 2 */
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
+         }
+         if (cond == 4 || cond == 4 + 8) {  /* cc == 1 */
+            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
+         }
+         if (cond == 3 || cond == 3 + 8) {  /* cc == 2 || cc == 3 */
+            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
+         }
+         if (cond == 6 || cond == 6 + 8) {  /* cc == 2 || cc == 1 */
+            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
+         }
+
+         if (cond == 5 || cond == 5 + 8) {  /* cc == 3 || cc == 1 */
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
+         }
+         if (cond == 7 || cond == 7 + 8) {
+            return mkU32(1);
+         }
+         /* Remaining case */
+         return mkU32(0);
+      }
+
+      /* S390_CC_OP_UNSIGNED_ADD_64 */
+      if (cc_op == S390_CC_OP_UNSIGNED_ADD_64) {
+         /*
+            cc_dep1, cc_dep2 are the zero extended left and right operands
+
+            cc == 0  --> result == 0, no carry  (cond == 8)
+            cc == 1  --> result != 0, no carry  (cond == 4)
+            cc == 2  --> result == 0, carry     (cond == 2)
+            cc == 3  --> result != 0, carry     (cond == 1)
+         */
+         if (cond == 8) { /* cc == 0 */
+            /* Both inputs are 0 */
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
+                                          binop(Iop_Or64, cc_dep1, cc_dep2),
+                                          mkU64(0)));
+         }
+         if (cond == 7) { /* cc == 1,2,3 */
+            /* Not both inputs are 0 */
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
+                                          binop(Iop_Or64, cc_dep1, cc_dep2),
+                                          mkU64(0)));
+         }
+         if (cond == 8 + 2) {  /* cc == 0,2  -> result is zero */
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
+                                          binop(Iop_Add64, cc_dep1, cc_dep2),
+                                          mkU64(0)));
+         }
+         if (cond == 4 + 1) {  /* cc == 1,3  -> result is not zero */
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
+                                          binop(Iop_Add64, cc_dep1, cc_dep2),
+                                          mkU64(0)));
+         }
+         goto missed;
+      }
+
+      /* S390_CC_OP_UNSIGNED_ADD_32 */
+      if (cc_op == S390_CC_OP_UNSIGNED_ADD_32) {
+         /*
+            cc_dep1, cc_dep2 are the zero extended left and right operands
+
+            cc == 0  --> result == 0, no carry  (cond == 8)
+            cc == 1  --> result != 0, no carry  (cond == 4)
+            cc == 2  --> result == 0, carry     (cond == 2)
+            cc == 3  --> result != 0, carry     (cond == 1)
+         */
+         if (cond == 8) { /* cc == 0 */
+            /* Both inputs are 0 */
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
+                                          binop(Iop_Or64, cc_dep1, cc_dep2),
+                                          mkU64(0)));
+         }
+         if (cond == 7) { /* cc == 1,2,3 */
+            /* Not both inputs are 0 */
+            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
+                                          binop(Iop_Or64, cc_dep1, cc_dep2),
+                                          mkU64(0)));
+         }
+         if (cond == 8 + 2) {  /* cc == 0,2  -> result is zero */
+            return unop(Iop_1Uto32, binop(Iop_CmpEQ32,
+                                          binop(Iop_Add32,
+                                                unop(Iop_64to32, cc_dep1),
+                                                unop(Iop_64to32, cc_dep2)),
+                                          mkU32(0)));
+         }
+         if (cond == 4 + 1) {  /* cc == 1,3  -> result is not zero */
+            return unop(Iop_1Uto32, binop(Iop_CmpNE32,
+                                          binop(Iop_Add32,
+                                                unop(Iop_64to32, cc_dep1),
+                                                unop(Iop_64to32, cc_dep2)),
+                                          mkU32(0)));
+         }
+         goto missed;
+      }
+
+      /* S390_CC_OP_SET */
+      if (cc_op == S390_CC_OP_SET) {
+         /* cc_dep1 is the condition code
+
+            Return 1 if ((cond << cc_dep1) & 0x8) != 0 */
+
+        return unop(Iop_1Uto32,
+                    binop(Iop_CmpNE64,
+                          binop(Iop_And64,
+                                binop(Iop_Shl64, cond_expr,
+                                      unop(Iop_64to8, cc_dep1)),
+                                mkU64(8)),
+                          mkU64(0)));
+      }
+
+      /* S390_CC_OP_TEST_AND_SET */
+      if (cc_op == S390_CC_OP_TEST_AND_SET) {
+         /* cc_dep1 is the zero-extended loaded value
+
+            cc == 0  --> leftmost bit is zero  (cond == 8)
+            cc == 1  --> leftmost bit is one   (cond == 4)
+
+            As cc is either 0 or 1, only the two leftmost bits of the mask
+            are relevant. */
+         IRExpr *bit = binop(Iop_Shr64, cc_dep1, mkU8(7));
+
+         switch (cond & (8 + 4)) {
+         case 0:     return mkU32(0);
+         case 4:     return unop(Iop_1Uto32, binop(Iop_CmpNE64, bit, mkU64(0)));
+         case 8:     return unop(Iop_1Uto32, binop(Iop_CmpEQ64, bit, mkU64(0)));
+         case 8 + 4: return mkU32(1);
+         }
+         /* not reached */
+      }
+
+missed:
+      ;
+   }
+
+   return NULL;
+}
+
+/*---------------------------------------------------------------*/
+/*--- end                                guest_s390_helpers.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/main/VEX/priv/guest_s390_toIR.c b/main/VEX/priv/guest_s390_toIR.c
new file mode 100644
index 0000000..ec89860
--- /dev/null
+++ b/main/VEX/priv/guest_s390_toIR.c
@@ -0,0 +1,13357 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
+
+/*---------------------------------------------------------------*/
+/*--- begin                                 guest_s390_toIR.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Florian Krohm and Christian Borntraeger */
+
+/* Translates s390 code to IR. */
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex_guest_s390x.h"      /* VexGuestS390XState */
+#include "libvex.h"                  /* needed for bb_to_IR.h */
+#include "libvex_guest_offsets.h"    /* OFFSET_s390x_SYSNO */
+#include "libvex_s390x_common.h"
+#include "main_util.h"               /* vassert */
+#include "main_globals.h"            /* vex_traceflags */
+#include "guest_generic_bb_to_IR.h"  /* DisResult */
+#include "guest_s390_defs.h"         /* prototypes for this file's functions */
+#include "host_s390_disasm.h"
+#include "host_s390_defs.h"          /* S390_ROUND_xyzzy */
+
+
+/*------------------------------------------------------------*/
+/*--- Forward declarations                                 ---*/
+/*------------------------------------------------------------*/
+static UInt s390_decode_and_irgen(UChar *, UInt, DisResult *);
+
+
+/*------------------------------------------------------------*/
+/*--- Globals                                              ---*/
+/*------------------------------------------------------------*/
+
+/* The IRSB* into which we're generating code. */
+static IRSB *irsb;
+
+/* The guest address for the instruction currently being
+   translated. */
+static Addr64 guest_IA_curr_instr;
+
+/* The guest address for the instruction following the current instruction. */
+static Addr64 guest_IA_next_instr;
+
+/* Result of disassembly step. */
+static DisResult *dis_res;
+
+/* Resteer function and callback data */
+static Bool (*resteer_fn)(void *, Addr64);
+static void *resteer_data;
+
+/* The last seen execute target instruction */
+ULong last_execute_target;
+
+/* The possible outcomes of a decoding operation */
+typedef enum {
+   S390_DECODE_OK,
+   S390_DECODE_UNKNOWN_INSN,
+   S390_DECODE_UNIMPLEMENTED_INSN,
+   S390_DECODE_UNKNOWN_SPECIAL_INSN,
+   S390_DECODE_ERROR
+} s390_decode_t;
+
+/*------------------------------------------------------------*/
+/*--- Helpers for constructing IR.                         ---*/
+/*------------------------------------------------------------*/
+
+/* Sign extend a value with the given number of bits. This is a
+   macro because it allows us to overload the type of the value.
+   Note that VALUE must have a signed type! */
+#undef sign_extend
+#define sign_extend(value,num_bits) \
+(((value) << (sizeof(__typeof__(value)) * 8 - (num_bits))) >> \
+ (sizeof(__typeof__(value)) * 8 - (num_bits)))
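+
+/* Example (illustrative): for a signed 64-bit value,
+   sign_extend((Long)0x80, 8) shifts left by 56 and arithmetically back
+   by 56, yielding 0xFFFFFFFFFFFFFF80, i.e. -128. */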
+
+
+/* Add a statement to the current irsb. */
+static __inline__ void
+stmt(IRStmt *st)
+{
+   addStmtToIRSB(irsb, st);
+}
+
+/* Allocate a new temporary of the given type. */
+static __inline__ IRTemp
+newTemp(IRType type)
+{
+   vassert(isPlausibleIRType(type));
+
+   return newIRTemp(irsb->tyenv, type);
+}
+
+/* Create an expression node for a temporary */
+static __inline__ IRExpr *
+mkexpr(IRTemp tmp)
+{
+   return IRExpr_RdTmp(tmp);
+}
+
+/* Add a statement that assigns to a temporary */
+static __inline__ void
+assign(IRTemp dst, IRExpr *expr)
+{
+   stmt(IRStmt_WrTmp(dst, expr));
+}
+
+/* Create a temporary of the given type and assign the expression to it */
+static __inline__ IRTemp
+mktemp(IRType type, IRExpr *expr)
+{
+   IRTemp temp = newTemp(type);
+
+   assign(temp, expr);
+
+   return temp;
+}
+
+/* Create a unary expression */
+static __inline__ IRExpr *
+unop(IROp kind, IRExpr *op)
+{
+   return IRExpr_Unop(kind, op);
+}
+
+/* Create a binary expression */
+static __inline__ IRExpr *
+binop(IROp kind, IRExpr *op1, IRExpr *op2)
+{
+   return IRExpr_Binop(kind, op1, op2);
+}
+
+/* Create a ternary expression */
+static __inline__ IRExpr *
+triop(IROp kind, IRExpr *op1, IRExpr *op2, IRExpr *op3)
+{
+   return IRExpr_Triop(kind, op1, op2, op3);
+}
+
+/* Create a quaternary expression */
+static __inline__  IRExpr *
+qop(IROp kind, IRExpr *op1, IRExpr *op2, IRExpr *op3, IRExpr *op4)
+{
+   return IRExpr_Qop(kind, op1, op2, op3, op4);
+}
+
+/* Create an expression node for an 8-bit integer constant */
+static __inline__ IRExpr *
+mkU8(UInt value)
+{
+   vassert(value < 256);
+
+   return IRExpr_Const(IRConst_U8((UChar)value));
+}
+
+/* Create an expression node for a 16-bit integer constant */
+static __inline__ IRExpr *
+mkU16(UInt value)
+{
+   vassert(value < 65536);
+
+   return IRExpr_Const(IRConst_U16((UShort)value));
+}
+
+/* Create an expression node for a 32-bit integer constant */
+static __inline__ IRExpr *
+mkU32(UInt value)
+{
+   return IRExpr_Const(IRConst_U32(value));
+}
+
+/* Create an expression node for a 64-bit integer constant */
+static __inline__ IRExpr *
+mkU64(ULong value)
+{
+   return IRExpr_Const(IRConst_U64(value));
+}
+
+/* Create an expression node for a 32-bit floating point constant
+   whose value is given by a bit pattern. */
+static __inline__ IRExpr *
+mkF32i(UInt value)
+{
+   return IRExpr_Const(IRConst_F32i(value));
+}
+
+/* Create an expression node for a 32-bit floating point constant
+   whose value is given by a bit pattern. */
+static __inline__ IRExpr *
+mkF64i(ULong value)
+{
+   return IRExpr_Const(IRConst_F64i(value));
+}
+
+/* Little helper function for my sanity. ITE = if-then-else */
+static IRExpr *
+mkite(IRExpr *condition, IRExpr *iftrue, IRExpr *iffalse)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, condition) == Ity_I1);
+
+   return IRExpr_Mux0X(unop(Iop_1Uto8, condition), iffalse, iftrue);
+}
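+
+/* Design note (hedged): IRExpr_Mux0X(sel, expr0, exprX) evaluates to
+   expr0 when the 8-bit selector is zero and to exprX otherwise. That is
+   why IFFALSE is passed as the middle argument above. */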
+
+/* Add a statement that stores DATA at ADDR. This is a big-endian machine. */
+static void __inline__
+store(IRExpr *addr, IRExpr *data)
+{
+   stmt(IRStmt_Store(Iend_BE, addr, data));
+}
+
+/* Create an expression that loads a TYPE sized value from ADDR.
+   This is a big-endian machine. */
+static __inline__ IRExpr *
+load(IRType type, IRExpr *addr)
+{
+   return IRExpr_Load(Iend_BE, type, addr);
+}
+
+/* Function call */
+static void
+call_function(IRExpr *callee_address)
+{
+   irsb->next = callee_address;
+   irsb->jumpkind = Ijk_Call;
+
+   dis_res->whatNext = Dis_StopHere;
+}
+
+/* Function call with known target. */
+static void
+call_function_and_chase(Addr64 callee_address)
+{
+   if (resteer_fn(resteer_data, callee_address)) {
+      dis_res->whatNext   = Dis_ResteerU;
+      dis_res->continueAt = callee_address;
+   } else {
+      irsb->next = mkU64(callee_address);
+      irsb->jumpkind = Ijk_Call;
+      dis_res->whatNext = Dis_StopHere;
+   }
+}
+
+/* Function return sequence */
+static void
+return_from_function(IRExpr *return_address)
+{
+   irsb->next = return_address;
+   irsb->jumpkind = Ijk_Ret;
+
+   dis_res->whatNext = Dis_StopHere;
+}
+
+/* A conditional branch whose target is not known at instrumentation time.
+
+   if (condition) goto computed_target;
+
+   Needs to be represented as:
+
+   if (! condition) goto next_instruction;
+   goto computed_target;
+
+   This inversion is handled at code generation time, so we just
+   take the condition here as is.
+*/
+static void
+if_not_condition_goto_computed(IRExpr *condition, IRExpr *target)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, condition) == Ity_I1);
+
+   stmt(IRStmt_Exit(condition, Ijk_Boring, IRConst_U64(guest_IA_next_instr)));
+
+   irsb->next = target;
+   irsb->jumpkind = Ijk_Boring;
+
+   dis_res->whatNext = Dis_StopHere;
+}
+
+/* A conditional branch whose target is known at instrumentation time. */
+static void
+if_condition_goto(IRExpr *condition, Addr64 target)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, condition) == Ity_I1);
+
+   stmt(IRStmt_Exit(condition, Ijk_Boring, IRConst_U64(target)));
+   dis_res->whatNext = Dis_Continue;
+}
+
+/* An unconditional branch. Target may or may not be known at instrumentation
+   time. */
+static void
+always_goto(IRExpr *target)
+{
+   irsb->next = target;
+   irsb->jumpkind = Ijk_Boring;
+
+   dis_res->whatNext = Dis_StopHere;
+}
+
+/* An unconditional branch to a known target. */
+static void
+always_goto_and_chase(Addr64 target)
+{
+   if (resteer_fn(resteer_data, target)) {
+      dis_res->whatNext   = Dis_ResteerU;
+      dis_res->continueAt = target;
+   } else {
+      irsb->next = mkU64(target);
+      irsb->jumpkind = Ijk_Boring;
+      dis_res->whatNext = Dis_StopHere;
+   }
+}
+
+/* A system call */
+static void
+system_call(IRExpr *sysno)
+{
+   /* Store the system call number in the pseudo register. */
+   stmt(IRStmt_Put(OFFSET_s390x_SYSNO, sysno));
+
+   /* Store the current IA into guest_IP_AT_SYSCALL. libvex_ir.h says so. */
+   stmt(IRStmt_Put(OFFSET_s390x_IP_AT_SYSCALL, mkU64(guest_IA_curr_instr)));
+
+   /* It's important that all ArchRegs carry their up-to-date value
+      at this point.  So we declare an end-of-block here, which
+      forces any TempRegs caching ArchRegs to be flushed. */
+   irsb->next = mkU64(guest_IA_next_instr);
+
+   irsb->jumpkind = Ijk_Sys_syscall;
+
+   dis_res->whatNext = Dis_StopHere;
+}
+
+/* Map the s390 rounding mode, as it appears in the m3/m4 fields of
+   certain instructions, to VEX's IRRoundingMode. */
+static IRRoundingMode
+encode_rounding_mode(UChar mode)
+{
+   switch (mode) {
+   case S390_ROUND_NEAREST_EVEN:  return Irrm_NEAREST;
+   case S390_ROUND_ZERO:          return Irrm_ZERO;
+   case S390_ROUND_POSINF:        return Irrm_PosINF;
+   case S390_ROUND_NEGINF:        return Irrm_NegINF;
+   }
+   vpanic("encode_rounding_mode");
+}
+
+static __inline__ IRExpr *get_fpr_dw0(UInt);
+static __inline__ void    put_fpr_dw0(UInt, IRExpr *);
+
+/* Read a floating point register pair and combine its contents into a
+   128-bit value. E.g., for archreg 0, f0 provides the upper and f2 the
+   lower 64 bits. */
+static IRExpr *
+get_fpr_pair(UInt archreg)
+{
+   IRExpr *high = get_fpr_dw0(archreg);
+   IRExpr *low  = get_fpr_dw0(archreg + 2);
+
+   return binop(Iop_F64HLtoF128, high, low);
+}
+
+/* Write a 128-bit floating point value into a register pair. */
+static void
+put_fpr_pair(UInt archreg, IRExpr *expr)
+{
+   IRExpr *high = unop(Iop_F128HItoF64, expr);
+   IRExpr *low  = unop(Iop_F128LOtoF64, expr);
+
+   put_fpr_dw0(archreg,     high);
+   put_fpr_dw0(archreg + 2, low);
+}
+
+
+/* Flags thunk offsets */
+#define S390X_GUEST_OFFSET_CC_OP    S390X_GUEST_OFFSET(guest_CC_OP)
+#define S390X_GUEST_OFFSET_CC_DEP1  S390X_GUEST_OFFSET(guest_CC_DEP1)
+#define S390X_GUEST_OFFSET_CC_DEP2  S390X_GUEST_OFFSET(guest_CC_DEP2)
+#define S390X_GUEST_OFFSET_CC_NDEP  S390X_GUEST_OFFSET(guest_CC_NDEP)
+
+/*------------------------------------------------------------*/
+/*--- Build the flags thunk.                               ---*/
+/*------------------------------------------------------------*/
+
+/* Completely fill the flags thunk. We always fill all fields; this
+   apparently works better for redundant PUT elimination. */
+static void
+s390_cc_thunk_fill(IRExpr *op, IRExpr *dep1, IRExpr *dep2, IRExpr *ndep)
+{
+   UInt op_off, dep1_off, dep2_off, ndep_off;
+
+   op_off   = S390X_GUEST_OFFSET_CC_OP;
+   dep1_off = S390X_GUEST_OFFSET_CC_DEP1;
+   dep2_off = S390X_GUEST_OFFSET_CC_DEP2;
+   ndep_off = S390X_GUEST_OFFSET_CC_NDEP;
+
+   stmt(IRStmt_Put(op_off,   op));
+   stmt(IRStmt_Put(dep1_off, dep1));
+   stmt(IRStmt_Put(dep2_off, dep2));
+   stmt(IRStmt_Put(ndep_off, ndep));
+}
+
+
+/* Create an expression for V and widen the result to 64 bits. */
+static IRExpr *
+s390_cc_widen(IRTemp v, Bool sign_extend)
+{
+   IRExpr *expr;
+
+   expr = mkexpr(v);
+
+   switch (typeOfIRTemp(irsb->tyenv, v)) {
+   case Ity_I64:
+      break;
+   case Ity_I32:
+      expr = unop(sign_extend ? Iop_32Sto64 : Iop_32Uto64, expr);
+      break;
+   case Ity_I16:
+      expr = unop(sign_extend ? Iop_16Sto64 : Iop_16Uto64, expr);
+      break;
+   case Ity_I8:
+      expr = unop(sign_extend ? Iop_8Sto64 : Iop_8Uto64, expr);
+      break;
+   default:
+      vpanic("s390_cc_widen");
+   }
+
+   return expr;
+}
+
+static void
+s390_cc_thunk_put1(UInt opc, IRTemp d1, Bool sign_extend)
+{
+   IRExpr *op, *dep1, *dep2, *ndep;
+
+   op   = mkU64(opc);
+   dep1 = s390_cc_widen(d1, sign_extend);
+   dep2 = mkU64(0);
+   ndep = mkU64(0);
+
+   s390_cc_thunk_fill(op, dep1, dep2, ndep);
+}
+
+
+static void
+s390_cc_thunk_put2(UInt opc, IRTemp d1, IRTemp d2, Bool sign_extend)
+{
+   IRExpr *op, *dep1, *dep2, *ndep;
+
+   op   = mkU64(opc);
+   dep1 = s390_cc_widen(d1, sign_extend);
+   dep2 = s390_cc_widen(d2, sign_extend);
+   ndep = mkU64(0);
+
+   s390_cc_thunk_fill(op, dep1, dep2, ndep);
+}
+
+
+/* Memcheck believes that the NDEP field in the flags thunk is always
+   defined. But for some flag computations (e.g. add with carry) that is
+   just not true. We therefore need to convey to memcheck that the value
+   of the NDEP field does matter, and we do so by making the DEP2 field
+   depend on it:
+
+   DEP2 = original_DEP2 ^ NDEP
+
+   In s390_calculate_cc we exploit that (a^b)^b == a, i.e. we xor the
+   DEP2 value with the NDEP value to recover the original_DEP2 value. */
+static void
+s390_cc_thunk_put3(UInt opc, IRTemp d1, IRTemp d2, IRTemp nd, Bool sign_extend)
+{
+   IRExpr *op, *dep1, *dep2, *ndep, *dep2x;
+
+   op   = mkU64(opc);
+   dep1 = s390_cc_widen(d1, sign_extend);
+   dep2 = s390_cc_widen(d2, sign_extend);
+   ndep = s390_cc_widen(nd, sign_extend);
+
+   dep2x = binop(Iop_Xor64, dep2, ndep);
+
+   s390_cc_thunk_fill(op, dep1, dep2x, ndep);
+}
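+
+/* Recovery sketch (hypothetical variable names; the real code lives in
+   s390_calculate_cc):
+
+      ULong original_dep2 = cc_dep2 ^ cc_ndep;
+
+   which yields the original value because (a ^ b) ^ b == a. */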
+
+
+/* Write one floating point value into the flags thunk. */
+static void
+s390_cc_thunk_put1f(UInt opc, IRTemp d1)
+{
+   IRExpr *op, *dep1, *dep2, *ndep;
+
+   op   = mkU64(opc);
+   dep1 = mkexpr(d1);
+   dep2 = mkU64(0);
+   ndep = mkU64(0);
+
+   s390_cc_thunk_fill(op, dep1, dep2, ndep);
+}
+
+
+/* Write a floating point value and an integer into the flags thunk. The
+   integer value is zero-extended first. */
+static void
+s390_cc_thunk_putFZ(UInt opc, IRTemp d1, IRTemp d2)
+{
+   IRExpr *op, *dep1, *dep2, *ndep;
+
+   op   = mkU64(opc);
+   dep1 = mkexpr(d1);
+   dep2 = s390_cc_widen(d2, False);
+   ndep = mkU64(0);
+
+   s390_cc_thunk_fill(op, dep1, dep2, ndep);
+}
+
+
+/* Write a 128-bit floating point value into the flags thunk. This is
+   done by splitting the value into two 64-bit values. */
+static void
+s390_cc_thunk_put1f128(UInt opc, IRTemp d1)
+{
+   IRExpr *op, *hi, *lo, *ndep;
+
+   op   = mkU64(opc);
+   hi   = unop(Iop_F128HItoF64, mkexpr(d1));
+   lo   = unop(Iop_F128LOtoF64, mkexpr(d1));
+   ndep = mkU64(0);
+
+   s390_cc_thunk_fill(op, hi, lo, ndep);
+}
+
+
+/* Write a 128-bit floating point value and an integer into the flags thunk.
+   The integer value is zero-extended first. */
+static void
+s390_cc_thunk_put1f128Z(UInt opc, IRTemp d1, IRTemp nd)
+{
+   IRExpr *op, *hi, *lo, *lox, *ndep;
+
+   op   = mkU64(opc);
+   hi   = unop(Iop_F128HItoF64, mkexpr(d1));
+   lo   = unop(Iop_ReinterpF64asI64, unop(Iop_F128LOtoF64, mkexpr(d1)));
+   ndep = s390_cc_widen(nd, False);
+
+   lox = binop(Iop_Xor64, lo, ndep);  /* convey dependency */
+
+   s390_cc_thunk_fill(op, hi, lox, ndep);
+}
+
+
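+/* Set the condition code to the constant VAL. The s390 condition code
+   is a 2-bit value in the range 0..3. */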
+static void
+s390_cc_set(UInt val)
+{
+   s390_cc_thunk_fill(mkU64(S390_CC_OP_SET),
+                      mkU64(val), mkU64(0), mkU64(0));
+}
+
+/* Build IR to calculate the condition code from the flags thunk.
+   Returns an expression of type Ity_I32. */
+static IRExpr *
+s390_call_calculate_cc(void)
+{
+   IRExpr **args, *call, *op, *dep1, *dep2, *ndep;
+
+   op   = IRExpr_Get(S390X_GUEST_OFFSET_CC_OP,   Ity_I64);
+   dep1 = IRExpr_Get(S390X_GUEST_OFFSET_CC_DEP1, Ity_I64);
+   dep2 = IRExpr_Get(S390X_GUEST_OFFSET_CC_DEP2, Ity_I64);
+   ndep = IRExpr_Get(S390X_GUEST_OFFSET_CC_NDEP, Ity_I64);
+
+   args = mkIRExprVec_4(op, dep1, dep2, ndep);
+   call = mkIRExprCCall(Ity_I32, 0 /*regparm*/,
+                        "s390_calculate_cc", &s390_calculate_cc, args);
+
+   /* Exclude OP and NDEP from definedness checking.  We're only
+      interested in DEP1 and DEP2. */
+   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
+
+   return call;
+}
+
+/* Build IR to calculate the internal condition code for a "compare and
+   branch" insn. Returns an expression of type Ity_I32. */
+static IRExpr *
+s390_call_calculate_icc(UInt opc, IRTemp op1, IRTemp op2, Bool sign_extend)
+{
+   IRExpr **args, *call, *op, *dep1, *dep2;
+
+   op   = mkU64(opc);
+   dep1 = s390_cc_widen(op1, sign_extend);
+   dep2 = s390_cc_widen(op2, sign_extend);
+
+   args = mkIRExprVec_3(op, dep1, dep2);
+   call = mkIRExprCCall(Ity_I32, 0 /*regparm*/,
+                        "s390_calculate_icc", &s390_calculate_icc, args);
+
+   /* Exclude OP from definedness checking.  We're only
+      interested in DEP1 and DEP2. */
+   call->Iex.CCall.cee->mcx_mask = (1<<0);
+
+   return call;
+}
+
+/* Build IR to evaluate, based on the flags thunk, whether the condition
+   specified by mask M holds. Returns an expression of type Ity_I32. */
+static IRExpr *
+s390_call_calculate_cond(UInt m)
+{
+   IRExpr **args, *call, *op, *dep1, *dep2, *ndep, *mask;
+
+   mask = mkU64(m);
+   op   = IRExpr_Get(S390X_GUEST_OFFSET_CC_OP,   Ity_I64);
+   dep1 = IRExpr_Get(S390X_GUEST_OFFSET_CC_DEP1, Ity_I64);
+   dep2 = IRExpr_Get(S390X_GUEST_OFFSET_CC_DEP2, Ity_I64);
+   ndep = IRExpr_Get(S390X_GUEST_OFFSET_CC_NDEP, Ity_I64);
+
+   args = mkIRExprVec_5(mask, op, dep1, dep2, ndep);
+   call = mkIRExprCCall(Ity_I32, 0 /*regparm*/,
+                        "s390_calculate_cond", &s390_calculate_cond, args);
+
+   /* Exclude the requested condition, OP and NDEP from definedness
+      checking.  We're only interested in DEP1 and DEP2. */
+   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
+
+   return call;
+}
+
+#define s390_cc_thunk_putZ(op,dep1)  s390_cc_thunk_put1(op,dep1,False)
+#define s390_cc_thunk_putS(op,dep1)  s390_cc_thunk_put1(op,dep1,True)
+#define s390_cc_thunk_putF(op,dep1)  s390_cc_thunk_put1f(op,dep1)
+#define s390_cc_thunk_putZZ(op,dep1,dep2) s390_cc_thunk_put2(op,dep1,dep2,False)
+#define s390_cc_thunk_putSS(op,dep1,dep2) s390_cc_thunk_put2(op,dep1,dep2,True)
+#define s390_cc_thunk_putFF(op,dep1,dep2) s390_cc_thunk_put2f(op,dep1,dep2)
+#define s390_cc_thunk_putZZZ(op,dep1,dep2,ndep) \
+        s390_cc_thunk_put3(op,dep1,dep2,ndep,False)
+#define s390_cc_thunk_putSSS(op,dep1,dep2,ndep) \
+        s390_cc_thunk_put3(op,dep1,dep2,ndep,True)
+#define s390_call_calculate_iccZZ(op,dep1,dep2) \
+        s390_call_calculate_icc(op,dep1,dep2,False)
+#define s390_call_calculate_iccSS(op,dep1,dep2) \
+        s390_call_calculate_icc(op,dep1,dep2,True)
+
+
+#define OFFB_TISTART   S390X_GUEST_OFFSET(guest_TISTART)
+#define OFFB_TILEN     S390X_GUEST_OFFSET(guest_TILEN)
+
+
+/*------------------------------------------------------------*/
+/*--- Guest register access                                ---*/
+/*------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------*/
+/*--- ar registers                                         ---*/
+/*------------------------------------------------------------*/
+
+/* Return the guest state offset of an ar register. */
+static UInt
+ar_offset(UInt archreg)
+{
+   static const UInt offset[16] = {
+      S390X_GUEST_OFFSET(guest_a0),
+      S390X_GUEST_OFFSET(guest_a1),
+      S390X_GUEST_OFFSET(guest_a2),
+      S390X_GUEST_OFFSET(guest_a3),
+      S390X_GUEST_OFFSET(guest_a4),
+      S390X_GUEST_OFFSET(guest_a5),
+      S390X_GUEST_OFFSET(guest_a6),
+      S390X_GUEST_OFFSET(guest_a7),
+      S390X_GUEST_OFFSET(guest_a8),
+      S390X_GUEST_OFFSET(guest_a9),
+      S390X_GUEST_OFFSET(guest_a10),
+      S390X_GUEST_OFFSET(guest_a11),
+      S390X_GUEST_OFFSET(guest_a12),
+      S390X_GUEST_OFFSET(guest_a13),
+      S390X_GUEST_OFFSET(guest_a14),
+      S390X_GUEST_OFFSET(guest_a15),
+   };
+
+   vassert(archreg < 16);
+
+   return offset[archreg];
+}
+
+
+/* Return the guest state offset of word #0 of an ar register. */
+static __inline__ UInt
+ar_w0_offset(UInt archreg)
+{
+   return ar_offset(archreg) + 0;
+}
+
+/* Write word #0 of an ar to the guest state. */
+static __inline__ void
+put_ar_w0(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I32);
+
+   stmt(IRStmt_Put(ar_w0_offset(archreg), expr));
+}
+
+/* Read word #0 of an ar register. */
+static __inline__ IRExpr *
+get_ar_w0(UInt archreg)
+{
+   return IRExpr_Get(ar_w0_offset(archreg), Ity_I32);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- fpr registers                                        ---*/
+/*------------------------------------------------------------*/
+
+/* Return the guest state offset of a fpr register. */
+static UInt
+fpr_offset(UInt archreg)
+{
+   static const UInt offset[16] = {
+      S390X_GUEST_OFFSET(guest_f0),
+      S390X_GUEST_OFFSET(guest_f1),
+      S390X_GUEST_OFFSET(guest_f2),
+      S390X_GUEST_OFFSET(guest_f3),
+      S390X_GUEST_OFFSET(guest_f4),
+      S390X_GUEST_OFFSET(guest_f5),
+      S390X_GUEST_OFFSET(guest_f6),
+      S390X_GUEST_OFFSET(guest_f7),
+      S390X_GUEST_OFFSET(guest_f8),
+      S390X_GUEST_OFFSET(guest_f9),
+      S390X_GUEST_OFFSET(guest_f10),
+      S390X_GUEST_OFFSET(guest_f11),
+      S390X_GUEST_OFFSET(guest_f12),
+      S390X_GUEST_OFFSET(guest_f13),
+      S390X_GUEST_OFFSET(guest_f14),
+      S390X_GUEST_OFFSET(guest_f15),
+   };
+
+   vassert(archreg < 16);
+
+   return offset[archreg];
+}
+
+
+/* Return the guest state offset of word #0 of a fpr register. */
+static __inline__ UInt
+fpr_w0_offset(UInt archreg)
+{
+   return fpr_offset(archreg) + 0;
+}
+
+/* Write word #0 of a fpr to the guest state. */
+static __inline__ void
+put_fpr_w0(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_F32);
+
+   stmt(IRStmt_Put(fpr_w0_offset(archreg), expr));
+}
+
+/* Read word #0 of a fpr register. */
+static __inline__ IRExpr *
+get_fpr_w0(UInt archreg)
+{
+   return IRExpr_Get(fpr_w0_offset(archreg), Ity_F32);
+}
+
+/* Return the guest state offset of double word #0 of a fpr register. */
+static __inline__ UInt
+fpr_dw0_offset(UInt archreg)
+{
+   return fpr_offset(archreg) + 0;
+}
+
+/* Write double word #0 of a fpr to the guest state. */
+static __inline__ void
+put_fpr_dw0(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_F64);
+
+   stmt(IRStmt_Put(fpr_dw0_offset(archreg), expr));
+}
+
+/* Read double word #0 of a fpr register. */
+static __inline__ IRExpr *
+get_fpr_dw0(UInt archreg)
+{
+   return IRExpr_Get(fpr_dw0_offset(archreg), Ity_F64);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- gpr registers                                        ---*/
+/*------------------------------------------------------------*/
+
+/* Return the guest state offset of a gpr register. */
+static UInt
+gpr_offset(UInt archreg)
+{
+   static const UInt offset[16] = {
+      S390X_GUEST_OFFSET(guest_r0),
+      S390X_GUEST_OFFSET(guest_r1),
+      S390X_GUEST_OFFSET(guest_r2),
+      S390X_GUEST_OFFSET(guest_r3),
+      S390X_GUEST_OFFSET(guest_r4),
+      S390X_GUEST_OFFSET(guest_r5),
+      S390X_GUEST_OFFSET(guest_r6),
+      S390X_GUEST_OFFSET(guest_r7),
+      S390X_GUEST_OFFSET(guest_r8),
+      S390X_GUEST_OFFSET(guest_r9),
+      S390X_GUEST_OFFSET(guest_r10),
+      S390X_GUEST_OFFSET(guest_r11),
+      S390X_GUEST_OFFSET(guest_r12),
+      S390X_GUEST_OFFSET(guest_r13),
+      S390X_GUEST_OFFSET(guest_r14),
+      S390X_GUEST_OFFSET(guest_r15),
+   };
+
+   vassert(archreg < 16);
+
+   return offset[archreg];
+}
+
+
+/* Return the guest state offset of word #0 of a gpr register. */
+static __inline__ UInt
+gpr_w0_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 0;
+}
+
+/* Write word #0 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_w0(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I32);
+
+   stmt(IRStmt_Put(gpr_w0_offset(archreg), expr));
+}
+
+/* Read word #0 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_w0(UInt archreg)
+{
+   return IRExpr_Get(gpr_w0_offset(archreg), Ity_I32);
+}
+
+/* Return the guest state offset of double word #0 of a gpr register. */
+static __inline__ UInt
+gpr_dw0_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 0;
+}
+
+/* Write double word #0 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_dw0(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I64);
+
+   stmt(IRStmt_Put(gpr_dw0_offset(archreg), expr));
+}
+
+/* Read double word #0 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_dw0(UInt archreg)
+{
+   return IRExpr_Get(gpr_dw0_offset(archreg), Ity_I64);
+}
+
+/* Return the guest state offset of half word #1 of a gpr register. */
+static __inline__ UInt
+gpr_hw1_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 2;
+}
+
+/* Write half word #1 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_hw1(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I16);
+
+   stmt(IRStmt_Put(gpr_hw1_offset(archreg), expr));
+}
+
+/* Read half word #1 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_hw1(UInt archreg)
+{
+   return IRExpr_Get(gpr_hw1_offset(archreg), Ity_I16);
+}
+
+/* Return the guest state offset of byte #6 of a gpr register. */
+static __inline__ UInt
+gpr_b6_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 6;
+}
+
+/* Write byte #6 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_b6(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I8);
+
+   stmt(IRStmt_Put(gpr_b6_offset(archreg), expr));
+}
+
+/* Read byte #6 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_b6(UInt archreg)
+{
+   return IRExpr_Get(gpr_b6_offset(archreg), Ity_I8);
+}
+
+/* Return the guest state offset of byte #3 of a gpr register. */
+static __inline__ UInt
+gpr_b3_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 3;
+}
+
+/* Write byte #3 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_b3(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I8);
+
+   stmt(IRStmt_Put(gpr_b3_offset(archreg), expr));
+}
+
+/* Read byte #3 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_b3(UInt archreg)
+{
+   return IRExpr_Get(gpr_b3_offset(archreg), Ity_I8);
+}
+
+/* Return the guest state offset of byte #0 of a gpr register. */
+static __inline__ UInt
+gpr_b0_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 0;
+}
+
+/* Write byte #0 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_b0(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I8);
+
+   stmt(IRStmt_Put(gpr_b0_offset(archreg), expr));
+}
+
+/* Read byte #0 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_b0(UInt archreg)
+{
+   return IRExpr_Get(gpr_b0_offset(archreg), Ity_I8);
+}
+
+/* Return the guest state offset of word #1 of a gpr register. */
+static __inline__ UInt
+gpr_w1_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 4;
+}
+
+/* Write word #1 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_w1(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I32);
+
+   stmt(IRStmt_Put(gpr_w1_offset(archreg), expr));
+}
+
+/* Read word #1 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_w1(UInt archreg)
+{
+   return IRExpr_Get(gpr_w1_offset(archreg), Ity_I32);
+}
+
+/* Return the guest state offset of half word #3 of a gpr register. */
+static __inline__ UInt
+gpr_hw3_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 6;
+}
+
+/* Write half word #3 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_hw3(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I16);
+
+   stmt(IRStmt_Put(gpr_hw3_offset(archreg), expr));
+}
+
+/* Read half word #3 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_hw3(UInt archreg)
+{
+   return IRExpr_Get(gpr_hw3_offset(archreg), Ity_I16);
+}
+
+/* Return the guest state offset of byte #7 of a gpr register. */
+static __inline__ UInt
+gpr_b7_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 7;
+}
+
+/* Write byte #7 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_b7(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I8);
+
+   stmt(IRStmt_Put(gpr_b7_offset(archreg), expr));
+}
+
+/* Read byte #7 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_b7(UInt archreg)
+{
+   return IRExpr_Get(gpr_b7_offset(archreg), Ity_I8);
+}
+
+/* Return the guest state offset of half word #0 of a gpr register. */
+static __inline__ UInt
+gpr_hw0_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 0;
+}
+
+/* Write half word #0 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_hw0(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I16);
+
+   stmt(IRStmt_Put(gpr_hw0_offset(archreg), expr));
+}
+
+/* Read half word #0 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_hw0(UInt archreg)
+{
+   return IRExpr_Get(gpr_hw0_offset(archreg), Ity_I16);
+}
+
+/* Return the guest state offset of byte #4 of a gpr register. */
+static __inline__ UInt
+gpr_b4_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 4;
+}
+
+/* Write byte #4 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_b4(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I8);
+
+   stmt(IRStmt_Put(gpr_b4_offset(archreg), expr));
+}
+
+/* Read byte #4 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_b4(UInt archreg)
+{
+   return IRExpr_Get(gpr_b4_offset(archreg), Ity_I8);
+}
+
+/* Return the guest state offset of byte #1 of a gpr register. */
+static __inline__ UInt
+gpr_b1_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 1;
+}
+
+/* Write byte #1 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_b1(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I8);
+
+   stmt(IRStmt_Put(gpr_b1_offset(archreg), expr));
+}
+
+/* Read byte #1 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_b1(UInt archreg)
+{
+   return IRExpr_Get(gpr_b1_offset(archreg), Ity_I8);
+}
+
+/* Return the guest state offset of half word #2 of a gpr register. */
+static __inline__ UInt
+gpr_hw2_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 4;
+}
+
+/* Write half word #2 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_hw2(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I16);
+
+   stmt(IRStmt_Put(gpr_hw2_offset(archreg), expr));
+}
+
+/* Read half word #2 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_hw2(UInt archreg)
+{
+   return IRExpr_Get(gpr_hw2_offset(archreg), Ity_I16);
+}
+
+/* Return the guest state offset of byte #5 of a gpr register. */
+static __inline__ UInt
+gpr_b5_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 5;
+}
+
+/* Write byte #5 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_b5(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I8);
+
+   stmt(IRStmt_Put(gpr_b5_offset(archreg), expr));
+}
+
+/* Read byte #5 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_b5(UInt archreg)
+{
+   return IRExpr_Get(gpr_b5_offset(archreg), Ity_I8);
+}
+
+/* Return the guest state offset of byte #2 of a gpr register. */
+static __inline__ UInt
+gpr_b2_offset(UInt archreg)
+{
+   return gpr_offset(archreg) + 2;
+}
+
+/* Write byte #2 of a gpr to the guest state. */
+static __inline__ void
+put_gpr_b2(UInt archreg, IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I8);
+
+   stmt(IRStmt_Put(gpr_b2_offset(archreg), expr));
+}
+
+/* Read byte #2 of a gpr register. */
+static __inline__ IRExpr *
+get_gpr_b2(UInt archreg)
+{
+   return IRExpr_Get(gpr_b2_offset(archreg), Ity_I8);
+}
+
+/* Return the guest state offset of the counter register. */
+static UInt
+counter_offset(void)
+{
+   return S390X_GUEST_OFFSET(guest_counter);
+}
+
+/* Return the guest state offset of double word #0 of the counter register. */
+static __inline__ UInt
+counter_dw0_offset(void)
+{
+   return counter_offset() + 0;
+}
+
+/* Write double word #0 of the counter to the guest state. */
+static __inline__ void
+put_counter_dw0(IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I64);
+
+   stmt(IRStmt_Put(counter_dw0_offset(), expr));
+}
+
+/* Read double word #0 of the counter register. */
+static __inline__ IRExpr *
+get_counter_dw0(void)
+{
+   return IRExpr_Get(counter_dw0_offset(), Ity_I64);
+}
+
+/* Return the guest state offset of word #0 of the counter register. */
+static __inline__ UInt
+counter_w0_offset(void)
+{
+   return counter_offset() + 0;
+}
+
+/* Return the guest state offset of word #1 of the counter register. */
+static __inline__ UInt
+counter_w1_offset(void)
+{
+   return counter_offset() + 4;
+}
+
+/* Write word #0 of the counter to the guest state. */
+static __inline__ void
+put_counter_w0(IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I32);
+
+   stmt(IRStmt_Put(counter_w0_offset(), expr));
+}
+
+/* Read word #0 of the counter register. */
+static __inline__ IRExpr *
+get_counter_w0(void)
+{
+   return IRExpr_Get(counter_w0_offset(), Ity_I32);
+}
+
+/* Write word #1 of the counter to the guest state. */
+static __inline__ void
+put_counter_w1(IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I32);
+
+   stmt(IRStmt_Put(counter_w1_offset(), expr));
+}
+
+/* Read word #1 of the counter register. */
+static __inline__ IRExpr *
+get_counter_w1(void)
+{
+   return IRExpr_Get(counter_w1_offset(), Ity_I32);
+}
+
+/* Return the guest state offset of the fpc register. */
+static UInt
+fpc_offset(void)
+{
+   return S390X_GUEST_OFFSET(guest_fpc);
+}
+
+/* Return the guest state offset of word #0 of the fpc register. */
+static __inline__ UInt
+fpc_w0_offset(void)
+{
+   return fpc_offset() + 0;
+}
+
+/* Write word #0 of the fpc to the guest state. */
+static __inline__ void
+put_fpc_w0(IRExpr *expr)
+{
+   vassert(typeOfIRExpr(irsb->tyenv, expr) == Ity_I32);
+
+   stmt(IRStmt_Put(fpc_w0_offset(), expr));
+}
+
+/* Read word #0 of the fpc register. */
+static __inline__ IRExpr *
+get_fpc_w0(void)
+{
+   return IRExpr_Get(fpc_w0_offset(), Ity_I32);
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Build IR for formats                                 ---*/
+/*------------------------------------------------------------*/
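+
+/* Each s390_format_XXXX function below decodes the operand fields of
+   one instruction format, computes any operand addresses, invokes the
+   IR generator IRGEN and, when front-end tracing is enabled,
+   disassembles the insn using the mnemonic that IRGEN returns. */
+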
+static void
+s390_format_I(HChar *(*irgen)(UChar i),
+              UChar i)
+{
+   HChar *mnm = irgen(i);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC2(MNM, UINT), mnm, i);
+}
+
+static void
+s390_format_RI(HChar *(*irgen)(UChar r1, UShort i2),
+               UChar r1, UShort i2)
+{
+   irgen(r1, i2);
+}
+
+static void
+s390_format_RI_RU(HChar *(*irgen)(UChar r1, UShort i2),
+                  UChar r1, UShort i2)
+{
+   HChar *mnm = irgen(r1, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, UINT), mnm, r1, i2);
+}
+
+static void
+s390_format_RI_RI(HChar *(*irgen)(UChar r1, UShort i2),
+                  UChar r1, UShort i2)
+{
+   HChar *mnm = irgen(r1, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, INT), mnm, r1, (Int)(Short)i2);
+}
+
+static void
+s390_format_RI_RP(HChar *(*irgen)(UChar r1, UShort i2),
+                  UChar r1, UShort i2)
+{
+   HChar *mnm = irgen(r1, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, PCREL), mnm, r1, (Int)(Short)i2);
+}
+
+static void
+s390_format_RIE_RRP(HChar *(*irgen)(UChar r1, UChar r3, UShort i2),
+                    UChar r1, UChar r3, UShort i2)
+{
+   HChar *mnm = irgen(r1, r3, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, GPR, GPR, PCREL), mnm, r1, r3, (Int)(Short)i2);
+}
+
+static void
+s390_format_RIE_RRI0(HChar *(*irgen)(UChar r1, UChar r3, UShort i2),
+                     UChar r1, UChar r3, UShort i2)
+{
+   HChar *mnm = irgen(r1, r3, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, GPR, GPR, INT), mnm, r1, r3, (Int)(Short)i2);
+}
+
+static void
+s390_format_RIE_RRUUU(HChar *(*irgen)(UChar r1, UChar r2, UChar i3, UChar i4,
+                      UChar i5),
+                      UChar r1, UChar r2, UChar i3, UChar i4, UChar i5)
+{
+   HChar *mnm = irgen(r1, r2, i3, i4, i5);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC6(MNM, GPR, GPR, UINT, UINT, UINT), mnm, r1, r2, i3, i4,
+                  i5);
+}
+
+static void
+s390_format_RIE_RRPU(HChar *(*irgen)(UChar r1, UChar r2, UShort i4, UChar m3),
+                     UChar r1, UChar r2, UShort i4, UChar m3)
+{
+   HChar *mnm = irgen(r1, r2, i4, m3);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC5(XMNM, GPR, GPR, CABM, PCREL), S390_XMNM_CAB, mnm, m3, r1,
+                  r2, m3, (Int)(Short)i4);
+}
+
+static void
+s390_format_RIE_RUPU(HChar *(*irgen)(UChar r1, UChar m3, UShort i4, UChar i2),
+                     UChar r1, UChar m3, UShort i4, UChar i2)
+{
+   HChar *mnm = irgen(r1, m3, i4, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC5(XMNM, GPR, UINT, CABM, PCREL), S390_XMNM_CAB, mnm, m3,
+                  r1, i2, m3, (Int)(Short)i4);
+}
+
+static void
+s390_format_RIE_RUPI(HChar *(*irgen)(UChar r1, UChar m3, UShort i4, UChar i2),
+                     UChar r1, UChar m3, UShort i4, UChar i2)
+{
+   HChar *mnm = irgen(r1, m3, i4, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC5(XMNM, GPR, INT, CABM, PCREL), S390_XMNM_CAB, mnm, m3, r1,
+                  (Int)(Char)i2, m3, (Int)(Short)i4);
+}
+
+static void
+s390_format_RIL(HChar *(*irgen)(UChar r1, UInt i2),
+                UChar r1, UInt i2)
+{
+   irgen(r1, i2);
+}
+
+static void
+s390_format_RIL_RU(HChar *(*irgen)(UChar r1, UInt i2),
+                   UChar r1, UInt i2)
+{
+   HChar *mnm = irgen(r1, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, UINT), mnm, r1, i2);
+}
+
+static void
+s390_format_RIL_RI(HChar *(*irgen)(UChar r1, UInt i2),
+                   UChar r1, UInt i2)
+{
+   HChar *mnm = irgen(r1, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, INT), mnm, r1, i2);
+}
+
+static void
+s390_format_RIL_RP(HChar *(*irgen)(UChar r1, UInt i2),
+                   UChar r1, UInt i2)
+{
+   HChar *mnm = irgen(r1, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, PCREL), mnm, r1, i2);
+}
+
+static void
+s390_format_RIL_UP(HChar *(*irgen)(void),
+                   UChar r1, UInt i2)
+{
+   HChar *mnm = irgen();
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, UINT, PCREL), mnm, r1, i2);
+}
+
+static void
+s390_format_RIS_RURDI(HChar *(*irgen)(UChar r1, UChar m3, UChar i2,
+                      IRTemp op4addr),
+                      UChar r1, UChar m3, UChar b4, UShort d4, UChar i2)
+{
+   HChar *mnm;
+   IRTemp op4addr = newTemp(Ity_I64);
+
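+   /* An s390 base or index field of 0 contributes the value 0, not the
+      contents of r0; hence the b4 != 0 test below. */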
+   assign(op4addr, binop(Iop_Add64, mkU64(d4), b4 != 0 ? get_gpr_dw0(b4) :
+          mkU64(0)));
+
+   mnm = irgen(r1, m3, i2, op4addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC5(XMNM, GPR, INT, CABM, UDXB), S390_XMNM_CAB, mnm, m3, r1,
+                  (Int)(Char)i2, m3, d4, 0, b4);
+}
+
+static void
+s390_format_RIS_RURDU(HChar *(*irgen)(UChar r1, UChar m3, UChar i2,
+                      IRTemp op4addr),
+                      UChar r1, UChar m3, UChar b4, UShort d4, UChar i2)
+{
+   HChar *mnm;
+   IRTemp op4addr = newTemp(Ity_I64);
+
+   assign(op4addr, binop(Iop_Add64, mkU64(d4), b4 != 0 ? get_gpr_dw0(b4) :
+          mkU64(0)));
+
+   mnm = irgen(r1, m3, i2, op4addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC5(XMNM, GPR, UINT, CABM, UDXB), S390_XMNM_CAB, mnm, m3, r1,
+                  i2, m3, d4, 0, b4);
+}
+
+static void
+s390_format_RR(HChar *(*irgen)(UChar r1, UChar r2),
+               UChar r1, UChar r2)
+{
+   irgen(r1, r2);
+}
+
+static void
+s390_format_RR_RR(HChar *(*irgen)(UChar r1, UChar r2),
+                  UChar r1, UChar r2)
+{
+   HChar *mnm = irgen(r1, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, GPR), mnm, r1, r2);
+}
+
+static void
+s390_format_RR_FF(HChar *(*irgen)(UChar r1, UChar r2),
+                  UChar r1, UChar r2)
+{
+   HChar *mnm = irgen(r1, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, FPR, FPR), mnm, r1, r2);
+}
+
+static void
+s390_format_RRE(HChar *(*irgen)(UChar r1, UChar r2),
+                UChar r1, UChar r2)
+{
+   irgen(r1, r2);
+}
+
+static void
+s390_format_RRE_RR(HChar *(*irgen)(UChar r1, UChar r2),
+                   UChar r1, UChar r2)
+{
+   HChar *mnm = irgen(r1, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, GPR), mnm, r1, r2);
+}
+
+static void
+s390_format_RRE_FF(HChar *(*irgen)(UChar r1, UChar r2),
+                   UChar r1, UChar r2)
+{
+   HChar *mnm = irgen(r1, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, FPR, FPR), mnm, r1, r2);
+}
+
+static void
+s390_format_RRE_RF(HChar *(*irgen)(UChar, UChar),
+                   UChar r1, UChar r2)
+{
+   HChar *mnm = irgen(r1, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, FPR), mnm, r1, r2);
+}
+
+static void
+s390_format_RRE_FR(HChar *(*irgen)(UChar r1, UChar r2),
+                   UChar r1, UChar r2)
+{
+   HChar *mnm = irgen(r1, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, FPR, GPR), mnm, r1, r2);
+}
+
+static void
+s390_format_RRE_R0(HChar *(*irgen)(UChar r1),
+                   UChar r1)
+{
+   HChar *mnm = irgen(r1);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC2(MNM, GPR), mnm, r1);
+}
+
+static void
+s390_format_RRE_F0(HChar *(*irgen)(UChar r1),
+                   UChar r1)
+{
+   HChar *mnm = irgen(r1);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC2(MNM, FPR), mnm, r1);
+}
+
+static void
+s390_format_RRF_F0FF(HChar *(*irgen)(UChar, UChar, UChar),
+                     UChar r1, UChar r3, UChar r2)
+{
+   HChar *mnm = irgen(r1, r3, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, FPR, FPR, FPR), mnm, r1, r3, r2);
+}
+
+static void
+s390_format_RRF_U0RR(HChar *(*irgen)(UChar m3, UChar r1, UChar r2),
+                     UChar m3, UChar r1, UChar r2, Int xmnm_kind)
+{
+   irgen(m3, r1, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(XMNM, GPR, GPR), xmnm_kind, m3, r1, r2);
+}
+
+static void
+s390_format_RRF_U0RF(HChar *(*irgen)(UChar r3, UChar r1, UChar r2),
+                     UChar r3, UChar r1, UChar r2)
+{
+   HChar *mnm = irgen(r3, r1, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, GPR, UINT, FPR), mnm, r1, r3, r2);
+}
+
+static void
+s390_format_RRF_F0FF2(HChar *(*irgen)(UChar, UChar, UChar),
+                      UChar r3, UChar r1, UChar r2)
+{
+   HChar *mnm = irgen(r3, r1, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, FPR, FPR, FPR), mnm, r1, r3, r2);
+}
+
+static void
+s390_format_RRF_R0RR2(HChar *(*irgen)(UChar r3, UChar r1, UChar r2),
+                      UChar r3, UChar r1, UChar r2)
+{
+   HChar *mnm = irgen(r3, r1, r2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, GPR, GPR, GPR), mnm, r1, r2, r3);
+}
+
+static void
+s390_format_RRS(HChar *(*irgen)(UChar r1, UChar r2, UChar m3, IRTemp op4addr),
+                UChar r1, UChar r2, UChar b4, UShort d4, UChar m3)
+{
+   HChar *mnm;
+   IRTemp op4addr = newTemp(Ity_I64);
+
+   assign(op4addr, binop(Iop_Add64, mkU64(d4), b4 != 0 ? get_gpr_dw0(b4) :
+          mkU64(0)));
+
+   mnm = irgen(r1, r2, m3, op4addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC5(XMNM, GPR, GPR, CABM, UDXB), S390_XMNM_CAB, mnm, m3, r1,
+                  r2, m3, d4, 0, b4);
+}
+
+static void
+s390_format_RS_R0RD(HChar *(*irgen)(UChar r1, IRTemp op2addr),
+                    UChar r1, UChar b2, UShort d2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, UDXB), mnm, r1, d2, 0, b2);
+}
+
+static void
+s390_format_RS_RRRD(HChar *(*irgen)(UChar r1, UChar r3, IRTemp op2addr),
+                    UChar r1, UChar r3, UChar b2, UShort d2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, r3, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, GPR, GPR, UDXB), mnm, r1, r3, d2, 0, b2);
+}
+
+static void
+s390_format_RS_RURD(HChar *(*irgen)(UChar r1, UChar r3, IRTemp op2addr),
+                    UChar r1, UChar r3, UChar b2, UShort d2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, r3, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, GPR, UINT, UDXB), mnm, r1, r3, d2, 0, b2);
+}
+
+static void
+s390_format_RS_AARD(HChar *(*irgen)(UChar, UChar, IRTemp),
+                    UChar r1, UChar r3, UChar b2, UShort d2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, r3, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, AR, AR, UDXB), mnm, r1, r3, d2, 0, b2);
+}
+
+static void
+s390_format_RSI_RRP(HChar *(*irgen)(UChar r1, UChar r3, UShort i2),
+                    UChar r1, UChar r3, UShort i2)
+{
+   HChar *mnm = irgen(r1, r3, i2);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, GPR, GPR, PCREL), mnm, r1, r3, (Int)(Short)i2);
+}
+
+static void
+s390_format_RSY_RRRD(HChar *(*irgen)(UChar r1, UChar r3, IRTemp op2addr),
+                     UChar r1, UChar r3, UChar b2, UShort dl2, UChar dh2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+   IRTemp d2 = newTemp(Ity_I64);
+
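+   /* Assemble the 20-bit signed displacement from dh2 (high 8 bits,
+      sign-extended) and dl2 (low 12 bits); e.g. dh2 = 0xff, dl2 = 0x123
+      gives 0xfffffffffffff123 == -3805. */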
+   assign(d2, mkU64(((ULong)(Long)(Char)dh2 << 12) | ((ULong)dl2)));
+   assign(op2addr, binop(Iop_Add64, mkexpr(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, r3, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, GPR, GPR, SDXB), mnm, r1, r3, dh2, dl2, 0, b2);
+}
+
+static void
+s390_format_RSY_AARD(HChar *(*irgen)(UChar, UChar, IRTemp),
+                     UChar r1, UChar r3, UChar b2, UShort dl2, UChar dh2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+   IRTemp d2 = newTemp(Ity_I64);
+
+   assign(d2, mkU64(((ULong)(Long)(Char)dh2 << 12) | ((ULong)dl2)));
+   assign(op2addr, binop(Iop_Add64, mkexpr(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, r3, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, AR, AR, SDXB), mnm, r1, r3, dh2, dl2, 0, b2);
+}
+
+static void
+s390_format_RSY_RURD(HChar *(*irgen)(UChar r1, UChar r3, IRTemp op2addr),
+                     UChar r1, UChar r3, UChar b2, UShort dl2, UChar dh2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+   IRTemp d2 = newTemp(Ity_I64);
+
+   assign(d2, mkU64(((ULong)(Long)(Char)dh2 << 12) | ((ULong)dl2)));
+   assign(op2addr, binop(Iop_Add64, mkexpr(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, r3, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, GPR, UINT, SDXB), mnm, r1, r3, dh2, dl2, 0, b2);
+}
+
+static void
+s390_format_RSY_RDRM(HChar *(*irgen)(UChar r1, IRTemp op2addr),
+                     UChar r1, UChar m3, UChar b2, UShort dl2, UChar dh2,
+                     Int xmnm_kind)
+{
+   IRTemp op2addr = newTemp(Ity_I64);
+   IRTemp d2 = newTemp(Ity_I64);
+
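+   /* Guarded operation: if the condition selected by mask m3 does not
+      hold, exit to the next instruction and skip the operation below. */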
+   if_condition_goto(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)),
+                     guest_IA_next_instr);
+   assign(d2, mkU64(((ULong)(Long)(Char)dh2 << 12) | ((ULong)dl2)));
+   assign(op2addr, binop(Iop_Add64, mkexpr(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   irgen(r1, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(XMNM, GPR, SDXB), xmnm_kind, m3, r1, dh2, dl2, 0, b2);
+}
+
+static void
+s390_format_RX(HChar *(*irgen)(UChar r1, UChar x2, UChar b2, UShort d2,
+               IRTemp op2addr),
+               UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op2addr, binop(Iop_Add64, binop(Iop_Add64, mkU64(d2),
+          b2 != 0 ? get_gpr_dw0(b2) : mkU64(0)), x2 != 0 ? get_gpr_dw0(x2) :
+          mkU64(0)));
+
+   irgen(r1, x2, b2, d2, op2addr);
+}
+
+static void
+s390_format_RX_RRRD(HChar *(*irgen)(UChar r1, IRTemp op2addr),
+                    UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op2addr, binop(Iop_Add64, binop(Iop_Add64, mkU64(d2),
+          b2 != 0 ? get_gpr_dw0(b2) : mkU64(0)), x2 != 0 ? get_gpr_dw0(x2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, UDXB), mnm, r1, d2, x2, b2);
+}
+
+static void
+s390_format_RX_FRRD(HChar *(*irgen)(UChar r1, IRTemp op2addr),
+                    UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op2addr, binop(Iop_Add64, binop(Iop_Add64, mkU64(d2),
+          b2 != 0 ? get_gpr_dw0(b2) : mkU64(0)), x2 != 0 ? get_gpr_dw0(x2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, FPR, UDXB), mnm, r1, d2, x2, b2);
+}
+
+static void
+s390_format_RXE_FRRD(HChar *(*irgen)(UChar r1, IRTemp op2addr),
+                     UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op2addr, binop(Iop_Add64, binop(Iop_Add64, mkU64(d2),
+          b2 != 0 ? get_gpr_dw0(b2) : mkU64(0)), x2 != 0 ? get_gpr_dw0(x2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, FPR, UDXB), mnm, r1, d2, x2, b2);
+}
+
+static void
+s390_format_RXF_FRRDF(HChar *(*irgen)(UChar, IRTemp, UChar),
+                      UChar r3, UChar x2, UChar b2, UShort d2, UChar r1)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op2addr, binop(Iop_Add64, binop(Iop_Add64, mkU64(d2),
+          b2 != 0 ? get_gpr_dw0(b2) : mkU64(0)), x2 != 0 ? get_gpr_dw0(x2) :
+          mkU64(0)));
+
+   mnm = irgen(r3, op2addr, r1);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, FPR, FPR, UDXB), mnm, r1, r3, d2, x2, b2);
+}
+
+static void
+s390_format_RXY_RRRD(HChar *(*irgen)(UChar r1, IRTemp op2addr),
+                     UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+   IRTemp d2 = newTemp(Ity_I64);
+
+   assign(d2, mkU64(((ULong)(Long)(Char)dh2 << 12) | ((ULong)dl2)));
+   assign(op2addr, binop(Iop_Add64, binop(Iop_Add64, mkexpr(d2),
+          b2 != 0 ? get_gpr_dw0(b2) : mkU64(0)), x2 != 0 ? get_gpr_dw0(x2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, SDXB), mnm, r1, dh2, dl2, x2, b2);
+}
+
+static void
+s390_format_RXY_FRRD(HChar *(*irgen)(UChar r1, IRTemp op2addr),
+                     UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+   IRTemp d2 = newTemp(Ity_I64);
+
+   assign(d2, mkU64(((ULong)(Long)(Char)dh2 << 12) | ((ULong)dl2)));
+   assign(op2addr, binop(Iop_Add64, binop(Iop_Add64, mkexpr(d2),
+          b2 != 0 ? get_gpr_dw0(b2) : mkU64(0)), x2 != 0 ? get_gpr_dw0(x2) :
+          mkU64(0)));
+
+   mnm = irgen(r1, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, FPR, SDXB), mnm, r1, dh2, dl2, x2, b2);
+}
+
+static void
+s390_format_RXY_URRD(HChar *(*irgen)(void),
+                     UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+   IRTemp d2 = newTemp(Ity_I64);
+
+   assign(d2, mkU64(((ULong)(Long)(Char)dh2 << 12) | ((ULong)dl2)));
+   assign(op2addr, binop(Iop_Add64, binop(Iop_Add64, mkexpr(d2),
+          b2 != 0 ? get_gpr_dw0(b2) : mkU64(0)), x2 != 0 ? get_gpr_dw0(x2) :
+          mkU64(0)));
+
+   mnm = irgen();
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, UINT, SDXB), mnm, r1, dh2, dl2, x2, b2);
+}
+
+static void
+s390_format_S_RD(HChar *(*irgen)(IRTemp op2addr),
+                 UChar b2, UShort d2)
+{
+   HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   mnm = irgen(op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC2(MNM, UDXB), mnm, d2, 0, b2);
+}
+
+static void
+s390_format_SI_URD(HChar *(*irgen)(UChar i2, IRTemp op1addr),
+                   UChar i2, UChar b1, UShort d1)
+{
+   HChar *mnm;
+   IRTemp op1addr = newTemp(Ity_I64);
+
+   assign(op1addr, binop(Iop_Add64, mkU64(d1), b1 != 0 ? get_gpr_dw0(b1) :
+          mkU64(0)));
+
+   mnm = irgen(i2, op1addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, UDXB, UINT), mnm, d1, 0, b1, i2);
+}
+
+static void
+s390_format_SIY_URD(HChar *(*irgen)(UChar i2, IRTemp op1addr),
+                    UChar i2, UChar b1, UShort dl1, UChar dh1)
+{
+   HChar *mnm;
+   IRTemp op1addr = newTemp(Ity_I64);
+   IRTemp d1 = newTemp(Ity_I64);
+
+   assign(d1, mkU64(((ULong)(Long)(Char)dh1 << 12) | ((ULong)dl1)));
+   assign(op1addr, binop(Iop_Add64, mkexpr(d1), b1 != 0 ? get_gpr_dw0(b1) :
+          mkU64(0)));
+
+   mnm = irgen(i2, op1addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, SDXB, UINT), mnm, dh1, dl1, 0, b1, i2);
+}
+
+static void
+s390_format_SIY_IRD(HChar *(*irgen)(UChar i2, IRTemp op1addr),
+                    UChar i2, UChar b1, UShort dl1, UChar dh1)
+{
+   HChar *mnm;
+   IRTemp op1addr = newTemp(Ity_I64);
+   IRTemp d1 = newTemp(Ity_I64);
+
+   assign(d1, mkU64(((ULong)(Long)(Char)dh1 << 12) | ((ULong)dl1)));
+   assign(op1addr, binop(Iop_Add64, mkexpr(d1), b1 != 0 ? get_gpr_dw0(b1) :
+          mkU64(0)));
+
+   mnm = irgen(i2, op1addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, SDXB, INT), mnm, dh1, dl1, 0, b1, (Int)(Char)i2);
+}
+
+static void
+s390_format_SS_L0RDRD(HChar *(*irgen)(UChar, IRTemp, IRTemp),
+                      UChar l, UChar b1, UShort d1, UChar b2, UShort d2)
+{
+   HChar *mnm;
+   IRTemp op1addr = newTemp(Ity_I64);
+   IRTemp op2addr = newTemp(Ity_I64);
+
+   assign(op1addr, binop(Iop_Add64, mkU64(d1), b1 != 0 ? get_gpr_dw0(b1) :
+          mkU64(0)));
+   assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   mnm = irgen(l, op1addr, op2addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, UDLB, UDXB), mnm, d1, l, b1, d2, 0, b2);
+}
+
+static void
+s390_format_SIL_RDI(HChar *(*irgen)(UShort i2, IRTemp op1addr),
+                    UChar b1, UShort d1, UShort i2)
+{
+   HChar *mnm;
+   IRTemp op1addr = newTemp(Ity_I64);
+
+   assign(op1addr, binop(Iop_Add64, mkU64(d1), b1 != 0 ? get_gpr_dw0(b1) :
+          mkU64(0)));
+
+   mnm = irgen(i2, op1addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, UDXB, INT), mnm, d1, 0, b1, (Int)(Short)i2);
+}
+
+static void
+s390_format_SIL_RDU(HChar *(*irgen)(UShort i2, IRTemp op1addr),
+                    UChar b1, UShort d1, UShort i2)
+{
+   HChar *mnm;
+   IRTemp op1addr = newTemp(Ity_I64);
+
+   assign(op1addr, binop(Iop_Add64, mkU64(d1), b1 != 0 ? get_gpr_dw0(b1) :
+          mkU64(0)));
+
+   mnm = irgen(i2, op1addr);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, UDXB, UINT), mnm, d1, 0, b1, i2);
+}
+
+
+
+/*------------------------------------------------------------*/
+/*--- Build IR for opcodes                                 ---*/
+/*------------------------------------------------------------*/
+
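+/* Each s390_irgen_XXXX function builds the IR for one opcode and
+   returns its mnemonic, which the format functions above feed to the
+   disassembly tracer. */
+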
+static HChar *
+s390_irgen_00(UChar r1 __attribute__((unused)),
+              UChar r2 __attribute__((unused)))
+{
+   IRDirty *d;
+
+   d = unsafeIRDirty_0_N (0, "s390x_dirtyhelper_00", &s390x_dirtyhelper_00,
+                          mkIRExprVec_0());
+   d->needsBBP = 1;  /* Need to pass pointer to guest state to helper */
+
+   d->fxState[0].fx     = Ifx_Modify;  /* read then write */
+   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_IA);
+   d->fxState[0].size   = sizeof(ULong);
+   d->nFxState = 1;
+
+   stmt(IRStmt_Dirty(d));
+
+   return "00";
+}
+
+static HChar *
+s390_irgen_AR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "ar";
+}
+
+static HChar *
+s390_irgen_AGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "agr";
+}
+
+static HChar *
+s390_irgen_AGFR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Sto64, get_gpr_w1(r2)));
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "agfr";
+}
+
+static HChar *
+s390_irgen_ARK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Add32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op2, op3);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "ark";
+}
+
+static HChar *
+s390_irgen_AGRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Add64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_64, op2, op3);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "agrk";
+}
+
+static HChar *
+s390_irgen_A(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "a";
+}
+
+static HChar *
+s390_irgen_AY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "ay";
+}
+
+static HChar *
+s390_irgen_AG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "ag";
+}
+
+static HChar *
+s390_irgen_AGF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkexpr(op2addr))));
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "agf";
+}
+
+static HChar *
+s390_irgen_AFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = (Int)i2;
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkU32((UInt)op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op1, mktemp(Ity_I32,
+                       mkU32((UInt)op2)));
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "afi";
+}
+
+static HChar *
+s390_irgen_AGFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   Long op2;
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   op2 = (Long)(Int)i2;
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkU64((ULong)op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_64, op1, mktemp(Ity_I64,
+                       mkU64((ULong)op2)));
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "agfi";
+}
+
+static HChar *
+s390_irgen_AHIK(UChar r1, UChar r3, UShort i2)
+{
+   Int op2;
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   op2 = (Int)(Short)i2;
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Add32, mkU32((UInt)op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, mktemp(Ity_I32, mkU32((UInt)
+                       op2)), op3);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "ahik";
+}
+
+static HChar *
+s390_irgen_AGHIK(UChar r1, UChar r3, UShort i2)
+{
+   Long op2;
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   op2 = (Long)(Short)i2;
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Add64, mkU64((ULong)op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_64, mktemp(Ity_I64, mkU64((ULong)
+                       op2)), op3);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "aghik";
+}
+
+static HChar *
+s390_irgen_ASI(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, load(Ity_I32, mkexpr(op1addr)));
+   op2 = (Int)(Char)i2;
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkU32((UInt)op2)));
+   store(mkexpr(op1addr), mkexpr(result));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op1, mktemp(Ity_I32,
+                       mkU32((UInt)op2)));
+
+   return "asi";
+}
+
+static HChar *
+s390_irgen_AGSI(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   Long op2;
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, load(Ity_I64, mkexpr(op1addr)));
+   op2 = (Long)(Char)i2;
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkU64((ULong)op2)));
+   store(mkexpr(op1addr), mkexpr(result));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_64, op1, mktemp(Ity_I64,
+                       mkU64((ULong)op2)));
+
+   return "agsi";
+}
+
+static HChar *
+s390_irgen_AH(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkexpr(op2addr))));
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "ah";
+}
+
+static HChar *
+s390_irgen_AHY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkexpr(op2addr))));
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "ahy";
+}
+
+static HChar *
+s390_irgen_AHI(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = (Int)(Short)i2;
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkU32((UInt)op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op1, mktemp(Ity_I32,
+                       mkU32((UInt)op2)));
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "ahi";
+}
+
+static HChar *
+s390_irgen_AGHI(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   Long op2;
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   op2 = (Long)(Short)i2;
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkU64((ULong)op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_64, op1, mktemp(Ity_I64,
+                       mkU64((ULong)op2)));
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "aghi";
+}
+
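+/* AHHHR and AHHLR are high-word adds: the operands come from the high
+   words (w0) of the source registers -- for AHHLR, from the high word of
+   r2 and the low word of r3 -- and the sum is written to the high word
+   of r1.  */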
+static HChar *
+s390_irgen_AHHHR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w0(r2));
+   assign(op3, get_gpr_w0(r3));
+   assign(result, binop(Iop_Add32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op2, op3);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "ahhhr";
+}
+
+static HChar *
+s390_irgen_AHHLR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w0(r2));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Add32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op2, op3);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "ahhlr";
+}
+
+static HChar *
+s390_irgen_AIH(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   op2 = (Int)i2;
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkU32((UInt)op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op1, mktemp(Ity_I32,
+                       mkU32((UInt)op2)));
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "aih";
+}
+
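+/* Logical (unsigned) add family.  These differ from the signed adds above
+   only in the op recorded in the cc thunk: the condition code is later
+   derived from carry-out rather than from signed overflow.  */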
+static HChar *
+s390_irgen_ALR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "alr";
+}
+
+static HChar *
+s390_irgen_ALGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "algr";
+}
+
+static HChar *
+s390_irgen_ALGFR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Uto64, get_gpr_w1(r2)));
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "algfr";
+}
+
+static HChar *
+s390_irgen_ALRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Add32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, op2, op3);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "alrk";
+}
+
+static HChar *
+s390_irgen_ALGRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Add64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_64, op2, op3);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "algrk";
+}
+
+static HChar *
+s390_irgen_AL(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "al";
+}
+
+static HChar *
+s390_irgen_ALY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "aly";
+}
+
+static HChar *
+s390_irgen_ALG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "alg";
+}
+
+static HChar *
+s390_irgen_ALGF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkexpr(op2addr))));
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "algf";
+}
+
+static HChar *
+s390_irgen_ALFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkU32(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, op1, mktemp(Ity_I32,
+                       mkU32(op2)));
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "alfi";
+}
+
+static HChar *
+s390_irgen_ALGFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   ULong op2;
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   op2 = (ULong)i2;
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkU64(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_64, op1, mktemp(Ity_I64,
+                       mkU64(op2)));
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "algfi";
+}
+
+static HChar *
+s390_irgen_ALHHHR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w0(r2));
+   assign(op3, get_gpr_w0(r3));
+   assign(result, binop(Iop_Add32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, op2, op3);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "alhhhr";
+}
+
+static HChar *
+s390_irgen_ALHHLR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w0(r2));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Add32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, op2, op3);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "alhhlr";
+}
+
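+/* Add logical with carry.  The incoming carry is bit 1 of the current
+   condition code (after a logical add, cc 2 and cc 3 mean "carry"), hence
+   the cc >> 1 below; the thunk records all three summands.  */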
+static HChar *
+s390_irgen_ALCR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+   IRTemp carry_in = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
+   assign(carry_in, binop(Iop_Shr32, s390_call_calculate_cc(), mkU8(1)));
+   assign(result, binop(Iop_Add32, binop(Iop_Add32, mkexpr(op1), mkexpr(op2)),
+          mkexpr(carry_in)));
+   s390_cc_thunk_putZZZ(S390_CC_OP_UNSIGNED_ADDC_32, op1, op2, carry_in);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "alcr";
+}
+
+static HChar *
+s390_irgen_ALCGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+   IRTemp carry_in = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   assign(carry_in, unop(Iop_32Uto64, binop(Iop_Shr32, s390_call_calculate_cc(),
+          mkU8(1))));
+   assign(result, binop(Iop_Add64, binop(Iop_Add64, mkexpr(op1), mkexpr(op2)),
+          mkexpr(carry_in)));
+   s390_cc_thunk_putZZZ(S390_CC_OP_UNSIGNED_ADDC_64, op1, op2, carry_in);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "alcgr";
+}
+
+static HChar *
+s390_irgen_ALC(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+   IRTemp carry_in = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(carry_in, binop(Iop_Shr32, s390_call_calculate_cc(), mkU8(1)));
+   assign(result, binop(Iop_Add32, binop(Iop_Add32, mkexpr(op1), mkexpr(op2)),
+          mkexpr(carry_in)));
+   s390_cc_thunk_putZZZ(S390_CC_OP_UNSIGNED_ADDC_32, op1, op2, carry_in);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "alc";
+}
+
+static HChar *
+s390_irgen_ALCG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+   IRTemp carry_in = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(carry_in, unop(Iop_32Uto64, binop(Iop_Shr32, s390_call_calculate_cc(),
+          mkU8(1))));
+   assign(result, binop(Iop_Add64, binop(Iop_Add64, mkexpr(op1), mkexpr(op2)),
+          mkexpr(carry_in)));
+   s390_cc_thunk_putZZZ(S390_CC_OP_UNSIGNED_ADDC_64, op1, op2, carry_in);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "alcg";
+}
+
+static HChar *
+s390_irgen_ALSI(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, load(Ity_I32, mkexpr(op1addr)));
+   op2 = (UInt)(Int)(Char)i2;
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkU32(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, op1, mktemp(Ity_I32,
+                       mkU32(op2)));
+   store(mkexpr(op1addr), mkexpr(result));
+
+   return "alsi";
+}
+
+static HChar *
+s390_irgen_ALGSI(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   ULong op2;
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, load(Ity_I64, mkexpr(op1addr)));
+   op2 = (ULong)(Long)(Char)i2;
+   assign(result, binop(Iop_Add64, mkexpr(op1), mkU64(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_64, op1, mktemp(Ity_I64,
+                       mkU64(op2)));
+   store(mkexpr(op1addr), mkexpr(result));
+
+   return "algsi";
+}
+
+static HChar *
+s390_irgen_ALHSIK(UChar r1, UChar r3, UShort i2)
+{
+   UInt op2;
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   op2 = (UInt)(Int)(Short)i2;
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Add32, mkU32(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, mktemp(Ity_I32, mkU32(op2)),
+                       op3);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "alhsik";
+}
+
+static HChar *
+s390_irgen_ALGHSIK(UChar r1, UChar r3, UShort i2)
+{
+   ULong op2;
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   op2 = (ULong)(Long)(Short)i2;
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Add64, mkU64(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_64, mktemp(Ity_I64, mkU64(op2)),
+                       op3);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "alghsik";
+}
+
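+/* ALSIH adds i2 to the high word of r1 and sets the condition code;
+   ALSIHN (below) is the same operation but leaves the cc untouched,
+   which is why no thunk is written there.  */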
+static HChar *
+s390_irgen_ALSIH(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkU32(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, op1, mktemp(Ity_I32,
+                       mkU32(op2)));
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "alsih";
+}
+
+static HChar *
+s390_irgen_ALSIHN(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Add32, mkexpr(op1), mkU32(op2)));
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "alsihn";
+}
+
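+/* Bitwise AND family.  S390_CC_OP_BITWISE needs only the result: the
+   condition code is 0 if the result is zero and 1 otherwise.  */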
+static HChar *
+s390_irgen_NR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_And32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "nr";
+}
+
+static HChar *
+s390_irgen_NGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, binop(Iop_And64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "ngr";
+}
+
+static HChar *
+s390_irgen_NRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_And32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "nrk";
+}
+
+static HChar *
+s390_irgen_NGRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_And64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "ngrk";
+}
+
+static HChar *
+s390_irgen_N(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_And32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "n";
+}
+
+static HChar *
+s390_irgen_NY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_And32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "ny";
+}
+
+static HChar *
+s390_irgen_NG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(result, binop(Iop_And64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "ng";
+}
+
+static HChar *
+s390_irgen_NI(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I8);
+   UChar op2;
+   IRTemp result = newTemp(Ity_I8);
+
+   assign(op1, load(Ity_I8, mkexpr(op1addr)));
+   op2 = i2;
+   assign(result, binop(Iop_And8, mkexpr(op1), mkU8(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op1addr), mkexpr(result));
+
+   return "ni";
+}
+
+static HChar *
+s390_irgen_NIY(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I8);
+   UChar op2;
+   IRTemp result = newTemp(Ity_I8);
+
+   assign(op1, load(Ity_I8, mkexpr(op1addr)));
+   op2 = i2;
+   assign(result, binop(Iop_And8, mkexpr(op1), mkU8(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op1addr), mkexpr(result));
+
+   return "niy";
+}
+
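+/* AND immediate against selected register fields: NIHF/NILF operate on
+   the high and low words, NIHH/NIHL/NILH/NILL on the four halfwords.
+   The cc is computed from just the field that was modified.  */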
+static HChar *
+s390_irgen_NIHF(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   op2 = i2;
+   assign(result, binop(Iop_And32, mkexpr(op1), mkU32(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "nihf";
+}
+
+static HChar *
+s390_irgen_NIHH(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I16);
+   UShort op2;
+   IRTemp result = newTemp(Ity_I16);
+
+   assign(op1, get_gpr_hw0(r1));
+   op2 = i2;
+   assign(result, binop(Iop_And16, mkexpr(op1), mkU16(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_hw0(r1, mkexpr(result));
+
+   return "nihh";
+}
+
+static HChar *
+s390_irgen_NIHL(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I16);
+   UShort op2;
+   IRTemp result = newTemp(Ity_I16);
+
+   assign(op1, get_gpr_hw1(r1));
+   op2 = i2;
+   assign(result, binop(Iop_And16, mkexpr(op1), mkU16(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_hw1(r1, mkexpr(result));
+
+   return "nihl";
+}
+
+static HChar *
+s390_irgen_NILF(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = i2;
+   assign(result, binop(Iop_And32, mkexpr(op1), mkU32(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "nilf";
+}
+
+static HChar *
+s390_irgen_NILH(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I16);
+   UShort op2;
+   IRTemp result = newTemp(Ity_I16);
+
+   assign(op1, get_gpr_hw2(r1));
+   op2 = i2;
+   assign(result, binop(Iop_And16, mkexpr(op1), mkU16(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_hw2(r1, mkexpr(result));
+
+   return "nilh";
+}
+
+static HChar *
+s390_irgen_NILL(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I16);
+   UShort op2;
+   IRTemp result = newTemp(Ity_I16);
+
+   assign(op1, get_gpr_hw3(r1));
+   op2 = i2;
+   assign(result, binop(Iop_And16, mkexpr(op1), mkU16(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_hw3(r1, mkexpr(result));
+
+   return "nill";
+}
+
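+/* BASR: branch and save.  When r1 == r2 the branch target must be read
+   into a temp before r1 is overwritten with the return address; r2 == 0
+   means "save only, do not branch".  */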
+static HChar *
+s390_irgen_BASR(UChar r1, UChar r2)
+{
+   IRTemp target = newTemp(Ity_I64);
+
+   if (r2 == 0) {
+      put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 2ULL));
+   } else {
+      if (r1 != r2) {
+         put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 2ULL));
+         call_function(get_gpr_dw0(r2));
+      } else {
+         assign(target, get_gpr_dw0(r2));
+         put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 2ULL));
+         call_function(mkexpr(target));
+      }
+   }
+
+   return "basr";
+}
+
+static HChar *
+s390_irgen_BAS(UChar r1, IRTemp op2addr)
+{
+   IRTemp target = newTemp(Ity_I64);
+
+   put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 4ULL));
+   assign(target, mkexpr(op2addr));
+   call_function(mkexpr(target));
+
+   return "bas";
+}
+
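+/* BCR: branch on condition.  Mask 15 with a nonzero r2 is an unconditional
+   branch and is treated as a function return here; "bcr 14,0" and
+   "bcr 15,0" are serialization points and become an IR fence.  */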
+static HChar *
+s390_irgen_BCR(UChar r1, UChar r2)
+{
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (r2 == 0 && (r1 >= 14)) {    /* serialization */
+      stmt(IRStmt_MBE(Imbe_Fence));
+   }
+
+   if ((r2 == 0) || (r1 == 0)) {
+   } else {
+      if (r1 == 15) {
+         return_from_function(get_gpr_dw0(r2));
+      } else {
+         assign(cond, s390_call_calculate_cond(r1));
+         if_not_condition_goto_computed(binop(Iop_CmpEQ32, mkexpr(cond),
+                                        mkU32(0)), get_gpr_dw0(r2));
+      }
+   }
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC2(XMNM, GPR), S390_XMNM_BCR, r1, r2);
+
+   return "bcr";
+}
+
+static HChar *
+s390_irgen_BC(UChar r1, UChar x2, UChar b2, UShort d2, IRTemp op2addr)
+{
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (r1 == 0) {
+   } else {
+      if (r1 == 15) {
+         always_goto(mkexpr(op2addr));
+      } else {
+         assign(cond, s390_call_calculate_cond(r1));
+         if_not_condition_goto_computed(binop(Iop_CmpEQ32, mkexpr(cond),
+                                        mkU32(0)), mkexpr(op2addr));
+      }
+   }
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC2(XMNM, UDXB), S390_XMNM_BC, r1, d2, x2, b2);
+
+   return "bc";
+}
+
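+/* Branch-on-count family: decrement r1, then branch unless the result is
+   zero.  For BCTR/BCTGR the decrement still happens when r2 == 0, where
+   no branch is possible.  */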
+static HChar *
+s390_irgen_BCTR(UChar r1, UChar r2)
+{
+   put_gpr_w1(r1, binop(Iop_Sub32, get_gpr_w1(r1), mkU32(1)));
+   if (r2 != 0) {
+      if_not_condition_goto_computed(binop(Iop_CmpEQ32, get_gpr_w1(r1),
+                                     mkU32(0)), get_gpr_dw0(r2));
+   }
+
+   return "bctr";
+}
+
+static HChar *
+s390_irgen_BCTGR(UChar r1, UChar r2)
+{
+   put_gpr_dw0(r1, binop(Iop_Sub64, get_gpr_dw0(r1), mkU64(1)));
+   if (r2 != 0) {
+      if_not_condition_goto_computed(binop(Iop_CmpEQ64, get_gpr_dw0(r1),
+                                     mkU64(0)), get_gpr_dw0(r2));
+   }
+
+   return "bctgr";
+}
+
+static HChar *
+s390_irgen_BCT(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, binop(Iop_Sub32, get_gpr_w1(r1), mkU32(1)));
+   if_not_condition_goto_computed(binop(Iop_CmpEQ32, get_gpr_w1(r1), mkU32(0)),
+                                  mkexpr(op2addr));
+
+   return "bct";
+}
+
+static HChar *
+s390_irgen_BCTG(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, binop(Iop_Sub64, get_gpr_dw0(r1), mkU64(1)));
+   if_not_condition_goto_computed(binop(Iop_CmpEQ64, get_gpr_dw0(r1), mkU64(0)),
+                                  mkexpr(op2addr));
+
+   return "bctg";
+}
+
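+/* BXH/BXLE: branch on index.  r3 designates an even-odd register pair:
+   the increment lives in r3 and the comparand in the odd register
+   (r3 | 1); with an odd r3 the two coincide.  */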
+static HChar *
+s390_irgen_BXH(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp value = newTemp(Ity_I32);
+
+   assign(value, get_gpr_w1(r3 | 1));
+   put_gpr_w1(r1, binop(Iop_Add32, get_gpr_w1(r1), get_gpr_w1(r3)));
+   if_not_condition_goto_computed(binop(Iop_CmpLE32S, get_gpr_w1(r1),
+                                  mkexpr(value)), mkexpr(op2addr));
+
+   return "bxh";
+}
+
+static HChar *
+s390_irgen_BXHG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp value = newTemp(Ity_I64);
+
+   assign(value, get_gpr_dw0(r3 | 1));
+   put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), get_gpr_dw0(r3)));
+   if_not_condition_goto_computed(binop(Iop_CmpLE64S, get_gpr_dw0(r1),
+                                  mkexpr(value)), mkexpr(op2addr));
+
+   return "bxhg";
+}
+
+static HChar *
+s390_irgen_BXLE(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp value = newTemp(Ity_I32);
+
+   assign(value, get_gpr_w1(r3 | 1));
+   put_gpr_w1(r1, binop(Iop_Add32, get_gpr_w1(r1), get_gpr_w1(r3)));
+   if_not_condition_goto_computed(binop(Iop_CmpLT32S, mkexpr(value),
+                                  get_gpr_w1(r1)), mkexpr(op2addr));
+
+   return "bxle";
+}
+
+static HChar *
+s390_irgen_BXLEG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp value = newTemp(Ity_I64);
+
+   assign(value, get_gpr_dw0(r3 | 1));
+   put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), get_gpr_dw0(r3)));
+   if_not_condition_goto_computed(binop(Iop_CmpLT64S, mkexpr(value),
+                                  get_gpr_dw0(r1)), mkexpr(op2addr));
+
+   return "bxleg";
+}
+
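+/* Relative branches.  The immediate is a signed halfword count, so the
+   target is the current instruction address plus i2 * 2; the cast chain
+   sign-extends and the shift scales.  */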
+static HChar *
+s390_irgen_BRAS(UChar r1, UShort i2)
+{
+   put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 4ULL));
+   call_function_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
+
+   return "bras";
+}
+
+static HChar *
+s390_irgen_BRASL(UChar r1, UInt i2)
+{
+   put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 6ULL));
+   call_function_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1));
+
+   return "brasl";
+}
+
+static HChar *
+s390_irgen_BRC(UChar r1, UShort i2)
+{
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (r1 == 0) {
+   } else {
+      if (r1 == 15) {
+         always_goto_and_chase(
+               guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
+      } else {
+         assign(cond, s390_call_calculate_cond(r1));
+         if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
+                           guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
+
+      }
+   }
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC2(XMNM, PCREL), S390_XMNM_BRC, r1, (Int)(Short)i2);
+
+   return "brc";
+}
+
+static HChar *
+s390_irgen_BRCL(UChar r1, UInt i2)
+{
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (r1 == 0) {
+   } else {
+      if (r1 == 15) {
+         always_goto_and_chase(
+               guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1));
+      } else {
+         assign(cond, s390_call_calculate_cond(r1));
+         if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
+                           guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1));
+      }
+   }
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC2(XMNM, PCREL), S390_XMNM_BRCL, r1, i2);
+
+   return "brcl";
+}
+
+static HChar *
+s390_irgen_BRCT(UChar r1, UShort i2)
+{
+   put_gpr_w1(r1, binop(Iop_Sub32, get_gpr_w1(r1), mkU32(1)));
+   if_condition_goto(binop(Iop_CmpNE32, get_gpr_w1(r1), mkU32(0)),
+                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
+
+   return "brct";
+}
+
+static HChar *
+s390_irgen_BRCTG(UChar r1, UShort i2)
+{
+   put_gpr_dw0(r1, binop(Iop_Sub64, get_gpr_dw0(r1), mkU64(1)));
+   if_condition_goto(binop(Iop_CmpNE64, get_gpr_dw0(r1), mkU64(0)),
+                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
+
+   return "brctg";
+}
+
+static HChar *
+s390_irgen_BRXH(UChar r1, UChar r3, UShort i2)
+{
+   IRTemp value = newTemp(Ity_I32);
+
+   assign(value, get_gpr_w1(r3 | 1));
+   put_gpr_w1(r1, binop(Iop_Add32, get_gpr_w1(r1), get_gpr_w1(r3)));
+   if_condition_goto(binop(Iop_CmpLT32S, mkexpr(value), get_gpr_w1(r1)),
+                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
+
+   return "brxh";
+}
+
+static HChar *
+s390_irgen_BRXHG(UChar r1, UChar r3, UShort i2)
+{
+   IRTemp value = newTemp(Ity_I64);
+
+   assign(value, get_gpr_dw0(r3 | 1));
+   put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), get_gpr_dw0(r3)));
+   if_condition_goto(binop(Iop_CmpLT64S, mkexpr(value), get_gpr_dw0(r1)),
+                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
+
+   return "brxhg";
+}
+
+static HChar *
+s390_irgen_BRXLE(UChar r1, UChar r3, UShort i2)
+{
+   IRTemp value = newTemp(Ity_I32);
+
+   assign(value, get_gpr_w1(r3 | 1));
+   put_gpr_w1(r1, binop(Iop_Add32, get_gpr_w1(r1), get_gpr_w1(r3)));
+   if_condition_goto(binop(Iop_CmpLE32S, get_gpr_w1(r1), mkexpr(value)),
+                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
+
+   return "brxle";
+}
+
+static HChar *
+s390_irgen_BRXLG(UChar r1, UChar r3, UShort i2)
+{
+   IRTemp value = newTemp(Ity_I64);
+
+   assign(value, get_gpr_dw0(r3 | 1));
+   put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), get_gpr_dw0(r3)));
+   if_condition_goto(binop(Iop_CmpLE64S, get_gpr_dw0(r1), mkexpr(value)),
+                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
+
+   return "brxlg";
+}
+
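+/* Signed compare family.  Compares only set the cc thunk; no register or
+   storage operand is modified.  */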
+static HChar *
+s390_irgen_CR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "cr";
+}
+
+static HChar *
+s390_irgen_CGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "cgr";
+}
+
+static HChar *
+s390_irgen_CGFR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Sto64, get_gpr_w1(r2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "cgfr";
+}
+
+static HChar *
+s390_irgen_C(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "c";
+}
+
+static HChar *
+s390_irgen_CY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "cy";
+}
+
+static HChar *
+s390_irgen_CG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "cg";
+}
+
+static HChar *
+s390_irgen_CGF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkexpr(op2addr))));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "cgf";
+}
+
+static HChar *
+s390_irgen_CFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = (Int)i2;
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, mktemp(Ity_I32,
+                       mkU32((UInt)op2)));
+
+   return "cfi";
+}
+
+static HChar *
+s390_irgen_CGFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   Long op2;
+
+   assign(op1, get_gpr_dw0(r1));
+   op2 = (Long)(Int)i2;
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, mktemp(Ity_I64,
+                       mkU64((ULong)op2)));
+
+   return "cgfi";
+}
+
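+/* PC-relative compares: the second operand is loaded from
+   guest_IA_curr_instr + i2 * 2, where i2 is a signed halfword count.  */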
+static HChar *
+s390_irgen_CRL(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkU64(guest_IA_curr_instr +
+          ((ULong)(Long)(Int)i2 << 1))));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "crl";
+}
+
+static HChar *
+s390_irgen_CGRL(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkU64(guest_IA_curr_instr +
+          ((ULong)(Long)(Int)i2 << 1))));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "cgrl";
+}
+
+static HChar *
+s390_irgen_CGFRL(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkU64(guest_IA_curr_instr +
+          ((ULong)(Long)(Int)i2 << 1)))));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "cgfrl";
+}
+
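+/* Compare-and-branch family.  The icc helper returns the condition code
+   the compare would set; (m3 << icc) & 8 then tests the mask bit selected
+   by that cc.  m3 == 14 covers cc 0, 1 and 2 -- every value a compare can
+   produce -- so it is treated as an always-taken branch.  */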
+static HChar *
+s390_irgen_CRB(UChar r1, UChar r2, UChar m3, IRTemp op4addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto(mkexpr(op4addr));
+      } else {
+         assign(op1, get_gpr_w1(r1));
+         assign(op2, get_gpr_w1(r2));
+         assign(icc, s390_call_calculate_iccSS(S390_CC_OP_SIGNED_COMPARE, op1,
+                op2));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_not_condition_goto_computed(binop(Iop_CmpEQ32, mkexpr(cond),
+                                        mkU32(0)), mkexpr(op4addr));
+      }
+   }
+
+   return "crb";
+}
+
+static HChar *
+s390_irgen_CGRB(UChar r1, UChar r2, UChar m3, IRTemp op4addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto(mkexpr(op4addr));
+      } else {
+         assign(op1, get_gpr_dw0(r1));
+         assign(op2, get_gpr_dw0(r2));
+         assign(icc, s390_call_calculate_iccSS(S390_CC_OP_SIGNED_COMPARE, op1,
+                op2));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_not_condition_goto_computed(binop(Iop_CmpEQ32, mkexpr(cond),
+                                        mkU32(0)), mkexpr(op4addr));
+      }
+   }
+
+   return "cgrb";
+}
+
+static HChar *
+s390_irgen_CRJ(UChar r1, UChar r2, UShort i4, UChar m3)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto_and_chase(
+                guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+      } else {
+         assign(op1, get_gpr_w1(r1));
+         assign(op2, get_gpr_w1(r2));
+         assign(icc, s390_call_calculate_iccSS(S390_CC_OP_SIGNED_COMPARE, op1,
+                op2));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
+                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+
+      }
+   }
+
+   return "crj";
+}
+
+static HChar *
+s390_irgen_CGRJ(UChar r1, UChar r2, UShort i4, UChar m3)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto_and_chase(
+                guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+      } else {
+         assign(op1, get_gpr_dw0(r1));
+         assign(op2, get_gpr_dw0(r2));
+         assign(icc, s390_call_calculate_iccSS(S390_CC_OP_SIGNED_COMPARE, op1,
+                op2));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
+                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+
+      }
+   }
+
+   return "cgrj";
+}
+
+static HChar *
+s390_irgen_CIB(UChar r1, UChar m3, UChar i2, IRTemp op4addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto(mkexpr(op4addr));
+      } else {
+         assign(op1, get_gpr_w1(r1));
+         op2 = (Int)(Char)i2;
+         assign(icc, s390_call_calculate_iccSS(S390_CC_OP_SIGNED_COMPARE, op1,
+                mktemp(Ity_I32, mkU32((UInt)op2))));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_not_condition_goto_computed(binop(Iop_CmpEQ32, mkexpr(cond),
+                                        mkU32(0)), mkexpr(op4addr));
+      }
+   }
+
+   return "cib";
+}
+
+static HChar *
+s390_irgen_CGIB(UChar r1, UChar m3, UChar i2, IRTemp op4addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   Long op2;
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto(mkexpr(op4addr));
+      } else {
+         assign(op1, get_gpr_dw0(r1));
+         op2 = (Long)(Char)i2;
+         assign(icc, s390_call_calculate_iccSS(S390_CC_OP_SIGNED_COMPARE, op1,
+                mktemp(Ity_I64, mkU64((ULong)op2))));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_not_condition_goto_computed(binop(Iop_CmpEQ32, mkexpr(cond),
+                                        mkU32(0)), mkexpr(op4addr));
+      }
+   }
+
+   return "cgib";
+}
+
+static HChar *
+s390_irgen_CIJ(UChar r1, UChar m3, UShort i4, UChar i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto_and_chase(
+                guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+      } else {
+         assign(op1, get_gpr_w1(r1));
+         op2 = (Int)(Char)i2;
+         assign(icc, s390_call_calculate_iccSS(S390_CC_OP_SIGNED_COMPARE, op1,
+                mktemp(Ity_I32, mkU32((UInt)op2))));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
+                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+
+      }
+   }
+
+   return "cij";
+}
+
+static HChar *
+s390_irgen_CGIJ(UChar r1, UChar m3, UShort i4, UChar i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   Long op2;
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto_and_chase(
+                guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+      } else {
+         assign(op1, get_gpr_dw0(r1));
+         op2 = (Long)(Char)i2;
+         assign(icc, s390_call_calculate_iccSS(S390_CC_OP_SIGNED_COMPARE, op1,
+                mktemp(Ity_I64, mkU64((ULong)op2))));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
+                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+
+      }
+   }
+
+   return "cgij";
+}
+
+static HChar *
+s390_irgen_CH(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkexpr(op2addr))));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "ch";
+}
+
+static HChar *
+s390_irgen_CHY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkexpr(op2addr))));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "chy";
+}
+
+static HChar *
+s390_irgen_CGH(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_16Sto64, load(Ity_I16, mkexpr(op2addr))));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "cgh";
+}
+
+static HChar *
+s390_irgen_CHI(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = (Int)(Short)i2;
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, mktemp(Ity_I32,
+                       mkU32((UInt)op2)));
+
+   return "chi";
+}
+
+static HChar *
+s390_irgen_CGHI(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   Long op2;
+
+   assign(op1, get_gpr_dw0(r1));
+   op2 = (Long)(Short)i2;
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, mktemp(Ity_I64,
+                       mkU64((ULong)op2)));
+
+   return "cghi";
+}
+
+static HChar *
+s390_irgen_CHHSI(UShort i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I16);
+   Short op2;
+
+   assign(op1, load(Ity_I16, mkexpr(op1addr)));
+   op2 = (Short)i2;
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, mktemp(Ity_I16,
+                       mkU16((UShort)op2)));
+
+   return "chhsi";
+}
+
+static HChar *
+s390_irgen_CHSI(UShort i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+
+   assign(op1, load(Ity_I32, mkexpr(op1addr)));
+   op2 = (Int)(Short)i2;
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, mktemp(Ity_I32,
+                       mkU32((UInt)op2)));
+
+   return "chsi";
+}
+
+static HChar *
+s390_irgen_CGHSI(UShort i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   Long op2;
+
+   assign(op1, load(Ity_I64, mkexpr(op1addr)));
+   op2 = (Long)(Short)i2;
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, mktemp(Ity_I64,
+                       mkU64((ULong)op2)));
+
+   return "cghsi";
+}
+
+static HChar *
+s390_irgen_CHRL(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkU64(guest_IA_curr_instr +
+          ((ULong)(Long)(Int)i2 << 1)))));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "chrl";
+}
+
+static HChar *
+s390_irgen_CGHRL(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_16Sto64, load(Ity_I16, mkU64(guest_IA_curr_instr +
+          ((ULong)(Long)(Int)i2 << 1)))));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "cghrl";
+}
+
+static HChar *
+s390_irgen_CHHR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   assign(op2, get_gpr_w0(r2));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "chhr";
+}
+
+static HChar *
+s390_irgen_CHLR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   assign(op2, get_gpr_w1(r2));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "chlr";
+}
+
+static HChar *
+s390_irgen_CHF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
+
+   return "chf";
+}
+
+static HChar *
+s390_irgen_CIH(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+
+   assign(op1, get_gpr_w0(r1));
+   op2 = (Int)i2;
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, mktemp(Ity_I32,
+                       mkU32((UInt)op2)));
+
+   return "cih";
+}
+
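+/* Unsigned (logical) compare family; same shape as the signed compares,
+   but with the unsigned compare op in the thunk.  */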
+static HChar *
+s390_irgen_CLR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clr";
+}
+
+static HChar *
+s390_irgen_CLGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clgr";
+}
+
+static HChar *
+s390_irgen_CLGFR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Uto64, get_gpr_w1(r2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clgfr";
+}
+
+static HChar *
+s390_irgen_CL(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "cl";
+}
+
+static HChar *
+s390_irgen_CLY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "cly";
+}
+
+static HChar *
+s390_irgen_CLG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clg";
+}
+
+static HChar *
+s390_irgen_CLGF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkexpr(op2addr))));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clgf";
+}
+
+static HChar *
+s390_irgen_CLFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = i2;
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, mktemp(Ity_I32,
+                       mkU32(op2)));
+
+   return "clfi";
+}
+
+static HChar *
+s390_irgen_CLGFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   ULong op2;
+
+   assign(op1, get_gpr_dw0(r1));
+   op2 = (ULong)i2;
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, mktemp(Ity_I64,
+                       mkU64(op2)));
+
+   return "clgfi";
+}
+
+static HChar *
+s390_irgen_CLI(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I8);
+   UChar op2;
+
+   assign(op1, load(Ity_I8, mkexpr(op1addr)));
+   op2 = i2;
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, mktemp(Ity_I8,
+                       mkU8(op2)));
+
+   return "cli";
+}
+
+static HChar *
+s390_irgen_CLIY(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I8);
+   UChar op2;
+
+   assign(op1, load(Ity_I8, mkexpr(op1addr)));
+   op2 = i2;
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, mktemp(Ity_I8,
+                       mkU8(op2)));
+
+   return "cliy";
+}
+
+static HChar *
+s390_irgen_CLFHSI(UShort i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+
+   assign(op1, load(Ity_I32, mkexpr(op1addr)));
+   op2 = (UInt)i2;
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, mktemp(Ity_I32,
+                       mkU32(op2)));
+
+   return "clfhsi";
+}
+
+static HChar *
+s390_irgen_CLGHSI(UShort i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   ULong op2;
+
+   assign(op1, load(Ity_I64, mkexpr(op1addr)));
+   op2 = (ULong)i2;
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, mktemp(Ity_I64,
+                       mkU64(op2)));
+
+   return "clghsi";
+}
+
+static HChar *
+s390_irgen_CLHHSI(UShort i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I16);
+   UShort op2;
+
+   assign(op1, load(Ity_I16, mkexpr(op1addr)));
+   op2 = i2;
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, mktemp(Ity_I16,
+                       mkU16(op2)));
+
+   return "clhhsi";
+}
+
+static HChar *
+s390_irgen_CLRL(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkU64(guest_IA_curr_instr +
+          ((ULong)(Long)(Int)i2 << 1))));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clrl";
+}
+
+static HChar *
+s390_irgen_CLGRL(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkU64(guest_IA_curr_instr +
+          ((ULong)(Long)(Int)i2 << 1))));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clgrl";
+}
+
+static HChar *
+s390_irgen_CLGFRL(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkU64(guest_IA_curr_instr +
+          ((ULong)(Long)(Int)i2 << 1)))));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clgfrl";
+}
+
+static HChar *
+s390_irgen_CLHRL(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, unop(Iop_16Uto32, load(Ity_I16, mkU64(guest_IA_curr_instr +
+          ((ULong)(Long)(Int)i2 << 1)))));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clhrl";
+}
+
+static HChar *
+s390_irgen_CLGHRL(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_16Uto64, load(Ity_I16, mkU64(guest_IA_curr_instr +
+          ((ULong)(Long)(Int)i2 << 1)))));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clghrl";
+}
+
+static HChar *
+s390_irgen_CLRB(UChar r1, UChar r2, UChar m3, IRTemp op4addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto(mkexpr(op4addr));
+      } else {
+         assign(op1, get_gpr_w1(r1));
+         assign(op2, get_gpr_w1(r2));
+         assign(icc, s390_call_calculate_iccZZ(S390_CC_OP_UNSIGNED_COMPARE, op1,
+                op2));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_not_condition_goto_computed(binop(Iop_CmpEQ32, mkexpr(cond),
+                                        mkU32(0)), mkexpr(op4addr));
+      }
+   }
+
+   return "clrb";
+}
+
+static HChar *
+s390_irgen_CLGRB(UChar r1, UChar r2, UChar m3, IRTemp op4addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto(mkexpr(op4addr));
+      } else {
+         assign(op1, get_gpr_dw0(r1));
+         assign(op2, get_gpr_dw0(r2));
+         assign(icc, s390_call_calculate_iccZZ(S390_CC_OP_UNSIGNED_COMPARE, op1,
+                op2));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_not_condition_goto_computed(binop(Iop_CmpEQ32, mkexpr(cond),
+                                        mkU32(0)), mkexpr(op4addr));
+      }
+   }
+
+   return "clgrb";
+}
+
+static HChar *
+s390_irgen_CLRJ(UChar r1, UChar r2, UShort i4, UChar m3)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto_and_chase(
+                guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+      } else {
+         assign(op1, get_gpr_w1(r1));
+         assign(op2, get_gpr_w1(r2));
+         assign(icc, s390_call_calculate_iccZZ(S390_CC_OP_UNSIGNED_COMPARE, op1,
+                op2));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
+                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+
+      }
+   }
+
+   return "clrj";
+}
+
+static HChar *
+s390_irgen_CLGRJ(UChar r1, UChar r2, UShort i4, UChar m3)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto_and_chase(
+                guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+      } else {
+         assign(op1, get_gpr_dw0(r1));
+         assign(op2, get_gpr_dw0(r2));
+         assign(icc, s390_call_calculate_iccZZ(S390_CC_OP_UNSIGNED_COMPARE, op1,
+                op2));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
+                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+
+      }
+   }
+
+   return "clgrj";
+}
+
+static HChar *
+s390_irgen_CLIB(UChar r1, UChar m3, UChar i2, IRTemp op4addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto(mkexpr(op4addr));
+      } else {
+         assign(op1, get_gpr_w1(r1));
+         op2 = (UInt)i2;
+         assign(icc, s390_call_calculate_iccZZ(S390_CC_OP_UNSIGNED_COMPARE, op1,
+                mktemp(Ity_I32, mkU32(op2))));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_not_condition_goto_computed(binop(Iop_CmpEQ32, mkexpr(cond),
+                                        mkU32(0)), mkexpr(op4addr));
+      }
+   }
+
+   return "clib";
+}
+
+static HChar *
+s390_irgen_CLGIB(UChar r1, UChar m3, UChar i2, IRTemp op4addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   ULong op2;
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto(mkexpr(op4addr));
+      } else {
+         assign(op1, get_gpr_dw0(r1));
+         op2 = (ULong)i2;
+         assign(icc, s390_call_calculate_iccZZ(S390_CC_OP_UNSIGNED_COMPARE, op1,
+                mktemp(Ity_I64, mkU64(op2))));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_not_condition_goto_computed(binop(Iop_CmpEQ32, mkexpr(cond),
+                                        mkU32(0)), mkexpr(op4addr));
+      }
+   }
+
+   return "clgib";
+}
+
+static HChar *
+s390_irgen_CLIJ(UChar r1, UChar m3, UShort i4, UChar i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto_and_chase(
+                guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+      } else {
+         assign(op1, get_gpr_w1(r1));
+         op2 = (UInt)i2;
+         assign(icc, s390_call_calculate_iccZZ(S390_CC_OP_UNSIGNED_COMPARE, op1,
+                mktemp(Ity_I32, mkU32(op2))));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
+                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+
+      }
+   }
+
+   return "clij";
+}
+
+static HChar *
+s390_irgen_CLGIJ(UChar r1, UChar m3, UShort i4, UChar i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   ULong op2;
+   IRTemp icc = newTemp(Ity_I32);
+   IRTemp cond = newTemp(Ity_I32);
+
+   if (m3 == 0) {
+   } else {
+      if (m3 == 14) {
+         always_goto_and_chase(
+                guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+      } else {
+         assign(op1, get_gpr_dw0(r1));
+         op2 = (ULong)i2;
+         assign(icc, s390_call_calculate_iccZZ(S390_CC_OP_UNSIGNED_COMPARE, op1,
+                mktemp(Ity_I64, mkU64(op2))));
+         assign(cond, binop(Iop_And32, binop(Iop_Shl32, mkU32(m3),
+                unop(Iop_32to8, mkexpr(icc))), mkU32(8)));
+         if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
+                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
+
+      }
+   }
+
+   return "clgij";
+}
+
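+/* CLM: compare logical under mask.  Each set bit of the r3 mask selects
+   one byte of r1's low word (b4..b7) for comparison with the next
+   sequential byte at op2addr; unselected bytes take part as zero on both
+   sides, and n counts the storage bytes consumed.  CLMY and CLMH below
+   are the long-displacement and high-word variants.  */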
+static HChar *
+s390_irgen_CLM(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp b0 = newTemp(Ity_I32);
+   IRTemp b1 = newTemp(Ity_I32);
+   IRTemp b2 = newTemp(Ity_I32);
+   IRTemp b3 = newTemp(Ity_I32);
+   IRTemp c0 = newTemp(Ity_I32);
+   IRTemp c1 = newTemp(Ity_I32);
+   IRTemp c2 = newTemp(Ity_I32);
+   IRTemp c3 = newTemp(Ity_I32);
+   UChar n;
+
+   n = 0;
+   if ((r3 & 8) != 0) {
+      assign(b0, unop(Iop_8Uto32, get_gpr_b4(r1)));
+      assign(c0, unop(Iop_8Uto32, load(Ity_I8, mkexpr(op2addr))));
+      n = n + 1;
+   } else {
+      assign(b0, mkU32(0));
+      assign(c0, mkU32(0));
+   }
+   if ((r3 & 4) != 0) {
+      assign(b1, unop(Iop_8Uto32, get_gpr_b5(r1)));
+      assign(c1, unop(Iop_8Uto32, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr),
+             mkU64(n)))));
+      n = n + 1;
+   } else {
+      assign(b1, mkU32(0));
+      assign(c1, mkU32(0));
+   }
+   if ((r3 & 2) != 0) {
+      assign(b2, unop(Iop_8Uto32, get_gpr_b6(r1)));
+      assign(c2, unop(Iop_8Uto32, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr),
+             mkU64(n)))));
+      n = n + 1;
+   } else {
+      assign(b2, mkU32(0));
+      assign(c2, mkU32(0));
+   }
+   if ((r3 & 1) != 0) {
+      assign(b3, unop(Iop_8Uto32, get_gpr_b7(r1)));
+      assign(c3, unop(Iop_8Uto32, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr),
+             mkU64(n)))));
+      n = n + 1;
+   } else {
+      assign(b3, mkU32(0));
+      assign(c3, mkU32(0));
+   }
+   assign(op1, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Shl32,
+          mkexpr(b0), mkU8(24)), binop(Iop_Shl32, mkexpr(b1), mkU8(16))),
+          binop(Iop_Shl32, mkexpr(b2), mkU8(8))), mkexpr(b3)));
+   assign(op2, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Shl32,
+          mkexpr(c0), mkU8(24)), binop(Iop_Shl32, mkexpr(c1), mkU8(16))),
+          binop(Iop_Shl32, mkexpr(c2), mkU8(8))), mkexpr(c3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clm";
+}
+
+static HChar *
+s390_irgen_CLMY(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp b0 = newTemp(Ity_I32);
+   IRTemp b1 = newTemp(Ity_I32);
+   IRTemp b2 = newTemp(Ity_I32);
+   IRTemp b3 = newTemp(Ity_I32);
+   IRTemp c0 = newTemp(Ity_I32);
+   IRTemp c1 = newTemp(Ity_I32);
+   IRTemp c2 = newTemp(Ity_I32);
+   IRTemp c3 = newTemp(Ity_I32);
+   UChar n;
+
+   n = 0;
+   if ((r3 & 8) != 0) {
+      assign(b0, unop(Iop_8Uto32, get_gpr_b4(r1)));
+      assign(c0, unop(Iop_8Uto32, load(Ity_I8, mkexpr(op2addr))));
+      n = n + 1;
+   } else {
+      assign(b0, mkU32(0));
+      assign(c0, mkU32(0));
+   }
+   if ((r3 & 4) != 0) {
+      assign(b1, unop(Iop_8Uto32, get_gpr_b5(r1)));
+      assign(c1, unop(Iop_8Uto32, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr),
+             mkU64(n)))));
+      n = n + 1;
+   } else {
+      assign(b1, mkU32(0));
+      assign(c1, mkU32(0));
+   }
+   if ((r3 & 2) != 0) {
+      assign(b2, unop(Iop_8Uto32, get_gpr_b6(r1)));
+      assign(c2, unop(Iop_8Uto32, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr),
+             mkU64(n)))));
+      n = n + 1;
+   } else {
+      assign(b2, mkU32(0));
+      assign(c2, mkU32(0));
+   }
+   if ((r3 & 1) != 0) {
+      assign(b3, unop(Iop_8Uto32, get_gpr_b7(r1)));
+      assign(c3, unop(Iop_8Uto32, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr),
+             mkU64(n)))));
+      n = n + 1;
+   } else {
+      assign(b3, mkU32(0));
+      assign(c3, mkU32(0));
+   }
+   assign(op1, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Shl32,
+          mkexpr(b0), mkU8(24)), binop(Iop_Shl32, mkexpr(b1), mkU8(16))),
+          binop(Iop_Shl32, mkexpr(b2), mkU8(8))), mkexpr(b3)));
+   assign(op2, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Shl32,
+          mkexpr(c0), mkU8(24)), binop(Iop_Shl32, mkexpr(c1), mkU8(16))),
+          binop(Iop_Shl32, mkexpr(c2), mkU8(8))), mkexpr(c3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clmy";
+}
+
+static HChar *
+s390_irgen_CLMH(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp b0 = newTemp(Ity_I32);
+   IRTemp b1 = newTemp(Ity_I32);
+   IRTemp b2 = newTemp(Ity_I32);
+   IRTemp b3 = newTemp(Ity_I32);
+   IRTemp c0 = newTemp(Ity_I32);
+   IRTemp c1 = newTemp(Ity_I32);
+   IRTemp c2 = newTemp(Ity_I32);
+   IRTemp c3 = newTemp(Ity_I32);
+   UChar n;
+
+   n = 0;
+   if ((r3 & 8) != 0) {
+      assign(b0, unop(Iop_8Uto32, get_gpr_b0(r1)));
+      assign(c0, unop(Iop_8Uto32, load(Ity_I8, mkexpr(op2addr))));
+      n = n + 1;
+   } else {
+      assign(b0, mkU32(0));
+      assign(c0, mkU32(0));
+   }
+   if ((r3 & 4) != 0) {
+      assign(b1, unop(Iop_8Uto32, get_gpr_b1(r1)));
+      assign(c1, unop(Iop_8Uto32, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr),
+             mkU64(n)))));
+      n = n + 1;
+   } else {
+      assign(b1, mkU32(0));
+      assign(c1, mkU32(0));
+   }
+   if ((r3 & 2) != 0) {
+      assign(b2, unop(Iop_8Uto32, get_gpr_b2(r1)));
+      assign(c2, unop(Iop_8Uto32, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr),
+             mkU64(n)))));
+      n = n + 1;
+   } else {
+      assign(b2, mkU32(0));
+      assign(c2, mkU32(0));
+   }
+   if ((r3 & 1) != 0) {
+      assign(b3, unop(Iop_8Uto32, get_gpr_b3(r1)));
+      assign(c3, unop(Iop_8Uto32, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr),
+             mkU64(n)))));
+      n = n + 1;
+   } else {
+      assign(b3, mkU32(0));
+      assign(c3, mkU32(0));
+   }
+   assign(op1, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Shl32,
+          mkexpr(b0), mkU8(24)), binop(Iop_Shl32, mkexpr(b1), mkU8(16))),
+          binop(Iop_Shl32, mkexpr(b2), mkU8(8))), mkexpr(b3)));
+   assign(op2, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Or32, binop(Iop_Shl32,
+          mkexpr(c0), mkU8(24)), binop(Iop_Shl32, mkexpr(c1), mkU8(16))),
+          binop(Iop_Shl32, mkexpr(c2), mkU8(8))), mkexpr(c3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clmh";
+}
+
+static HChar *
+s390_irgen_CLHHR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   assign(op2, get_gpr_w0(r2));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clhhr";
+}
+
+static HChar *
+s390_irgen_CLHLR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   assign(op2, get_gpr_w1(r2));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clhlr";
+}
+
+static HChar *
+s390_irgen_CLHF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
+
+   return "clhf";
+}
+
+static HChar *
+s390_irgen_CLIH(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+
+   assign(op1, get_gpr_w0(r1));
+   op2 = i2;
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, mktemp(Ity_I32,
+                       mkU32(op2)));
+
+   return "clih";
+}
+
+static HChar *
+s390_irgen_CPYA(UChar r1, UChar r2)
+{
+   put_ar_w0(r1, get_ar_w0(r2));
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, AR, AR), "cpya", r1, r2);
+
+   return "cpya";
+}
+
+static HChar *
+s390_irgen_XR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   if (r1 == r2) {
+      assign(result, mkU32(0));
+   } else {
+      assign(op1, get_gpr_w1(r1));
+      assign(op2, get_gpr_w1(r2));
+      assign(result, binop(Iop_Xor32, mkexpr(op1), mkexpr(op2)));
+   }
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "xr";
+}
+
+static HChar *
+s390_irgen_XGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   if (r1 == r2) {
+      assign(result, mkU64(0));
+   } else {
+      assign(op1, get_gpr_dw0(r1));
+      assign(op2, get_gpr_dw0(r2));
+      assign(result, binop(Iop_Xor64, mkexpr(op1), mkexpr(op2)));
+   }
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "xgr";
+}
+
+static HChar *
+s390_irgen_XRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Xor32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "xrk";
+}
+
+static HChar *
+s390_irgen_XGRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Xor64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "xgrk";
+}
+
+static HChar *
+s390_irgen_X(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Xor32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "x";
+}
+
+static HChar *
+s390_irgen_XY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Xor32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "xy";
+}
+
+static HChar *
+s390_irgen_XG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(result, binop(Iop_Xor64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "xg";
+}
+
+static HChar *
+s390_irgen_XI(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I8);
+   UChar op2;
+   IRTemp result = newTemp(Ity_I8);
+
+   assign(op1, load(Ity_I8, mkexpr(op1addr)));
+   op2 = i2;
+   assign(result, binop(Iop_Xor8, mkexpr(op1), mkU8(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op1addr), mkexpr(result));
+
+   return "xi";
+}
+
+static HChar *
+s390_irgen_XIY(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I8);
+   UChar op2;
+   IRTemp result = newTemp(Ity_I8);
+
+   assign(op1, load(Ity_I8, mkexpr(op1addr)));
+   op2 = i2;
+   assign(result, binop(Iop_Xor8, mkexpr(op1), mkU8(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op1addr), mkexpr(result));
+
+   return "xiy";
+}
+
+static HChar *
+s390_irgen_XIHF(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Xor32, mkexpr(op1), mkU32(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "xihf";
+}
+
+static HChar *
+s390_irgen_XILF(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Xor32, mkexpr(op1), mkU32(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "xilf";
+}
+
+static HChar *
+s390_irgen_EAR(UChar r1, UChar r2)
+{
+   put_gpr_w1(r1, get_ar_w0(r2));
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, GPR, AR), "ear", r1, r2);
+
+   return "ear";
+}
+
+static HChar *
+s390_irgen_IC(UChar r1, IRTemp op2addr)
+{
+   put_gpr_b7(r1, load(Ity_I8, mkexpr(op2addr)));
+
+   return "ic";
+}
+
+static HChar *
+s390_irgen_ICY(UChar r1, IRTemp op2addr)
+{
+   put_gpr_b7(r1, load(Ity_I8, mkexpr(op2addr)));
+
+   return "icy";
+}
+
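+/* ICM/ICMY/ICMH insert consecutive bytes from the second-operand address
+   into the register bytes selected by the mask r3; unselected bytes are
+   left untouched.  The condition code is derived later from the updated
+   word together with the mask (per the POP: CC0 when the mask is zero or
+   all inserted bits are zero, CC1 when the leftmost inserted bit is one,
+   CC2 otherwise). */
+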
+static HChar *
+s390_irgen_ICM(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar n;
+   IRTemp result = newTemp(Ity_I32);
+   UInt mask;
+
+   n = 0;
+   mask = (UInt)r3;
+   if ((mask & 8) != 0) {
+      put_gpr_b4(r1, load(Ity_I8, mkexpr(op2addr)));
+      n = n + 1;
+   }
+   if ((mask & 4) != 0) {
+      put_gpr_b5(r1, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkU64(n))));
+      n = n + 1;
+   }
+   if ((mask & 2) != 0) {
+      put_gpr_b6(r1, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkU64(n))));
+      n = n + 1;
+   }
+   if ((mask & 1) != 0) {
+      put_gpr_b7(r1, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkU64(n))));
+      n = n + 1;
+   }
+   assign(result, get_gpr_w1(r1));
+   s390_cc_thunk_putZZ(S390_CC_OP_INSERT_CHAR_MASK_32, result, mktemp(Ity_I32,
+                       mkU32(mask)));
+
+   return "icm";
+}
+
+static HChar *
+s390_irgen_ICMY(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar n;
+   IRTemp result = newTemp(Ity_I32);
+   UInt mask;
+
+   n = 0;
+   mask = (UInt)r3;
+   if ((mask & 8) != 0) {
+      put_gpr_b4(r1, load(Ity_I8, mkexpr(op2addr)));
+      n = n + 1;
+   }
+   if ((mask & 4) != 0) {
+      put_gpr_b5(r1, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkU64(n))));
+      n = n + 1;
+   }
+   if ((mask & 2) != 0) {
+      put_gpr_b6(r1, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkU64(n))));
+      n = n + 1;
+   }
+   if ((mask & 1) != 0) {
+      put_gpr_b7(r1, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkU64(n))));
+      n = n + 1;
+   }
+   assign(result, get_gpr_w1(r1));
+   s390_cc_thunk_putZZ(S390_CC_OP_INSERT_CHAR_MASK_32, result, mktemp(Ity_I32,
+                       mkU32(mask)));
+
+   return "icmy";
+}
+
+static HChar *
+s390_irgen_ICMH(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar n;
+   IRTemp result = newTemp(Ity_I32);
+   UInt mask;
+
+   n = 0;
+   mask = (UInt)r3;
+   if ((mask & 8) != 0) {
+      put_gpr_b0(r1, load(Ity_I8, mkexpr(op2addr)));
+      n = n + 1;
+   }
+   if ((mask & 4) != 0) {
+      put_gpr_b1(r1, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkU64(n))));
+      n = n + 1;
+   }
+   if ((mask & 2) != 0) {
+      put_gpr_b2(r1, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkU64(n))));
+      n = n + 1;
+   }
+   if ((mask & 1) != 0) {
+      put_gpr_b3(r1, load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkU64(n))));
+      n = n + 1;
+   }
+   assign(result, get_gpr_w0(r1));
+   s390_cc_thunk_putZZ(S390_CC_OP_INSERT_CHAR_MASK_32, result, mktemp(Ity_I32,
+                       mkU32(mask)));
+
+   return "icmh";
+}
+
+static HChar *
+s390_irgen_IIHF(UChar r1, UInt i2)
+{
+   put_gpr_w0(r1, mkU32(i2));
+
+   return "iihf";
+}
+
+static HChar *
+s390_irgen_IIHH(UChar r1, UShort i2)
+{
+   put_gpr_hw0(r1, mkU16(i2));
+
+   return "iihh";
+}
+
+static HChar *
+s390_irgen_IIHL(UChar r1, UShort i2)
+{
+   put_gpr_hw1(r1, mkU16(i2));
+
+   return "iihl";
+}
+
+static HChar *
+s390_irgen_IILF(UChar r1, UInt i2)
+{
+   put_gpr_w1(r1, mkU32(i2));
+
+   return "iilf";
+}
+
+static HChar *
+s390_irgen_IILH(UChar r1, UShort i2)
+{
+   put_gpr_hw2(r1, mkU16(i2));
+
+   return "iilh";
+}
+
+static HChar *
+s390_irgen_IILL(UChar r1, UShort i2)
+{
+   put_gpr_hw3(r1, mkU16(i2));
+
+   return "iill";
+}
+
+static HChar *
+s390_irgen_LR(UChar r1, UChar r2)
+{
+   put_gpr_w1(r1, get_gpr_w1(r2));
+
+   return "lr";
+}
+
+static HChar *
+s390_irgen_LGR(UChar r1, UChar r2)
+{
+   put_gpr_dw0(r1, get_gpr_dw0(r2));
+
+   return "lgr";
+}
+
+static HChar *
+s390_irgen_LGFR(UChar r1, UChar r2)
+{
+   put_gpr_dw0(r1, unop(Iop_32Sto64, get_gpr_w1(r2)));
+
+   return "lgfr";
+}
+
+static HChar *
+s390_irgen_L(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, load(Ity_I32, mkexpr(op2addr)));
+
+   return "l";
+}
+
+static HChar *
+s390_irgen_LY(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, load(Ity_I32, mkexpr(op2addr)));
+
+   return "ly";
+}
+
+static HChar *
+s390_irgen_LG(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, load(Ity_I64, mkexpr(op2addr)));
+
+   return "lg";
+}
+
+static HChar *
+s390_irgen_LGF(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, unop(Iop_32Sto64, load(Ity_I32, mkexpr(op2addr))));
+
+   return "lgf";
+}
+
+static HChar *
+s390_irgen_LGFI(UChar r1, UInt i2)
+{
+   put_gpr_dw0(r1, mkU64((ULong)(Long)(Int)i2));
+
+   return "lgfi";
+}
+
+static HChar *
+s390_irgen_LRL(UChar r1, UInt i2)
+{
+   put_gpr_w1(r1, load(Ity_I32, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)
+              i2 << 1))));
+
+   return "lrl";
+}
+
+static HChar *
+s390_irgen_LGRL(UChar r1, UInt i2)
+{
+   put_gpr_dw0(r1, load(Ity_I64, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)
+               i2 << 1))));
+
+   return "lgrl";
+}
+
+static HChar *
+s390_irgen_LGFRL(UChar r1, UInt i2)
+{
+   put_gpr_dw0(r1, unop(Iop_32Sto64, load(Ity_I32, mkU64(guest_IA_curr_instr +
+               ((ULong)(Long)(Int)i2 << 1)))));
+
+   return "lgfrl";
+}
+
+static HChar *
+s390_irgen_LA(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, mkexpr(op2addr));
+
+   return "la";
+}
+
+static HChar *
+s390_irgen_LAY(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, mkexpr(op2addr));
+
+   return "lay";
+}
+
+static HChar *
+s390_irgen_LAE(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, mkexpr(op2addr));
+
+   return "lae";
+}
+
+static HChar *
+s390_irgen_LAEY(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, mkexpr(op2addr));
+
+   return "laey";
+}
+
+static HChar *
+s390_irgen_LARL(UChar r1, UInt i2)
+{
+   put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)));
+
+   return "larl";
+}
+
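+/* The load-and-* family below (LAA, LAAG, LAAL, LAALG, LAN, LANG, LAX,
+   LAXG, LAO, LAOG) returns the old second operand in r1 and stores the
+   combined value back to memory.  Note that the translation is a plain
+   load/op/store sequence; the interlocked (atomic) nature of these
+   instructions is not modelled at the IR level here. */
+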
+static HChar *
+s390_irgen_LAA(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Add32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_32, op2, op3);
+   store(mkexpr(op2addr), mkexpr(result));
+   put_gpr_w1(r1, mkexpr(op2));
+
+   return "laa";
+}
+
+static HChar *
+s390_irgen_LAAG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Add64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_ADD_64, op2, op3);
+   store(mkexpr(op2addr), mkexpr(result));
+   put_gpr_dw0(r1, mkexpr(op2));
+
+   return "laag";
+}
+
+static HChar *
+s390_irgen_LAAL(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Add32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_32, op2, op3);
+   store(mkexpr(op2addr), mkexpr(result));
+   put_gpr_w1(r1, mkexpr(op2));
+
+   return "laal";
+}
+
+static HChar *
+s390_irgen_LAALG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Add64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_ADD_64, op2, op3);
+   store(mkexpr(op2addr), mkexpr(result));
+   put_gpr_dw0(r1, mkexpr(op2));
+
+   return "laalg";
+}
+
+static HChar *
+s390_irgen_LAN(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_And32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op2addr), mkexpr(result));
+   put_gpr_w1(r1, mkexpr(op2));
+
+   return "lan";
+}
+
+static HChar *
+s390_irgen_LANG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_And64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op2addr), mkexpr(result));
+   put_gpr_dw0(r1, mkexpr(op2));
+
+   return "lang";
+}
+
+static HChar *
+s390_irgen_LAX(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Xor32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op2addr), mkexpr(result));
+   put_gpr_w1(r1, mkexpr(op2));
+
+   return "lax";
+}
+
+static HChar *
+s390_irgen_LAXG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Xor64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op2addr), mkexpr(result));
+   put_gpr_dw0(r1, mkexpr(op2));
+
+   return "laxg";
+}
+
+static HChar *
+s390_irgen_LAO(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Or32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op2addr), mkexpr(result));
+   put_gpr_w1(r1, mkexpr(op2));
+
+   return "lao";
+}
+
+static HChar *
+s390_irgen_LAOG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Or64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op2addr), mkexpr(result));
+   put_gpr_dw0(r1, mkexpr(op2));
+
+   return "laog";
+}
+
+static HChar *
+s390_irgen_LTR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   put_gpr_w1(r1, mkexpr(op2));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, op2);
+
+   return "ltr";
+}
+
+static HChar *
+s390_irgen_LTGR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   put_gpr_dw0(r1, mkexpr(op2));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, op2);
+
+   return "ltgr";
+}
+
+static HChar *
+s390_irgen_LTGFR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, unop(Iop_32Sto64, get_gpr_w1(r2)));
+   put_gpr_dw0(r1, mkexpr(op2));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, op2);
+
+   return "ltgfr";
+}
+
+static HChar *
+s390_irgen_LT(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   put_gpr_w1(r1, mkexpr(op2));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, op2);
+
+   return "lt";
+}
+
+static HChar *
+s390_irgen_LTG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   put_gpr_dw0(r1, mkexpr(op2));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, op2);
+
+   return "ltg";
+}
+
+static HChar *
+s390_irgen_LTGF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkexpr(op2addr))));
+   put_gpr_dw0(r1, mkexpr(op2));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, op2);
+
+   return "ltgf";
+}
+
+static HChar *
+s390_irgen_LBR(UChar r1, UChar r2)
+{
+   put_gpr_w1(r1, unop(Iop_8Sto32, get_gpr_b7(r2)));
+
+   return "lbr";
+}
+
+static HChar *
+s390_irgen_LGBR(UChar r1, UChar r2)
+{
+   put_gpr_dw0(r1, unop(Iop_8Sto64, get_gpr_b7(r2)));
+
+   return "lgbr";
+}
+
+static HChar *
+s390_irgen_LB(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, unop(Iop_8Sto32, load(Ity_I8, mkexpr(op2addr))));
+
+   return "lb";
+}
+
+static HChar *
+s390_irgen_LGB(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, unop(Iop_8Sto64, load(Ity_I8, mkexpr(op2addr))));
+
+   return "lgb";
+}
+
+static HChar *
+s390_irgen_LBH(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w0(r1, unop(Iop_8Sto32, load(Ity_I8, mkexpr(op2addr))));
+
+   return "lbh";
+}
+
+static HChar *
+s390_irgen_LCR(UChar r1, UChar r2)
+{
+   Int op1;
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   op1 = 0;
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_Sub32, mkU32((UInt)op1), mkexpr(op2)));
+   put_gpr_w1(r1, mkexpr(result));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_32,
+                       mktemp(Ity_I32, mkU32((UInt)op1)), op2);
+
+   return "lcr";
+}
+
+static HChar *
+s390_irgen_LCGR(UChar r1, UChar r2)
+{
+   Long op1;
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   op1 = 0ULL;
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, binop(Iop_Sub64, mkU64((ULong)op1), mkexpr(op2)));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_64,
+                       mktemp(Ity_I64, mkU64((ULong)op1)), op2);
+
+   return "lcgr";
+}
+
+static HChar *
+s390_irgen_LCGFR(UChar r1, UChar r2)
+{
+   Long op1;
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   op1 = 0ULL;
+   assign(op2, unop(Iop_32Sto64, get_gpr_w1(r2)));
+   assign(result, binop(Iop_Sub64, mkU64((ULong)op1), mkexpr(op2)));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_64,
+                       mktemp(Ity_I64, mkU64((ULong)op1)), op2);
+
+   return "lcgfr";
+}
+
+static HChar *
+s390_irgen_LHR(UChar r1, UChar r2)
+{
+   put_gpr_w1(r1, unop(Iop_16Sto32, get_gpr_hw3(r2)));
+
+   return "lhr";
+}
+
+static HChar *
+s390_irgen_LGHR(UChar r1, UChar r2)
+{
+   put_gpr_dw0(r1, unop(Iop_16Sto64, get_gpr_hw3(r2)));
+
+   return "lghr";
+}
+
+static HChar *
+s390_irgen_LH(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, unop(Iop_16Sto32, load(Ity_I16, mkexpr(op2addr))));
+
+   return "lh";
+}
+
+static HChar *
+s390_irgen_LHY(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, unop(Iop_16Sto32, load(Ity_I16, mkexpr(op2addr))));
+
+   return "lhy";
+}
+
+static HChar *
+s390_irgen_LGH(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, unop(Iop_16Sto64, load(Ity_I16, mkexpr(op2addr))));
+
+   return "lgh";
+}
+
+static HChar *
+s390_irgen_LHI(UChar r1, UShort i2)
+{
+   put_gpr_w1(r1, mkU32((UInt)(Int)(Short)i2));
+
+   return "lhi";
+}
+
+static HChar *
+s390_irgen_LGHI(UChar r1, UShort i2)
+{
+   put_gpr_dw0(r1, mkU64((ULong)(Long)(Short)i2));
+
+   return "lghi";
+}
+
+static HChar *
+s390_irgen_LHRL(UChar r1, UInt i2)
+{
+   put_gpr_w1(r1, unop(Iop_16Sto32, load(Ity_I16, mkU64(guest_IA_curr_instr +
+              ((ULong)(Long)(Int)i2 << 1)))));
+
+   return "lhrl";
+}
+
+static HChar *
+s390_irgen_LGHRL(UChar r1, UInt i2)
+{
+   put_gpr_dw0(r1, unop(Iop_16Sto64, load(Ity_I16, mkU64(guest_IA_curr_instr +
+               ((ULong)(Long)(Int)i2 << 1)))));
+
+   return "lghrl";
+}
+
+static HChar *
+s390_irgen_LHH(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w0(r1, unop(Iop_16Sto32, load(Ity_I16, mkexpr(op2addr))));
+
+   return "lhh";
+}
+
+static HChar *
+s390_irgen_LFH(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w0(r1, load(Ity_I32, mkexpr(op2addr)));
+
+   return "lfh";
+}
+
+static HChar *
+s390_irgen_LLGFR(UChar r1, UChar r2)
+{
+   put_gpr_dw0(r1, unop(Iop_32Uto64, get_gpr_w1(r2)));
+
+   return "llgfr";
+}
+
+static HChar *
+s390_irgen_LLGF(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, unop(Iop_32Uto64, load(Ity_I32, mkexpr(op2addr))));
+
+   return "llgf";
+}
+
+static HChar *
+s390_irgen_LLGFRL(UChar r1, UInt i2)
+{
+   put_gpr_dw0(r1, unop(Iop_32Uto64, load(Ity_I32, mkU64(guest_IA_curr_instr +
+               ((ULong)(Long)(Int)i2 << 1)))));
+
+   return "llgfrl";
+}
+
+static HChar *
+s390_irgen_LLCR(UChar r1, UChar r2)
+{
+   put_gpr_w1(r1, unop(Iop_8Uto32, get_gpr_b7(r2)));
+
+   return "llcr";
+}
+
+static HChar *
+s390_irgen_LLGCR(UChar r1, UChar r2)
+{
+   put_gpr_dw0(r1, unop(Iop_8Uto64, get_gpr_b7(r2)));
+
+   return "llgcr";
+}
+
+static HChar *
+s390_irgen_LLC(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, unop(Iop_8Uto32, load(Ity_I8, mkexpr(op2addr))));
+
+   return "llc";
+}
+
+static HChar *
+s390_irgen_LLGC(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, unop(Iop_8Uto64, load(Ity_I8, mkexpr(op2addr))));
+
+   return "llgc";
+}
+
+static HChar *
+s390_irgen_LLCH(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w0(r1, unop(Iop_8Uto32, load(Ity_I8, mkexpr(op2addr))));
+
+   return "llch";
+}
+
+static HChar *
+s390_irgen_LLHR(UChar r1, UChar r2)
+{
+   put_gpr_w1(r1, unop(Iop_16Uto32, get_gpr_hw3(r2)));
+
+   return "llhr";
+}
+
+static HChar *
+s390_irgen_LLGHR(UChar r1, UChar r2)
+{
+   put_gpr_dw0(r1, unop(Iop_16Uto64, get_gpr_hw3(r2)));
+
+   return "llghr";
+}
+
+static HChar *
+s390_irgen_LLH(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, unop(Iop_16Uto32, load(Ity_I16, mkexpr(op2addr))));
+
+   return "llh";
+}
+
+static HChar *
+s390_irgen_LLGH(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, unop(Iop_16Uto64, load(Ity_I16, mkexpr(op2addr))));
+
+   return "llgh";
+}
+
+static HChar *
+s390_irgen_LLHRL(UChar r1, UInt i2)
+{
+   put_gpr_w1(r1, unop(Iop_16Uto32, load(Ity_I16, mkU64(guest_IA_curr_instr +
+              ((ULong)(Long)(Int)i2 << 1)))));
+
+   return "llhrl";
+}
+
+static HChar *
+s390_irgen_LLGHRL(UChar r1, UInt i2)
+{
+   put_gpr_dw0(r1, unop(Iop_16Uto64, load(Ity_I16, mkU64(guest_IA_curr_instr +
+               ((ULong)(Long)(Int)i2 << 1)))));
+
+   return "llghrl";
+}
+
+static HChar *
+s390_irgen_LLHH(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w0(r1, unop(Iop_16Uto32, load(Ity_I16, mkexpr(op2addr))));
+
+   return "llhh";
+}
+
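+/* The load-logical-immediate family below clears the register and places
+   the immediate in one fixed lane by shifting the zero-extended value.
+   For example LLIHL with i2 = 0x1234 yields 0x0000123400000000, and
+   LLILH with the same immediate yields 0x0000000012340000. */
+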
+static HChar *
+s390_irgen_LLIHF(UChar r1, UInt i2)
+{
+   put_gpr_dw0(r1, mkU64(((ULong)i2) << 32));
+
+   return "llihf";
+}
+
+static HChar *
+s390_irgen_LLIHH(UChar r1, UShort i2)
+{
+   put_gpr_dw0(r1, mkU64(((ULong)i2) << 48));
+
+   return "llihh";
+}
+
+static HChar *
+s390_irgen_LLIHL(UChar r1, UShort i2)
+{
+   put_gpr_dw0(r1, mkU64(((ULong)i2) << 32));
+
+   return "llihl";
+}
+
+static HChar *
+s390_irgen_LLILF(UChar r1, UInt i2)
+{
+   put_gpr_dw0(r1, mkU64(i2));
+
+   return "llilf";
+}
+
+static HChar *
+s390_irgen_LLILH(UChar r1, UShort i2)
+{
+   put_gpr_dw0(r1, mkU64(((ULong)i2) << 16));
+
+   return "llilh";
+}
+
+static HChar *
+s390_irgen_LLILL(UChar r1, UShort i2)
+{
+   put_gpr_dw0(r1, mkU64(i2));
+
+   return "llill";
+}
+
+static HChar *
+s390_irgen_LLGTR(UChar r1, UChar r2)
+{
+   put_gpr_dw0(r1, unop(Iop_32Uto64, binop(Iop_And32, get_gpr_w1(r2),
+               mkU32(2147483647))));
+
+   return "llgtr";
+}
+
+static HChar *
+s390_irgen_LLGT(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, unop(Iop_32Uto64, binop(Iop_And32, load(Ity_I32,
+               mkexpr(op2addr)), mkU32(2147483647))));
+
+   return "llgt";
+}
+
+static HChar *
+s390_irgen_LNR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   assign(result, mkite(binop(Iop_CmpLE32S, mkexpr(op2), mkU32(0)), mkexpr(op2),
+          binop(Iop_Sub32, mkU32(0), mkexpr(op2))));
+   put_gpr_w1(r1, mkexpr(result));
+   s390_cc_thunk_putS(S390_CC_OP_BITWISE, result);
+
+   return "lnr";
+}
+
+static HChar *
+s390_irgen_LNGR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, mkite(binop(Iop_CmpLE64S, mkexpr(op2), mkU64(0)), mkexpr(op2),
+          binop(Iop_Sub64, mkU64(0), mkexpr(op2))));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putS(S390_CC_OP_BITWISE, result);
+
+   return "lngr";
+}
+
+static HChar *
+s390_irgen_LNGFR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   /* The second operand is the 32-bit value in r2, sign-extended. */
+   assign(op2, unop(Iop_32Sto64, get_gpr_w1(r2)));
+   assign(result, mkite(binop(Iop_CmpLE64S, mkexpr(op2), mkU64(0)), mkexpr(op2),
+          binop(Iop_Sub64, mkU64(0), mkexpr(op2))));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putS(S390_CC_OP_BITWISE, result);
+
+   return "lngfr";
+}
+
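+/* LOCR/LOCGR implement load-on-condition by branching to the next
+   instruction when the condition computed from m3 is false, so the
+   register copy after the branch only executes when the condition
+   holds. */
+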
+static HChar *
+s390_irgen_LOCR(UChar m3, UChar r1, UChar r2)
+{
+   if_condition_goto(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)),
+                     guest_IA_next_instr);
+   put_gpr_w1(r1, get_gpr_w1(r2));
+
+   return "locr";
+}
+
+static HChar *
+s390_irgen_LOCGR(UChar m3, UChar r1, UChar r2)
+{
+   if_condition_goto(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)),
+                     guest_IA_next_instr);
+   put_gpr_dw0(r1, get_gpr_dw0(r2));
+
+   return "locgr";
+}
+
+static HChar *
+s390_irgen_LOC(UChar r1, IRTemp op2addr)
+{
+   /* condition is checked in format handler */
+   put_gpr_w1(r1, load(Ity_I32, mkexpr(op2addr)));
+
+   return "loc";
+}
+
+static HChar *
+s390_irgen_LOCG(UChar r1, IRTemp op2addr)
+{
+   /* condition is checked in format handler */
+   put_gpr_dw0(r1, load(Ity_I64, mkexpr(op2addr)));
+
+   return "locg";
+}
+
+static HChar *
+s390_irgen_LPQ(UChar r1, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, load(Ity_I64, mkexpr(op2addr)));
+   put_gpr_dw0(r1 + 1,
+               load(Ity_I64, binop(Iop_Add64, mkexpr(op2addr), mkU64(8))));
+
+   return "lpq";
+}
+
+static HChar *
+s390_irgen_LPR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   assign(result, mkite(binop(Iop_CmpLT32S, mkexpr(op2), mkU32(0)),
+          binop(Iop_Sub32, mkU32(0), mkexpr(op2)), mkexpr(op2)));
+   put_gpr_w1(r1, mkexpr(result));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_POSITIVE_32, op2);
+
+   return "lpr";
+}
+
+static HChar *
+s390_irgen_LPGR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, mkite(binop(Iop_CmpLT64S, mkexpr(op2), mkU64(0)),
+          binop(Iop_Sub64, mkU64(0), mkexpr(op2)), mkexpr(op2)));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_POSITIVE_64, op2);
+
+   return "lpgr";
+}
+
+static HChar *
+s390_irgen_LPGFR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, unop(Iop_32Sto64, get_gpr_w1(r2)));
+   assign(result, mkite(binop(Iop_CmpLT64S, mkexpr(op2), mkU64(0)),
+          binop(Iop_Sub64, mkU64(0), mkexpr(op2)), mkexpr(op2)));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_POSITIVE_64, op2);
+
+   return "lpgfr";
+}
+
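+/* The load-reversed family below swaps byte order.  The register forms
+   (LRVR, LRVGR) permute the byte lanes directly, e.g. LRVR turns
+   0x11223344 into 0x44332211; the memory forms (LRVH, LRV, LRVG)
+   reassemble the loaded value byte-reversed into the register. */
+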
+static HChar *
+s390_irgen_LRVR(UChar r1, UChar r2)
+{
+   IRTemp b0 = newTemp(Ity_I8);
+   IRTemp b1 = newTemp(Ity_I8);
+   IRTemp b2 = newTemp(Ity_I8);
+   IRTemp b3 = newTemp(Ity_I8);
+
+   assign(b3, get_gpr_b7(r2));
+   assign(b2, get_gpr_b6(r2));
+   assign(b1, get_gpr_b5(r2));
+   assign(b0, get_gpr_b4(r2));
+   put_gpr_b4(r1, mkexpr(b3));
+   put_gpr_b5(r1, mkexpr(b2));
+   put_gpr_b6(r1, mkexpr(b1));
+   put_gpr_b7(r1, mkexpr(b0));
+
+   return "lrvr";
+}
+
+static HChar *
+s390_irgen_LRVGR(UChar r1, UChar r2)
+{
+   IRTemp b0 = newTemp(Ity_I8);
+   IRTemp b1 = newTemp(Ity_I8);
+   IRTemp b2 = newTemp(Ity_I8);
+   IRTemp b3 = newTemp(Ity_I8);
+   IRTemp b4 = newTemp(Ity_I8);
+   IRTemp b5 = newTemp(Ity_I8);
+   IRTemp b6 = newTemp(Ity_I8);
+   IRTemp b7 = newTemp(Ity_I8);
+
+   assign(b7, get_gpr_b7(r2));
+   assign(b6, get_gpr_b6(r2));
+   assign(b5, get_gpr_b5(r2));
+   assign(b4, get_gpr_b4(r2));
+   assign(b3, get_gpr_b3(r2));
+   assign(b2, get_gpr_b2(r2));
+   assign(b1, get_gpr_b1(r2));
+   assign(b0, get_gpr_b0(r2));
+   put_gpr_b0(r1, mkexpr(b7));
+   put_gpr_b1(r1, mkexpr(b6));
+   put_gpr_b2(r1, mkexpr(b5));
+   put_gpr_b3(r1, mkexpr(b4));
+   put_gpr_b4(r1, mkexpr(b3));
+   put_gpr_b5(r1, mkexpr(b2));
+   put_gpr_b6(r1, mkexpr(b1));
+   put_gpr_b7(r1, mkexpr(b0));
+
+   return "lrvgr";
+}
+
+static HChar *
+s390_irgen_LRVH(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I16);
+
+   assign(op2, load(Ity_I16, mkexpr(op2addr)));
+   put_gpr_b6(r1, unop(Iop_16to8, mkexpr(op2)));
+   put_gpr_b7(r1, unop(Iop_16HIto8, mkexpr(op2)));
+
+   return "lrvh";
+}
+
+static HChar *
+s390_irgen_LRV(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   put_gpr_b4(r1, unop(Iop_32to8, binop(Iop_And32, mkexpr(op2), mkU32(255))));
+   put_gpr_b5(r1, unop(Iop_32to8, binop(Iop_And32, binop(Iop_Shr32, mkexpr(op2),
+              mkU8(8)), mkU32(255))));
+   put_gpr_b6(r1, unop(Iop_32to8, binop(Iop_And32, binop(Iop_Shr32, mkexpr(op2),
+              mkU8(16)), mkU32(255))));
+   put_gpr_b7(r1, unop(Iop_32to8, binop(Iop_And32, binop(Iop_Shr32, mkexpr(op2),
+              mkU8(24)), mkU32(255))));
+
+   return "lrv";
+}
+
+static HChar *
+s390_irgen_LRVG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   put_gpr_b0(r1, unop(Iop_64to8, binop(Iop_And64, mkexpr(op2), mkU64(255))));
+   put_gpr_b1(r1, unop(Iop_64to8, binop(Iop_And64, binop(Iop_Shr64, mkexpr(op2),
+              mkU8(8)), mkU64(255))));
+   put_gpr_b2(r1, unop(Iop_64to8, binop(Iop_And64, binop(Iop_Shr64, mkexpr(op2),
+              mkU8(16)), mkU64(255))));
+   put_gpr_b3(r1, unop(Iop_64to8, binop(Iop_And64, binop(Iop_Shr64, mkexpr(op2),
+              mkU8(24)), mkU64(255))));
+   put_gpr_b4(r1, unop(Iop_64to8, binop(Iop_And64, binop(Iop_Shr64, mkexpr(op2),
+              mkU8(32)), mkU64(255))));
+   put_gpr_b5(r1, unop(Iop_64to8, binop(Iop_And64, binop(Iop_Shr64, mkexpr(op2),
+              mkU8(40)), mkU64(255))));
+   put_gpr_b6(r1, unop(Iop_64to8, binop(Iop_And64, binop(Iop_Shr64, mkexpr(op2),
+              mkU8(48)), mkU64(255))));
+   put_gpr_b7(r1, unop(Iop_64to8, binop(Iop_And64, binop(Iop_Shr64, mkexpr(op2),
+              mkU8(56)), mkU64(255))));
+
+   return "lrvg";
+}
+
+static HChar *
+s390_irgen_MVHHI(UShort i2, IRTemp op1addr)
+{
+   store(mkexpr(op1addr), mkU16(i2));
+
+   return "mvhhi";
+}
+
+static HChar *
+s390_irgen_MVHI(UShort i2, IRTemp op1addr)
+{
+   store(mkexpr(op1addr), mkU32((UInt)(Int)(Short)i2));
+
+   return "mvhi";
+}
+
+static HChar *
+s390_irgen_MVGHI(UShort i2, IRTemp op1addr)
+{
+   store(mkexpr(op1addr), mkU64((ULong)(Long)(Short)i2));
+
+   return "mvghi";
+}
+
+static HChar *
+s390_irgen_MVI(UChar i2, IRTemp op1addr)
+{
+   store(mkexpr(op1addr), mkU8(i2));
+
+   return "mvi";
+}
+
+static HChar *
+s390_irgen_MVIY(UChar i2, IRTemp op1addr)
+{
+   store(mkexpr(op1addr), mkU8(i2));
+
+   return "mviy";
+}
+
+static HChar *
+s390_irgen_MR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1 + 1));
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_MullS32, mkexpr(op1), mkexpr(op2)));
+   put_gpr_w1(r1, unop(Iop_64HIto32, mkexpr(result)));
+   put_gpr_w1(r1 + 1, unop(Iop_64to32, mkexpr(result)));
+
+   return "mr";
+}
+
+static HChar *
+s390_irgen_M(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1 + 1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_MullS32, mkexpr(op1), mkexpr(op2)));
+   put_gpr_w1(r1, unop(Iop_64HIto32, mkexpr(result)));
+   put_gpr_w1(r1 + 1, unop(Iop_64to32, mkexpr(result)));
+
+   return "m";
+}
+
+static HChar *
+s390_irgen_MFY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1 + 1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_MullS32, mkexpr(op1), mkexpr(op2)));
+   put_gpr_w1(r1, unop(Iop_64HIto32, mkexpr(result)));
+   put_gpr_w1(r1 + 1, unop(Iop_64to32, mkexpr(result)));
+
+   return "mfy";
+}
+
+static HChar *
+s390_irgen_MH(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I16);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I16, mkexpr(op2addr)));
+   assign(result, binop(Iop_MullS32, mkexpr(op1),
+          unop(Iop_16Sto32, mkexpr(op2))));
+   put_gpr_w1(r1, unop(Iop_64to32, mkexpr(result)));
+
+   return "mh";
+}
+
+static HChar *
+s390_irgen_MHY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I16);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I16, mkexpr(op2addr)));
+   assign(result, binop(Iop_MullS32, mkexpr(op1),
+          unop(Iop_16Sto32, mkexpr(op2))));
+   put_gpr_w1(r1, unop(Iop_64to32, mkexpr(result)));
+
+   return "mhy";
+}
+
+static HChar *
+s390_irgen_MHI(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Short op2;
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = (Short)i2;
+   assign(result, binop(Iop_MullS32, mkexpr(op1), unop(Iop_16Sto32,
+          mkU16((UShort)op2))));
+   put_gpr_w1(r1, unop(Iop_64to32, mkexpr(result)));
+
+   return "mhi";
+}
+
+static HChar *
+s390_irgen_MGHI(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   Short op2;
+   IRTemp result = newTemp(Ity_I128);
+
+   assign(op1, get_gpr_dw0(r1));
+   op2 = (Short)i2;
+   assign(result, binop(Iop_MullS64, mkexpr(op1), unop(Iop_16Sto64,
+          mkU16((UShort)op2))));
+   put_gpr_dw0(r1, unop(Iop_128to64, mkexpr(result)));
+
+   return "mghi";
+}
+
+static HChar *
+s390_irgen_MLR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1 + 1));
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_MullU32, mkexpr(op1), mkexpr(op2)));
+   put_gpr_w1(r1, unop(Iop_64HIto32, mkexpr(result)));
+   put_gpr_w1(r1 + 1, unop(Iop_64to32, mkexpr(result)));
+
+   return "mlr";
+}
+
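+/* MLGR is a 64x64 -> 128 bit unsigned multiply: Iop_MullU64 yields an
+   Ity_I128 result whose halves go to the even-odd pair r1/r1+1.  E.g.
+   multiplying 2^40 by 2^40 leaves 2^16 in r1 and 0 in r1+1. */
+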
+static HChar *
+s390_irgen_MLGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I128);
+
+   assign(op1, get_gpr_dw0(r1 + 1));
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, binop(Iop_MullU64, mkexpr(op1), mkexpr(op2)));
+   put_gpr_dw0(r1, unop(Iop_128HIto64, mkexpr(result)));
+   put_gpr_dw0(r1 + 1, unop(Iop_128to64, mkexpr(result)));
+
+   return "mlgr";
+}
+
+static HChar *
+s390_irgen_ML(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1 + 1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_MullU32, mkexpr(op1), mkexpr(op2)));
+   put_gpr_w1(r1, unop(Iop_64HIto32, mkexpr(result)));
+   put_gpr_w1(r1 + 1, unop(Iop_64to32, mkexpr(result)));
+
+   return "ml";
+}
+
+static HChar *
+s390_irgen_MLG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I128);
+
+   assign(op1, get_gpr_dw0(r1 + 1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(result, binop(Iop_MullU64, mkexpr(op1), mkexpr(op2)));
+   put_gpr_dw0(r1, unop(Iop_128HIto64, mkexpr(result)));
+   put_gpr_dw0(r1 + 1, unop(Iop_128to64, mkexpr(result)));
+
+   return "mlg";
+}
+
+static HChar *
+s390_irgen_MSR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_MullS32, mkexpr(op1), mkexpr(op2)));
+   put_gpr_w1(r1, unop(Iop_64to32, mkexpr(result)));
+
+   return "msr";
+}
+
+static HChar *
+s390_irgen_MSGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I128);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, binop(Iop_MullS64, mkexpr(op1), mkexpr(op2)));
+   put_gpr_dw0(r1, unop(Iop_128to64, mkexpr(result)));
+
+   return "msgr";
+}
+
+static HChar *
+s390_irgen_MSGFR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I128);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_MullS64, mkexpr(op1),
+          unop(Iop_32Sto64, mkexpr(op2))));
+   put_gpr_dw0(r1, unop(Iop_128to64, mkexpr(result)));
+
+   return "msgfr";
+}
+
+static HChar *
+s390_irgen_MS(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_MullS32, mkexpr(op1), mkexpr(op2)));
+   put_gpr_w1(r1, unop(Iop_64to32, mkexpr(result)));
+
+   return "ms";
+}
+
+static HChar *
+s390_irgen_MSY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_MullS32, mkexpr(op1), mkexpr(op2)));
+   put_gpr_w1(r1, unop(Iop_64to32, mkexpr(result)));
+
+   return "msy";
+}
+
+static HChar *
+s390_irgen_MSG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I128);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(result, binop(Iop_MullS64, mkexpr(op1), mkexpr(op2)));
+   put_gpr_dw0(r1, unop(Iop_128to64, mkexpr(result)));
+
+   return "msg";
+}
+
+static HChar *
+s390_irgen_MSGF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I128);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_MullS64, mkexpr(op1),
+          unop(Iop_32Sto64, mkexpr(op2))));
+   put_gpr_dw0(r1, unop(Iop_128to64, mkexpr(result)));
+
+   return "msgf";
+}
+
+static HChar *
+s390_irgen_MSFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   Int op2;
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = (Int)i2;
+   assign(result, binop(Iop_MullS32, mkexpr(op1), mkU32((UInt)op2)));
+   put_gpr_w1(r1, unop(Iop_64to32, mkexpr(result)));
+
+   return "msfi";
+}
+
+static HChar *
+s390_irgen_MSGFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   Int op2;
+   IRTemp result = newTemp(Ity_I128);
+
+   assign(op1, get_gpr_dw0(r1));
+   op2 = (Int)i2;
+   assign(result, binop(Iop_MullS64, mkexpr(op1),
+          unop(Iop_32Sto64, mkU32((UInt)op2))));
+   put_gpr_dw0(r1, unop(Iop_128to64, mkexpr(result)));
+
+   return "msgfi";
+}
+
+static HChar *
+s390_irgen_OR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_Or32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "or";
+}
+
+static HChar *
+s390_irgen_OGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, binop(Iop_Or64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "ogr";
+}
+
+static HChar *
+s390_irgen_ORK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Or32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "ork";
+}
+
+static HChar *
+s390_irgen_OGRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Or64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "ogrk";
+}
+
+static HChar *
+s390_irgen_O(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Or32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "o";
+}
+
+static HChar *
+s390_irgen_OY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Or32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "oy";
+}
+
+static HChar *
+s390_irgen_OG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(result, binop(Iop_Or64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "og";
+}
+
+static HChar *
+s390_irgen_OI(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I8);
+   UChar op2;
+   IRTemp result = newTemp(Ity_I8);
+
+   assign(op1, load(Ity_I8, mkexpr(op1addr)));
+   op2 = i2;
+   assign(result, binop(Iop_Or8, mkexpr(op1), mkU8(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op1addr), mkexpr(result));
+
+   return "oi";
+}
+
+static HChar *
+s390_irgen_OIY(UChar i2, IRTemp op1addr)
+{
+   IRTemp op1 = newTemp(Ity_I8);
+   UChar op2;
+   IRTemp result = newTemp(Ity_I8);
+
+   assign(op1, load(Ity_I8, mkexpr(op1addr)));
+   op2 = i2;
+   assign(result, binop(Iop_Or8, mkexpr(op1), mkU8(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   store(mkexpr(op1addr), mkexpr(result));
+
+   return "oiy";
+}
+
+static HChar *
+s390_irgen_OIHF(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w0(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Or32, mkexpr(op1), mkU32(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "oihf";
+}
+
+static HChar *
+s390_irgen_OIHH(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I16);
+   UShort op2;
+   IRTemp result = newTemp(Ity_I16);
+
+   assign(op1, get_gpr_hw0(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Or16, mkexpr(op1), mkU16(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_hw0(r1, mkexpr(result));
+
+   return "oihh";
+}
+
+static HChar *
+s390_irgen_OIHL(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I16);
+   UShort op2;
+   IRTemp result = newTemp(Ity_I16);
+
+   assign(op1, get_gpr_hw1(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Or16, mkexpr(op1), mkU16(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_hw1(r1, mkexpr(result));
+
+   return "oihl";
+}
+
+static HChar *
+s390_irgen_OILF(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Or32, mkexpr(op1), mkU32(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "oilf";
+}
+
+static HChar *
+s390_irgen_OILH(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I16);
+   UShort op2;
+   IRTemp result = newTemp(Ity_I16);
+
+   assign(op1, get_gpr_hw2(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Or16, mkexpr(op1), mkU16(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_hw2(r1, mkexpr(result));
+
+   return "oilh";
+}
+
+static HChar *
+s390_irgen_OILL(UChar r1, UShort i2)
+{
+   IRTemp op1 = newTemp(Ity_I16);
+   UShort op2;
+   IRTemp result = newTemp(Ity_I16);
+
+   assign(op1, get_gpr_hw3(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Or16, mkexpr(op1), mkU16(op2)));
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+   put_gpr_hw3(r1, mkexpr(result));
+
+   return "oill";
+}
+
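+/* PFD and PFDRL are data-prefetch hints with no architecturally visible
+   effect on registers or memory, so they are translated to no-ops. */
+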
+static HChar *
+s390_irgen_PFD(void)
+{
+
+   return "pfd";
+}
+
+static HChar *
+s390_irgen_PFDRL(void)
+{
+
+   return "pfdrl";
+}
+
+static HChar *
+s390_irgen_RLL(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp amount = newTemp(Ity_I64);
+   IRTemp op = newTemp(Ity_I32);
+
+   assign(amount, binop(Iop_And64, mkexpr(op2addr), mkU64(31)));
+   assign(op, get_gpr_w1(r3));
+   put_gpr_w1(r1, binop(Iop_Or32, binop(Iop_Shl32, mkexpr(op), unop(Iop_64to8,
+              mkexpr(amount))), binop(Iop_Shr32, mkexpr(op), unop(Iop_64to8,
+              binop(Iop_Sub64, mkU64(32), mkexpr(amount))))));
+
+   return "rll";
+}
+
+static HChar *
+s390_irgen_RLLG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp amount = newTemp(Ity_I64);
+   IRTemp op = newTemp(Ity_I64);
+
+   assign(amount, binop(Iop_And64, mkexpr(op2addr), mkU64(63)));
+   assign(op, get_gpr_dw0(r3));
+   put_gpr_dw0(r1, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(op), unop(Iop_64to8,
+               mkexpr(amount))), binop(Iop_Shr64, mkexpr(op), unop(Iop_64to8,
+               binop(Iop_Sub64, mkU64(64), mkexpr(amount))))));
+
+   return "rllg";
+}
+
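+/* RNSBG/RXSBG/ROSBG/RISBG below all rotate the second operand left by i5
+   and then operate on the bit range i3..i4, using IBM bit numbering
+   (bit 0 is the MSB).  The mask construction handles wraparound: for
+   i3 = 8, i4 = 15 it selects the second byte, mask = 0x00ff000000000000;
+   for i3 = 60, i4 = 3 it wraps and selects the top and bottom nibbles,
+   mask = 0xf00000000000000f. */
+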
+static HChar *
+s390_irgen_RNSBG(UChar r1, UChar r2, UChar i3, UChar i4, UChar i5)
+{
+   UChar from;
+   UChar to;
+   UChar rot;
+   UChar t_bit;
+   ULong mask;
+   ULong maskc;
+   IRTemp result = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   from = i3 & 63;
+   to = i4 & 63;
+   rot = i5 & 63;
+   t_bit = i3 & 128;
+   assign(op2, rot == 0 ? get_gpr_dw0(r2) : binop(Iop_Or64, binop(Iop_Shl64,
+          get_gpr_dw0(r2), mkU8(rot)), binop(Iop_Shr64, get_gpr_dw0(r2),
+          mkU8(64 - rot))));
+   if (from <= to) {
+      mask = ~0ULL;
+      mask = (mask >> from) & (mask << (63 - to));
+      maskc = ~mask;
+   } else {
+      maskc = ~0ULL;
+      maskc = (maskc >> (to + 1)) & (maskc << (64 - from));
+      mask = ~maskc;
+   }
+   assign(result, binop(Iop_And64,
+          binop(Iop_And64, get_gpr_dw0(r1), mkexpr(op2)), mkU64(mask)));
+   if (t_bit == 0) {
+      put_gpr_dw0(r1, binop(Iop_Or64, binop(Iop_And64, get_gpr_dw0(r1),
+                  mkU64(maskc)), mkexpr(result)));
+   }
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+
+   return "rnsbg";
+}
+
+static HChar *
+s390_irgen_RXSBG(UChar r1, UChar r2, UChar i3, UChar i4, UChar i5)
+{
+   UChar from;
+   UChar to;
+   UChar rot;
+   UChar t_bit;
+   ULong mask;
+   ULong maskc;
+   IRTemp result = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   from = i3 & 63;
+   to = i4 & 63;
+   rot = i5 & 63;
+   t_bit = i3 & 128;
+   assign(op2, rot == 0 ? get_gpr_dw0(r2) : binop(Iop_Or64, binop(Iop_Shl64,
+          get_gpr_dw0(r2), mkU8(rot)), binop(Iop_Shr64, get_gpr_dw0(r2),
+          mkU8(64 - rot))));
+   if (from <= to) {
+      mask = ~0ULL;
+      mask = (mask >> from) & (mask << (63 - to));
+      maskc = ~mask;
+   } else {
+      maskc = ~0ULL;
+      maskc = (maskc >> (to + 1)) & (maskc << (64 - from));
+      mask = ~maskc;
+   }
+   assign(result, binop(Iop_And64,
+          binop(Iop_Xor64, get_gpr_dw0(r1), mkexpr(op2)), mkU64(mask)));
+   if (t_bit == 0) {
+      put_gpr_dw0(r1, binop(Iop_Or64, binop(Iop_And64, get_gpr_dw0(r1),
+                  mkU64(maskc)), mkexpr(result)));
+   }
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+
+   return "rxsbg";
+}
+
+static HChar *
+s390_irgen_ROSBG(UChar r1, UChar r2, UChar i3, UChar i4, UChar i5)
+{
+   UChar from;
+   UChar to;
+   UChar rot;
+   UChar t_bit;
+   ULong mask;
+   ULong maskc;
+   IRTemp result = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+
+   from = i3 & 63;
+   to = i4 & 63;
+   rot = i5 & 63;
+   t_bit = i3 & 128;
+   assign(op2, rot == 0 ? get_gpr_dw0(r2) : binop(Iop_Or64, binop(Iop_Shl64,
+          get_gpr_dw0(r2), mkU8(rot)), binop(Iop_Shr64, get_gpr_dw0(r2),
+          mkU8(64 - rot))));
+   if (from <= to) {
+      mask = ~0ULL;
+      mask = (mask >> from) & (mask << (63 - to));
+      maskc = ~mask;
+   } else {
+      maskc = ~0ULL;
+      maskc = (maskc >> (to + 1)) & (maskc << (64 - from));
+      mask = ~maskc;
+   }
+   assign(result, binop(Iop_And64,
+          binop(Iop_Or64, get_gpr_dw0(r1), mkexpr(op2)), mkU64(mask)));
+   if (t_bit == 0) {
+      put_gpr_dw0(r1, binop(Iop_Or64, binop(Iop_And64, get_gpr_dw0(r1),
+                  mkU64(maskc)), mkexpr(result)));
+   }
+   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+
+   return "rosbg";
+}
+
+static HChar *
+s390_irgen_RISBG(UChar r1, UChar r2, UChar i3, UChar i4, UChar i5)
+{
+   UChar from;
+   UChar to;
+   UChar rot;
+   UChar z_bit;
+   ULong mask;
+   ULong maskc;
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   from = i3 & 63;
+   to = i4 & 63;
+   rot = i5 & 63;
+   z_bit = i4 & 128;
+   assign(op2, rot == 0 ? get_gpr_dw0(r2) : binop(Iop_Or64, binop(Iop_Shl64,
+          get_gpr_dw0(r2), mkU8(rot)), binop(Iop_Shr64, get_gpr_dw0(r2),
+          mkU8(64 - rot))));
+   if (from <= to) {
+      mask = ~0ULL;
+      mask = (mask >> from) & (mask << (63 - to));
+      maskc = ~mask;
+   } else {
+      maskc = ~0ULL;
+      maskc = (maskc >> (to + 1)) & (maskc << (64 - from));
+      mask = ~maskc;
+   }
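+   /* The leftmost bit of i4 is the zero bit: if set, the bits of r1 outside
+      the selected range are zeroed rather than preserved.  The condition
+      code is then set from the final value of r1. */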
+   if (z_bit == 0) {
+      put_gpr_dw0(r1, binop(Iop_Or64, binop(Iop_And64, get_gpr_dw0(r1),
+                  mkU64(maskc)), binop(Iop_And64, mkexpr(op2), mkU64(mask))));
+   } else {
+      put_gpr_dw0(r1, binop(Iop_And64, mkexpr(op2), mkU64(mask)));
+   }
+   assign(result, get_gpr_dw0(r1));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, result);
+
+   return "risbg";
+}
+
+static HChar *
+s390_irgen_SAR(UChar r1, UChar r2)
+{
+   put_ar_w0(r1, get_gpr_w1(r2));
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, AR, GPR), "sar", r1, r2);
+
+   return "sar";
+}
+
+static HChar *
+s390_irgen_SLDA(UChar r1, IRTemp op2addr)
+{
+   IRTemp p1 = newTemp(Ity_I64);
+   IRTemp p2 = newTemp(Ity_I64);
+   IRTemp op = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+   Long sign_mask;
+   IRTemp shift_amount = newTemp(Ity_I64);
+
+   assign(p1, unop(Iop_32Uto64, get_gpr_w1(r1)));
+   assign(p2, unop(Iop_32Uto64, get_gpr_w1(r1 + 1)));
+   assign(op, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(p1), mkU8(32)), mkexpr(p2)
+          ));
+   sign_mask = 1ULL << 63;
+   assign(shift_amount, binop(Iop_And64, mkexpr(op2addr), mkU64(63)));
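+   /* Shift the 64-bit even/odd register pair left under the sign bit:
+      result = ((op << n) & ~signmask) | (op & signmask). */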
+   assign(result, binop(Iop_Or64, binop(Iop_And64, binop(Iop_Shl64, mkexpr(op),
+          unop(Iop_64to8, mkexpr(shift_amount))), mkU64((ULong)(~sign_mask))),
+          binop(Iop_And64, mkexpr(op), mkU64((ULong)sign_mask))));
+   put_gpr_w1(r1, unop(Iop_64HIto32, mkexpr(result)));
+   put_gpr_w1(r1 + 1, unop(Iop_64to32, mkexpr(result)));
+   s390_cc_thunk_putZZ(S390_CC_OP_SHIFT_LEFT_64, op, shift_amount);
+
+   return "slda";
+}
+
+static HChar *
+s390_irgen_SLDL(UChar r1, IRTemp op2addr)
+{
+   IRTemp p1 = newTemp(Ity_I64);
+   IRTemp p2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(p1, unop(Iop_32Uto64, get_gpr_w1(r1)));
+   assign(p2, unop(Iop_32Uto64, get_gpr_w1(r1 + 1)));
+   assign(result, binop(Iop_Shl64, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(p1),
+          mkU8(32)), mkexpr(p2)), unop(Iop_64to8, binop(Iop_And64,
+          mkexpr(op2addr), mkU64(63)))));
+   put_gpr_w1(r1, unop(Iop_64HIto32, mkexpr(result)));
+   put_gpr_w1(r1 + 1, unop(Iop_64to32, mkexpr(result)));
+
+   return "sldl";
+}
+
+static HChar *
+s390_irgen_SLA(UChar r1, IRTemp op2addr)
+{
+   IRTemp uop = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+   UInt sign_mask;
+   IRTemp shift_amount = newTemp(Ity_I64);
+   IRTemp op = newTemp(Ity_I32);
+
+   assign(op, get_gpr_w1(r1));
+   assign(uop, get_gpr_w1(r1));
+   sign_mask = 0x80000000U;
+   assign(shift_amount, binop(Iop_And64, mkexpr(op2addr), mkU64(63)));
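+   /* Arithmetic shift left: shift everything except the sign bit, then put
+      the original sign bit back: ((op << n) & ~signmask) | (op & signmask). */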
+   assign(result, binop(Iop_Or32, binop(Iop_And32, binop(Iop_Shl32, mkexpr(uop),
+          unop(Iop_64to8, mkexpr(shift_amount))), mkU32(~sign_mask)),
+          binop(Iop_And32, mkexpr(uop), mkU32(sign_mask))));
+   put_gpr_w1(r1, mkexpr(result));
+   s390_cc_thunk_putZZ(S390_CC_OP_SHIFT_LEFT_32, op, shift_amount);
+
+   return "sla";
+}
+
+static HChar *
+s390_irgen_SLAK(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp uop = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+   UInt sign_mask;
+   IRTemp shift_amount = newTemp(Ity_I64);
+   IRTemp op = newTemp(Ity_I32);
+
+   assign(op, get_gpr_w1(r3));
+   assign(uop, get_gpr_w1(r3));
+   sign_mask = 0x80000000U;
+   assign(shift_amount, binop(Iop_And64, mkexpr(op2addr), mkU64(63)));
+   assign(result, binop(Iop_Or32, binop(Iop_And32, binop(Iop_Shl32, mkexpr(uop),
+          unop(Iop_64to8, mkexpr(shift_amount))), mkU32(~sign_mask)),
+          binop(Iop_And32, mkexpr(uop), mkU32(sign_mask))));
+   put_gpr_w1(r1, mkexpr(result));
+   s390_cc_thunk_putZZ(S390_CC_OP_SHIFT_LEFT_32, op, shift_amount);
+
+   return "slak";
+}
+
+static HChar *
+s390_irgen_SLAG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp uop = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+   ULong sign_mask;
+   IRTemp shift_amount = newTemp(Ity_I64);
+   IRTemp op = newTemp(Ity_I64);
+
+   assign(op, get_gpr_dw0(r3));
+   assign(uop, get_gpr_dw0(r3));
+   sign_mask = 0x8000000000000000ULL;
+   assign(shift_amount, binop(Iop_And64, mkexpr(op2addr), mkU64(63)));
+   assign(result, binop(Iop_Or64, binop(Iop_And64, binop(Iop_Shl64, mkexpr(uop),
+          unop(Iop_64to8, mkexpr(shift_amount))), mkU64(~sign_mask)),
+          binop(Iop_And64, mkexpr(uop), mkU64(sign_mask))));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putZZ(S390_CC_OP_SHIFT_LEFT_64, op, shift_amount);
+
+   return "slag";
+}
+
+static HChar *
+s390_irgen_SLL(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, binop(Iop_Shl32, get_gpr_w1(r1), unop(Iop_64to8,
+              binop(Iop_And64, mkexpr(op2addr), mkU64(63)))));
+
+   return "sll";
+}
+
+static HChar *
+s390_irgen_SLLK(UChar r1, UChar r3, IRTemp op2addr)
+{
+   put_gpr_w1(r1, binop(Iop_Shl32, get_gpr_w1(r3), unop(Iop_64to8,
+              binop(Iop_And64, mkexpr(op2addr), mkU64(63)))));
+
+   return "sllk";
+}
+
+static HChar *
+s390_irgen_SLLG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   put_gpr_dw0(r1, binop(Iop_Shl64, get_gpr_dw0(r3), unop(Iop_64to8,
+               binop(Iop_And64, mkexpr(op2addr), mkU64(63)))));
+
+   return "sllg";
+}
+
+static HChar *
+s390_irgen_SRDA(UChar r1, IRTemp op2addr)
+{
+   IRTemp p1 = newTemp(Ity_I64);
+   IRTemp p2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(p1, unop(Iop_32Uto64, get_gpr_w1(r1)));
+   assign(p2, unop(Iop_32Uto64, get_gpr_w1(r1 + 1)));
+   assign(result, binop(Iop_Sar64, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(p1),
+          mkU8(32)), mkexpr(p2)), unop(Iop_64to8, binop(Iop_And64,
+          mkexpr(op2addr), mkU64(63)))));
+   put_gpr_w1(r1, unop(Iop_64HIto32, mkexpr(result)));
+   put_gpr_w1(r1 + 1, unop(Iop_64to32, mkexpr(result)));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, result);
+
+   return "srda";
+}
+
+static HChar *
+s390_irgen_SRDL(UChar r1, IRTemp op2addr)
+{
+   IRTemp p1 = newTemp(Ity_I64);
+   IRTemp p2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(p1, unop(Iop_32Uto64, get_gpr_w1(r1)));
+   assign(p2, unop(Iop_32Uto64, get_gpr_w1(r1 + 1)));
+   assign(result, binop(Iop_Shr64, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(p1),
+          mkU8(32)), mkexpr(p2)), unop(Iop_64to8, binop(Iop_And64,
+          mkexpr(op2addr), mkU64(63)))));
+   put_gpr_w1(r1, unop(Iop_64HIto32, mkexpr(result)));
+   put_gpr_w1(r1 + 1, unop(Iop_64to32, mkexpr(result)));
+
+   return "srdl";
+}
+
+static HChar *
+s390_irgen_SRA(UChar r1, IRTemp op2addr)
+{
+   IRTemp result = newTemp(Ity_I32);
+   IRTemp op = newTemp(Ity_I32);
+
+   assign(op, get_gpr_w1(r1));
+   assign(result, binop(Iop_Sar32, mkexpr(op), unop(Iop_64to8, binop(Iop_And64,
+          mkexpr(op2addr), mkU64(63)))));
+   put_gpr_w1(r1, mkexpr(result));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, result);
+
+   return "sra";
+}
+
+static HChar *
+s390_irgen_SRAK(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp result = newTemp(Ity_I32);
+   IRTemp op = newTemp(Ity_I32);
+
+   assign(op, get_gpr_w1(r3));
+   assign(result, binop(Iop_Sar32, mkexpr(op), unop(Iop_64to8, binop(Iop_And64,
+          mkexpr(op2addr), mkU64(63)))));
+   put_gpr_w1(r1, mkexpr(result));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, result);
+
+   return "srak";
+}
+
+static HChar *
+s390_irgen_SRAG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp result = newTemp(Ity_I64);
+   IRTemp op = newTemp(Ity_I64);
+
+   assign(op, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Sar64, mkexpr(op), unop(Iop_64to8, binop(Iop_And64,
+          mkexpr(op2addr), mkU64(63)))));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putS(S390_CC_OP_LOAD_AND_TEST, result);
+
+   return "srag";
+}
+
+static HChar *
+s390_irgen_SRL(UChar r1, IRTemp op2addr)
+{
+   IRTemp op = newTemp(Ity_I32);
+
+   assign(op, get_gpr_w1(r1));
+   put_gpr_w1(r1, binop(Iop_Shr32, mkexpr(op), unop(Iop_64to8, binop(Iop_And64,
+              mkexpr(op2addr), mkU64(63)))));
+
+   return "srl";
+}
+
+static HChar *
+s390_irgen_SRLK(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op = newTemp(Ity_I32);
+
+   assign(op, get_gpr_w1(r3));
+   put_gpr_w1(r1, binop(Iop_Shr32, mkexpr(op), unop(Iop_64to8, binop(Iop_And64,
+              mkexpr(op2addr), mkU64(63)))));
+
+   return "srlk";
+}
+
+static HChar *
+s390_irgen_SRLG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRTemp op = newTemp(Ity_I64);
+
+   assign(op, get_gpr_dw0(r3));
+   put_gpr_dw0(r1, binop(Iop_Shr64, mkexpr(op), unop(Iop_64to8, binop(Iop_And64,
+               mkexpr(op2addr), mkU64(63)))));
+
+   return "srlg";
+}
+
+static HChar *
+s390_irgen_ST(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_w1(r1));
+
+   return "st";
+}
+
+static HChar *
+s390_irgen_STY(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_w1(r1));
+
+   return "sty";
+}
+
+static HChar *
+s390_irgen_STG(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_dw0(r1));
+
+   return "stg";
+}
+
+static HChar *
+s390_irgen_STRL(UChar r1, UInt i2)
+{
+   store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)),
+         get_gpr_w1(r1));
+
+   return "strl";
+}
+
+static HChar *
+s390_irgen_STGRL(UChar r1, UInt i2)
+{
+   store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)),
+         get_gpr_dw0(r1));
+
+   return "stgrl";
+}
+
+static HChar *
+s390_irgen_STC(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_b7(r1));
+
+   return "stc";
+}
+
+static HChar *
+s390_irgen_STCY(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_b7(r1));
+
+   return "stcy";
+}
+
+static HChar *
+s390_irgen_STCH(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_b3(r1));
+
+   return "stch";
+}
+
+static HChar *
+s390_irgen_STCM(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar mask;
+   UChar n;
+
+   mask = (UChar)r3;
+   n = 0;
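+   /* r3 is the 4-bit mask.  Each set mask bit stores the corresponding byte
+      of the low word of r1 to the next consecutive byte at op2addr; bytes
+      whose mask bit is 0 are skipped. */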
+   if ((mask & 8) != 0) {
+      store(mkexpr(op2addr), get_gpr_b4(r1));
+      n = n + 1;
+   }
+   if ((mask & 4) != 0) {
+      store(binop(Iop_Add64, mkexpr(op2addr), mkU64(n)), get_gpr_b5(r1));
+      n = n + 1;
+   }
+   if ((mask & 2) != 0) {
+      store(binop(Iop_Add64, mkexpr(op2addr), mkU64(n)), get_gpr_b6(r1));
+      n = n + 1;
+   }
+   if ((mask & 1) != 0) {
+      store(binop(Iop_Add64, mkexpr(op2addr), mkU64(n)), get_gpr_b7(r1));
+   }
+
+   return "stcm";
+}
+
+static HChar *
+s390_irgen_STCMY(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar mask;
+   UChar n;
+
+   mask = (UChar)r3;
+   n = 0;
+   if ((mask & 8) != 0) {
+      store(mkexpr(op2addr), get_gpr_b4(r1));
+      n = n + 1;
+   }
+   if ((mask & 4) != 0) {
+      store(binop(Iop_Add64, mkexpr(op2addr), mkU64(n)), get_gpr_b5(r1));
+      n = n + 1;
+   }
+   if ((mask & 2) != 0) {
+      store(binop(Iop_Add64, mkexpr(op2addr), mkU64(n)), get_gpr_b6(r1));
+      n = n + 1;
+   }
+   if ((mask & 1) != 0) {
+      store(binop(Iop_Add64, mkexpr(op2addr), mkU64(n)), get_gpr_b7(r1));
+   }
+
+   return "stcmy";
+}
+
+static HChar *
+s390_irgen_STCMH(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar mask;
+   UChar n;
+
+   mask = (UChar)r3;
+   n = 0;
+   if ((mask & 8) != 0) {
+      store(mkexpr(op2addr), get_gpr_b0(r1));
+      n = n + 1;
+   }
+   if ((mask & 4) != 0) {
+      store(binop(Iop_Add64, mkexpr(op2addr), mkU64(n)), get_gpr_b1(r1));
+      n = n + 1;
+   }
+   if ((mask & 2) != 0) {
+      store(binop(Iop_Add64, mkexpr(op2addr), mkU64(n)), get_gpr_b2(r1));
+      n = n + 1;
+   }
+   if ((mask & 1) != 0) {
+      store(binop(Iop_Add64, mkexpr(op2addr), mkU64(n)), get_gpr_b3(r1));
+   }
+
+   return "stcmh";
+}
+
+static HChar *
+s390_irgen_STH(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_hw3(r1));
+
+   return "sth";
+}
+
+static HChar *
+s390_irgen_STHY(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_hw3(r1));
+
+   return "sthy";
+}
+
+static HChar *
+s390_irgen_STHRL(UChar r1, UInt i2)
+{
+   store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)),
+         get_gpr_hw3(r1));
+
+   return "sthrl";
+}
+
+static HChar *
+s390_irgen_STHH(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_hw1(r1));
+
+   return "sthh";
+}
+
+static HChar *
+s390_irgen_STFH(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_w0(r1));
+
+   return "stfh";
+}
+
+static HChar *
+s390_irgen_STOC(UChar r1, IRTemp op2addr)
+{
+   /* condition is checked in format handler */
+   store(mkexpr(op2addr), get_gpr_w1(r1));
+
+   return "stoc";
+}
+
+static HChar *
+s390_irgen_STOCG(UChar r1, IRTemp op2addr)
+{
+   /* condition is checked in format handler */
+   store(mkexpr(op2addr), get_gpr_dw0(r1));
+
+   return "stocg";
+}
+
+static HChar *
+s390_irgen_STPQ(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_dw0(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(8)), get_gpr_dw0(r1 + 1));
+
+   return "stpq";
+}
+
+static HChar *
+s390_irgen_STRVH(UChar r1, IRTemp op2addr)
+{
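+   /* STRVH, STRV and STRVG store the 2, 4 or 8 low-order bytes of r1 in
+      byte-reversed order, i.e. as a little-endian store. */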
+   store(mkexpr(op2addr), get_gpr_b7(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(1)), get_gpr_b6(r1));
+
+   return "strvh";
+}
+
+static HChar *
+s390_irgen_STRV(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_b7(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(1)), get_gpr_b6(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(2)), get_gpr_b5(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(3)), get_gpr_b4(r1));
+
+   return "strv";
+}
+
+static HChar *
+s390_irgen_STRVG(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_gpr_b7(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(1)), get_gpr_b6(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(2)), get_gpr_b5(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(3)), get_gpr_b4(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(4)), get_gpr_b3(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(5)), get_gpr_b2(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(6)), get_gpr_b1(r1));
+   store(binop(Iop_Add64, mkexpr(op2addr), mkU64(7)), get_gpr_b0(r1));
+
+   return "strvg";
+}
+
+static HChar *
+s390_irgen_SR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_Sub32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "sr";
+}
+
+static HChar *
+s390_irgen_SGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, binop(Iop_Sub64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "sgr";
+}
+
+static HChar *
+s390_irgen_SGFR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Sto64, get_gpr_w1(r2)));
+   assign(result, binop(Iop_Sub64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "sgfr";
+}
+
+static HChar *
+s390_irgen_SRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Sub32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_32, op2, op3);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "srk";
+}
+
+static HChar *
+s390_irgen_SGRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Sub64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_64, op2, op3);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "sgrk";
+}
+
+static HChar *
+s390_irgen_S(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Sub32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "s";
+}
+
+static HChar *
+s390_irgen_SY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Sub32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "sy";
+}
+
+static HChar *
+s390_irgen_SG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(result, binop(Iop_Sub64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "sg";
+}
+
+static HChar *
+s390_irgen_SGF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkexpr(op2addr))));
+   assign(result, binop(Iop_Sub64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "sgf";
+}
+
+static HChar *
+s390_irgen_SH(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkexpr(op2addr))));
+   assign(result, binop(Iop_Sub32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "sh";
+}
+
+static HChar *
+s390_irgen_SHY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkexpr(op2addr))));
+   assign(result, binop(Iop_Sub32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "shy";
+}
+
+static HChar *
+s390_irgen_SHHHR(UChar r3 __attribute__((unused)), UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w0(r1));
+   assign(op3, get_gpr_w0(r2));
+   assign(result, binop(Iop_Sub32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_32, op2, op3);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "shhhr";
+}
+
+static HChar *
+s390_irgen_SHHLR(UChar r3 __attribute__((unused)), UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w0(r1));
+   assign(op3, get_gpr_w1(r2));
+   assign(result, binop(Iop_Sub32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putSS(S390_CC_OP_SIGNED_SUB_32, op2, op3);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "shhlr";
+}
+
+static HChar *
+s390_irgen_SLR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
+   assign(result, binop(Iop_Sub32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "slr";
+}
+
+static HChar *
+s390_irgen_SLGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   assign(result, binop(Iop_Sub64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "slgr";
+}
+
+static HChar *
+s390_irgen_SLGFR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Uto64, get_gpr_w1(r2)));
+   assign(result, binop(Iop_Sub64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "slgfr";
+}
+
+static HChar *
+s390_irgen_SLRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   assign(op3, get_gpr_w1(r3));
+   assign(result, binop(Iop_Sub32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_32, op2, op3);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "slrk";
+}
+
+static HChar *
+s390_irgen_SLGRK(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   assign(op3, get_gpr_dw0(r3));
+   assign(result, binop(Iop_Sub64, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_64, op2, op3);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "slgrk";
+}
+
+static HChar *
+s390_irgen_SL(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Sub32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "sl";
+}
+
+static HChar *
+s390_irgen_SLY(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(result, binop(Iop_Sub32, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_32, op1, op2);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "sly";
+}
+
+static HChar *
+s390_irgen_SLG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(result, binop(Iop_Sub64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "slg";
+}
+
+static HChar *
+s390_irgen_SLGF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkexpr(op2addr))));
+   assign(result, binop(Iop_Sub64, mkexpr(op1), mkexpr(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_64, op1, op2);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "slgf";
+}
+
+static HChar *
+s390_irgen_SLFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   UInt op2;
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   op2 = i2;
+   assign(result, binop(Iop_Sub32, mkexpr(op1), mkU32(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_32, op1, mktemp(Ity_I32,
+                       mkU32(op2)));
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "slfi";
+}
+
+static HChar *
+s390_irgen_SLGFI(UChar r1, UInt i2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   ULong op2;
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   op2 = (ULong)i2;
+   assign(result, binop(Iop_Sub64, mkexpr(op1), mkU64(op2)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_64, op1, mktemp(Ity_I64,
+                       mkU64(op2)));
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "slgfi";
+}
+
+static HChar *
+s390_irgen_SLHHHR(UChar r3 __attribute__((unused)), UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w0(r1));
+   assign(op3, get_gpr_w0(r2));
+   assign(result, binop(Iop_Sub32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_32, op2, op3);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "slhhhr";
+}
+
+static HChar *
+s390_irgen_SLHHLR(UChar r3 __attribute__((unused)), UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w0(r1));
+   assign(op3, get_gpr_w1(r2));
+   assign(result, binop(Iop_Sub32, mkexpr(op2), mkexpr(op3)));
+   s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_SUB_32, op2, op3);
+   put_gpr_w0(r1, mkexpr(result));
+
+   return "slhhlr";
+}
+
+static HChar *
+s390_irgen_SLBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+   IRTemp borrow_in = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, get_gpr_w1(r2));
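+   /* Derive the borrow from the condition code: CC 0 or 1 means a borrow
+      occurred, CC 2 or 3 means none; hence borrow_in = 1 - (cc >> 1). */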
+   assign(borrow_in, binop(Iop_Sub32, mkU32(1), binop(Iop_Shr32,
+          s390_call_calculate_cc(), mkU8(1))));
+   assign(result, binop(Iop_Sub32, binop(Iop_Sub32, mkexpr(op1), mkexpr(op2)),
+          mkexpr(borrow_in)));
+   s390_cc_thunk_putZZZ(S390_CC_OP_UNSIGNED_SUBB_32, op1, op2, borrow_in);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "slbr";
+}
+
+static HChar *
+s390_irgen_SLBGR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+   IRTemp borrow_in = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, get_gpr_dw0(r2));
+   assign(borrow_in, unop(Iop_32Uto64, binop(Iop_Sub32, mkU32(1),
+          binop(Iop_Shr32, s390_call_calculate_cc(), mkU8(1)))));
+   assign(result, binop(Iop_Sub64, binop(Iop_Sub64, mkexpr(op1), mkexpr(op2)),
+          mkexpr(borrow_in)));
+   s390_cc_thunk_putZZZ(S390_CC_OP_UNSIGNED_SUBB_64, op1, op2, borrow_in);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "slbgr";
+}
+
+static HChar *
+s390_irgen_SLB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp op2 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+   IRTemp borrow_in = newTemp(Ity_I32);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+   assign(borrow_in, binop(Iop_Sub32, mkU32(1), binop(Iop_Shr32,
+          s390_call_calculate_cc(), mkU8(1))));
+   assign(result, binop(Iop_Sub32, binop(Iop_Sub32, mkexpr(op1), mkexpr(op2)),
+          mkexpr(borrow_in)));
+   s390_cc_thunk_putZZZ(S390_CC_OP_UNSIGNED_SUBB_32, op1, op2, borrow_in);
+   put_gpr_w1(r1, mkexpr(result));
+
+   return "slb";
+}
+
+static HChar *
+s390_irgen_SLBG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp op2 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+   IRTemp borrow_in = newTemp(Ity_I64);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+   assign(borrow_in, unop(Iop_32Uto64, binop(Iop_Sub32, mkU32(1),
+          binop(Iop_Shr32, s390_call_calculate_cc(), mkU8(1)))));
+   assign(result, binop(Iop_Sub64, binop(Iop_Sub64, mkexpr(op1), mkexpr(op2)),
+          mkexpr(borrow_in)));
+   s390_cc_thunk_putZZZ(S390_CC_OP_UNSIGNED_SUBB_64, op1, op2, borrow_in);
+   put_gpr_dw0(r1, mkexpr(result));
+
+   return "slbg";
+}
+
+static HChar *
+s390_irgen_SVC(UChar i)
+{
+   IRTemp sysno = newTemp(Ity_I64);
+
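+   /* With "svc 0" the system call number is taken from r1 (the Linux
+      convention); otherwise the immediate itself is the number. */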
+   if (i != 0) {
+      assign(sysno, mkU64(i));
+   } else {
+      assign(sysno, unop(Iop_32Uto64, get_gpr_w1(1)));
+   }
+   system_call(mkexpr(sysno));
+
+   return "svc";
+}
+
+static HChar *
+s390_irgen_TS(IRTemp op2addr)
+{
+   IRTemp value = newTemp(Ity_I8);
+
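+   /* Test and set: the condition code is computed from the old byte, after
+      which the whole byte is set to all ones (0xff). */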
+   assign(value, load(Ity_I8, mkexpr(op2addr)));
+   s390_cc_thunk_putZ(S390_CC_OP_TEST_AND_SET, value);
+   store(mkexpr(op2addr), mkU8(255));
+
+   return "ts";
+}
+
+static HChar *
+s390_irgen_TM(UChar i2, IRTemp op1addr)
+{
+   UChar mask;
+   IRTemp value = newTemp(Ity_I8);
+
+   mask = i2;
+   assign(value, load(Ity_I8, mkexpr(op1addr)));
+   s390_cc_thunk_putZZ(S390_CC_OP_TEST_UNDER_MASK_8, value, mktemp(Ity_I8,
+                       mkU8(mask)));
+
+   return "tm";
+}
+
+static HChar *
+s390_irgen_TMY(UChar i2, IRTemp op1addr)
+{
+   UChar mask;
+   IRTemp value = newTemp(Ity_I8);
+
+   mask = i2;
+   assign(value, load(Ity_I8, mkexpr(op1addr)));
+   s390_cc_thunk_putZZ(S390_CC_OP_TEST_UNDER_MASK_8, value, mktemp(Ity_I8,
+                       mkU8(mask)));
+
+   return "tmy";
+}
+
+static HChar *
+s390_irgen_TMHH(UChar r1, UShort i2)
+{
+   UShort mask;
+   IRTemp value = newTemp(Ity_I16);
+
+   mask = i2;
+   assign(value, get_gpr_hw0(r1));
+   s390_cc_thunk_putZZ(S390_CC_OP_TEST_UNDER_MASK_16, value, mktemp(Ity_I16,
+                       mkU16(mask)));
+
+   return "tmhh";
+}
+
+static HChar *
+s390_irgen_TMHL(UChar r1, UShort i2)
+{
+   UShort mask;
+   IRTemp value = newTemp(Ity_I16);
+
+   mask = i2;
+   assign(value, get_gpr_hw1(r1));
+   s390_cc_thunk_putZZ(S390_CC_OP_TEST_UNDER_MASK_16, value, mktemp(Ity_I16,
+                       mkU16(mask)));
+
+   return "tmhl";
+}
+
+static HChar *
+s390_irgen_TMLH(UChar r1, UShort i2)
+{
+   UShort mask;
+   IRTemp value = newTemp(Ity_I16);
+
+   mask = i2;
+   assign(value, get_gpr_hw2(r1));
+   s390_cc_thunk_putZZ(S390_CC_OP_TEST_UNDER_MASK_16, value, mktemp(Ity_I16,
+                       mkU16(mask)));
+
+   return "tmlh";
+}
+
+static HChar *
+s390_irgen_TMLL(UChar r1, UShort i2)
+{
+   UShort mask;
+   IRTemp value = newTemp(Ity_I16);
+
+   mask = i2;
+   assign(value, get_gpr_hw3(r1));
+   s390_cc_thunk_putZZ(S390_CC_OP_TEST_UNDER_MASK_16, value, mktemp(Ity_I16,
+                       mkU16(mask)));
+
+   return "tmll";
+}
+
+static HChar *
+s390_irgen_EFPC(UChar r1)
+{
+   put_gpr_w1(r1, get_fpc_w0());
+
+   return "efpc";
+}
+
+static HChar *
+s390_irgen_LER(UChar r1, UChar r2)
+{
+   put_fpr_w0(r1, get_fpr_w0(r2));
+
+   return "ler";
+}
+
+static HChar *
+s390_irgen_LDR(UChar r1, UChar r2)
+{
+   put_fpr_dw0(r1, get_fpr_dw0(r2));
+
+   return "ldr";
+}
+
+static HChar *
+s390_irgen_LXR(UChar r1, UChar r2)
+{
+   put_fpr_dw0(r1, get_fpr_dw0(r2));
+   put_fpr_dw0(r1 + 2, get_fpr_dw0(r2 + 2));
+
+   return "lxr";
+}
+
+static HChar *
+s390_irgen_LE(UChar r1, IRTemp op2addr)
+{
+   put_fpr_w0(r1, load(Ity_F32, mkexpr(op2addr)));
+
+   return "le";
+}
+
+static HChar *
+s390_irgen_LD(UChar r1, IRTemp op2addr)
+{
+   put_fpr_dw0(r1, load(Ity_F64, mkexpr(op2addr)));
+
+   return "ld";
+}
+
+static HChar *
+s390_irgen_LEY(UChar r1, IRTemp op2addr)
+{
+   put_fpr_w0(r1, load(Ity_F32, mkexpr(op2addr)));
+
+   return "ley";
+}
+
+static HChar *
+s390_irgen_LDY(UChar r1, IRTemp op2addr)
+{
+   put_fpr_dw0(r1, load(Ity_F64, mkexpr(op2addr)));
+
+   return "ldy";
+}
+
+static HChar *
+s390_irgen_LFPC(IRTemp op2addr)
+{
+   put_fpc_w0(load(Ity_I32, mkexpr(op2addr)));
+
+   return "lfpc";
+}
+
+static HChar *
+s390_irgen_LZER(UChar r1)
+{
+   put_fpr_w0(r1, mkF32i(0x0));
+
+   return "lzer";
+}
+
+static HChar *
+s390_irgen_LZDR(UChar r1)
+{
+   put_fpr_dw0(r1, mkF64i(0x0));
+
+   return "lzdr";
+}
+
+static HChar *
+s390_irgen_LZXR(UChar r1)
+{
+   put_fpr_dw0(r1, mkF64i(0x0));
+   put_fpr_dw0(r1 + 2, mkF64i(0x0));
+
+   return "lzxr";
+}
+
+static HChar *
+s390_irgen_SRNM(IRTemp op2addr)
+{
+   UInt mask;
+
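+   /* Replace the two low-order rounding-mode bits of the FPC with the two
+      low-order bits of the second-operand address. */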
+   mask = 3;
+   put_fpc_w0(binop(Iop_Or32, binop(Iop_And32, get_fpc_w0(), mkU32(~mask)),
+              binop(Iop_And32, unop(Iop_64to32, mkexpr(op2addr)), mkU32(mask)))
+              );
+
+   return "srnm";
+}
+
+static HChar *
+s390_irgen_SFPC(UChar r1)
+{
+   put_fpc_w0(get_gpr_w1(r1));
+
+   return "sfpc";
+}
+
+static HChar *
+s390_irgen_STE(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_fpr_w0(r1));
+
+   return "ste";
+}
+
+static HChar *
+s390_irgen_STD(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_fpr_dw0(r1));
+
+   return "std";
+}
+
+static HChar *
+s390_irgen_STEY(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_fpr_w0(r1));
+
+   return "stey";
+}
+
+static HChar *
+s390_irgen_STDY(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_fpr_dw0(r1));
+
+   return "stdy";
+}
+
+static HChar *
+s390_irgen_STFPC(IRTemp op2addr)
+{
+   store(mkexpr(op2addr), get_fpc_w0());
+
+   return "stfpc";
+}
+
+static HChar *
+s390_irgen_AEBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F32);
+   IRTemp op2 = newTemp(Ity_F32);
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(op1, get_fpr_w0(r1));
+   assign(op2, get_fpr_w0(r2));
+   assign(result, triop(Iop_AddF32, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_32, result);
+   put_fpr_w0(r1, mkexpr(result));
+
+   return "aebr";
+}
+
+static HChar *
+s390_irgen_ADBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F64);
+   IRTemp op2 = newTemp(Ity_F64);
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(op1, get_fpr_dw0(r1));
+   assign(op2, get_fpr_dw0(r2));
+   assign(result, triop(Iop_AddF64, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_64, result);
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "adbr";
+}
+
+static HChar *
+s390_irgen_AEB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_F32);
+   IRTemp op2 = newTemp(Ity_F32);
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(op1, get_fpr_w0(r1));
+   assign(op2, load(Ity_F32, mkexpr(op2addr)));
+   assign(result, triop(Iop_AddF32, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_32, result);
+   put_fpr_w0(r1, mkexpr(result));
+
+   return "aeb";
+}
+
+static HChar *
+s390_irgen_ADB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_F64);
+   IRTemp op2 = newTemp(Ity_F64);
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(op1, get_fpr_dw0(r1));
+   assign(op2, load(Ity_F64, mkexpr(op2addr)));
+   assign(result, triop(Iop_AddF64, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_64, result);
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "adb";
+}
+
+static HChar *
+s390_irgen_CEFBR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   put_fpr_w0(r1, binop(Iop_I32StoF32, mkU32(Irrm_NEAREST), mkexpr(op2)));
+
+   return "cefbr";
+}
+
+static HChar *
+s390_irgen_CDFBR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   put_fpr_dw0(r1, unop(Iop_I32StoF64, mkexpr(op2)));
+
+   return "cdfbr";
+}
+
+static HChar *
+s390_irgen_CEGBR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   put_fpr_w0(r1, binop(Iop_I64StoF32, mkU32(Irrm_NEAREST), mkexpr(op2)));
+
+   return "cegbr";
+}
+
+static HChar *
+s390_irgen_CDGBR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   put_fpr_dw0(r1, binop(Iop_I64StoF64, mkU32(Irrm_NEAREST), mkexpr(op2)));
+
+   return "cdgbr";
+}
+
+static HChar *
+s390_irgen_CFEBR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op = newTemp(Ity_F32);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op, get_fpr_w0(r2));
+   assign(result, binop(Iop_F32toI32S, mkU32(encode_rounding_mode(r3)),
+          mkexpr(op)));
+   put_gpr_w1(r1, mkexpr(result));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_32_TO_INT_32, op);
+
+   return "cfebr";
+}
+
+static HChar *
+s390_irgen_CFDBR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op = newTemp(Ity_F64);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op, get_fpr_dw0(r2));
+   assign(result, binop(Iop_F64toI32S, mkU32(encode_rounding_mode(r3)),
+          mkexpr(op)));
+   put_gpr_w1(r1, mkexpr(result));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_64_TO_INT_32, op);
+
+   return "cfdbr";
+}
+
+static HChar *
+s390_irgen_CGEBR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op = newTemp(Ity_F32);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op, get_fpr_w0(r2));
+   assign(result, binop(Iop_F32toI64S, mkU32(encode_rounding_mode(r3)),
+          mkexpr(op)));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_32_TO_INT_64, op);
+
+   return "cgebr";
+}
+
+static HChar *
+s390_irgen_CGDBR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op = newTemp(Ity_F64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op, get_fpr_dw0(r2));
+   assign(result, binop(Iop_F64toI64S, mkU32(encode_rounding_mode(r3)),
+          mkexpr(op)));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_64_TO_INT_64, op);
+
+   return "cgdbr";
+}
+
+static HChar *
+s390_irgen_DEBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F32);
+   IRTemp op2 = newTemp(Ity_F32);
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(op1, get_fpr_w0(r1));
+   assign(op2, get_fpr_w0(r2));
+   assign(result, triop(Iop_DivF32, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   put_fpr_w0(r1, mkexpr(result));
+
+   return "debr";
+}
+
+static HChar *
+s390_irgen_DDBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F64);
+   IRTemp op2 = newTemp(Ity_F64);
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(op1, get_fpr_dw0(r1));
+   assign(op2, get_fpr_dw0(r2));
+   assign(result, triop(Iop_DivF64, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "ddbr";
+}
+
+static HChar *
+s390_irgen_DEB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_F32);
+   IRTemp op2 = newTemp(Ity_F32);
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(op1, get_fpr_w0(r1));
+   assign(op2, load(Ity_F32, mkexpr(op2addr)));
+   assign(result, triop(Iop_DivF32, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   put_fpr_w0(r1, mkexpr(result));
+
+   return "deb";
+}
+
+static HChar *
+s390_irgen_DDB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_F64);
+   IRTemp op2 = newTemp(Ity_F64);
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(op1, get_fpr_dw0(r1));
+   assign(op2, load(Ity_F64, mkexpr(op2addr)));
+   assign(result, triop(Iop_DivF64, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "ddb";
+}
+
+static HChar *
+s390_irgen_LTEBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(result, get_fpr_w0(r2));
+   put_fpr_w0(r1, mkexpr(result));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_32, result);
+
+   return "ltebr";
+}
+
+static HChar *
+s390_irgen_LTDBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(result, get_fpr_dw0(r2));
+   put_fpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_64, result);
+
+   return "ltdbr";
+}
+
+static HChar *
+s390_irgen_LCEBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(result, unop(Iop_NegF32, get_fpr_w0(r2)));
+   put_fpr_w0(r1, mkexpr(result));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_32, result);
+
+   return "lcebr";
+}
+
+static HChar *
+s390_irgen_LCDBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(result, unop(Iop_NegF64, get_fpr_dw0(r2)));
+   put_fpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_64, result);
+
+   return "lcdbr";
+}
+
+static HChar *
+s390_irgen_LDEBR(UChar r1, UChar r2)
+{
+   IRTemp op = newTemp(Ity_F32);
+
+   assign(op, get_fpr_w0(r2));
+   put_fpr_dw0(r1, unop(Iop_F32toF64, mkexpr(op)));
+
+   return "ldebr";
+}
+
+static HChar *
+s390_irgen_LDEB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op = newTemp(Ity_F32);
+
+   assign(op, load(Ity_F32, mkexpr(op2addr)));
+   put_fpr_dw0(r1, unop(Iop_F32toF64, mkexpr(op)));
+
+   return "ldeb";
+}
+
+static HChar *
+s390_irgen_LEDBR(UChar r1, UChar r2)
+{
+   IRTemp op = newTemp(Ity_F64);
+
+   assign(op, get_fpr_dw0(r2));
+   put_fpr_w0(r1, binop(Iop_F64toF32, mkU32(Irrm_NEAREST), mkexpr(op)));
+
+   return "ledbr";
+}
+
+static HChar *
+s390_irgen_MEEBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F32);
+   IRTemp op2 = newTemp(Ity_F32);
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(op1, get_fpr_w0(r1));
+   assign(op2, get_fpr_w0(r2));
+   assign(result, triop(Iop_MulF32, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   put_fpr_w0(r1, mkexpr(result));
+
+   return "meebr";
+}
+
+static HChar *
+s390_irgen_MDBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F64);
+   IRTemp op2 = newTemp(Ity_F64);
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(op1, get_fpr_dw0(r1));
+   assign(op2, get_fpr_dw0(r2));
+   assign(result, triop(Iop_MulF64, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "mdbr";
+}
+
+static HChar *
+s390_irgen_MEEB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_F32);
+   IRTemp op2 = newTemp(Ity_F32);
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(op1, get_fpr_w0(r1));
+   assign(op2, load(Ity_F32, mkexpr(op2addr)));
+   assign(result, triop(Iop_MulF32, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   put_fpr_w0(r1, mkexpr(result));
+
+   return "meeb";
+}
+
+static HChar *
+s390_irgen_MDB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_F64);
+   IRTemp op2 = newTemp(Ity_F64);
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(op1, get_fpr_dw0(r1));
+   assign(op2, load(Ity_F64, mkexpr(op2addr)));
+   assign(result, triop(Iop_MulF64, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "mdb";
+}
+
+static HChar *
+s390_irgen_SEBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F32);
+   IRTemp op2 = newTemp(Ity_F32);
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(op1, get_fpr_w0(r1));
+   assign(op2, get_fpr_w0(r2));
+   assign(result, triop(Iop_SubF32, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_32, result);
+   put_fpr_w0(r1, mkexpr(result));
+
+   return "sebr";
+}
+
+static HChar *
+s390_irgen_SDBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F64);
+   IRTemp op2 = newTemp(Ity_F64);
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(op1, get_fpr_dw0(r1));
+   assign(op2, get_fpr_dw0(r2));
+   assign(result, triop(Iop_SubF64, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_64, result);
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "sdbr";
+}
+
+static HChar *
+s390_irgen_SEB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_F32);
+   IRTemp op2 = newTemp(Ity_F32);
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(op1, get_fpr_w0(r1));
+   assign(op2, load(Ity_F32, mkexpr(op2addr)));
+   assign(result, triop(Iop_SubF32, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_32, result);
+   put_fpr_w0(r1, mkexpr(result));
+
+   return "seb";
+}
+
+static HChar *
+s390_irgen_SDB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_F64);
+   IRTemp op2 = newTemp(Ity_F64);
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(op1, get_fpr_dw0(r1));
+   assign(op2, load(Ity_F64, mkexpr(op2addr)));
+   assign(result, triop(Iop_SubF64, mkU32(Irrm_NEAREST), mkexpr(op1),
+          mkexpr(op2)));
+   s390_cc_thunk_putF(S390_CC_OP_BFP_RESULT_64, result);
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "sdb";
+}
+
+
+static HChar *
+s390_irgen_CLC(UChar length, IRTemp start1, IRTemp start2)
+{
+   IRTemp current1 = newTemp(Ity_I8);
+   IRTemp current2 = newTemp(Ity_I8);
+   IRTemp counter = newTemp(Ity_I64);
+
+   assign(counter, get_counter_dw0());
+   put_counter_dw0(mkU64(0));
+
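+   /* Compare one byte per iteration.  The guest "counter" pseudo-register
+      holds the loop index; branching back to guest_IA_curr_instr below
+      re-executes this instruction for the next byte. */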
+   assign(current1, load(Ity_I8, binop(Iop_Add64, mkexpr(start1),
+                                       mkexpr(counter))));
+   assign(current2, load(Ity_I8, binop(Iop_Add64, mkexpr(start2),
+                                       mkexpr(counter))));
+   s390_cc_thunk_put2(S390_CC_OP_UNSIGNED_COMPARE, current1, current2,
+                      False);
+
+   /* Do the current bytes differ? */
+   if_condition_goto(binop(Iop_CmpNE8, mkexpr(current1), mkexpr(current2)),
+                     guest_IA_next_instr);
+
+   /* Check for end of field */
+   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
+   if_condition_goto(binop(Iop_CmpNE64, mkexpr(counter), mkU64(length)),
+                     guest_IA_curr_instr);
+   put_counter_dw0(mkU64(0));
+
+   return "clc";
+}
+
+static HChar *
+s390_irgen_CLCL(UChar r1, UChar r2)
+{
+   IRTemp addr1 = newTemp(Ity_I64);
+   IRTemp addr2 = newTemp(Ity_I64);
+   IRTemp addr1_load = newTemp(Ity_I64);
+   IRTemp addr2_load = newTemp(Ity_I64);
+   IRTemp len1 = newTemp(Ity_I32);
+   IRTemp len2 = newTemp(Ity_I32);
+   IRTemp r1p1 = newTemp(Ity_I32);   /* contents of r1 + 1 */
+   IRTemp r2p1 = newTemp(Ity_I32);   /* contents of r2 + 1 */
+   IRTemp single1 = newTemp(Ity_I8);
+   IRTemp single2 = newTemp(Ity_I8);
+   IRTemp pad = newTemp(Ity_I8);
+
+   assign(addr1, get_gpr_dw0(r1));
+   assign(r1p1, get_gpr_w1(r1 + 1));
+   assign(len1, binop(Iop_And32, mkexpr(r1p1), mkU32(0x00ffffff)));
+   assign(addr2, get_gpr_dw0(r2));
+   assign(r2p1, get_gpr_w1(r2 + 1));
+   assign(len2, binop(Iop_And32, mkexpr(r2p1), mkU32(0x00ffffff)));
+   assign(pad, get_gpr_b4(r2 + 1));
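+   /* r1 and r2 hold the operand addresses; the low 24 bits of r1+1 and r2+1
+      hold the lengths; byte 4 of r2+1 is the padding byte used once the
+      shorter operand is exhausted. */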
+
+   /* len1 == 0 and len2 == 0? Exit */
+   s390_cc_set(0);
+   if_condition_goto(binop(Iop_CmpEQ32, binop(Iop_Or32, mkexpr(len1),
+                                              mkexpr(len2)), mkU32(0)),
+                     guest_IA_next_instr);
+
+   /* Because mkite evaluates both the then-clause and the else-clause
+      we cannot load directly from addr1 here. If len1 is 0, then addr1
+      may be NULL and loading from there would segfault. So we provide a
+      valid dummy address in that case. Loading from there does no harm and
+      the value will be discarded at runtime. */
+   assign(addr1_load,
+          mkite(binop(Iop_CmpEQ32, mkexpr(len1), mkU32(0)),
+                mkU64(guest_IA_curr_instr), mkexpr(addr1)));
+   assign(single1,
+          mkite(binop(Iop_CmpEQ32, mkexpr(len1), mkU32(0)),
+                mkexpr(pad), load(Ity_I8, mkexpr(addr1_load))));
+
+   assign(addr2_load,
+          mkite(binop(Iop_CmpEQ32, mkexpr(len2), mkU32(0)),
+                mkU64(guest_IA_curr_instr), mkexpr(addr2)));
+   assign(single2,
+          mkite(binop(Iop_CmpEQ32, mkexpr(len2), mkU32(0)),
+                mkexpr(pad), load(Ity_I8, mkexpr(addr2_load))));
+
+   s390_cc_thunk_put2(S390_CC_OP_UNSIGNED_COMPARE, single1, single2, False);
+   /* Do the bytes differ? */
+   if_condition_goto(binop(Iop_CmpNE8, mkexpr(single1), mkexpr(single2)),
+                     guest_IA_next_instr);
+
+   /* Update len1 and addr1, unless len1 == 0. */
+   put_gpr_dw0(r1,
+               mkite(binop(Iop_CmpEQ32, mkexpr(len1), mkU32(0)),
+                     mkexpr(addr1),
+                     binop(Iop_Add64, mkexpr(addr1), mkU64(1))));
+
+   /* When updating len1 we must not modify bits (r1+1)[0:39] */
+   put_gpr_w1(r1 + 1,
+              mkite(binop(Iop_CmpEQ32, mkexpr(len1), mkU32(0)),
+                    binop(Iop_And32, mkexpr(r1p1), mkU32(0xFF000000u)),
+                    binop(Iop_Sub32, mkexpr(r1p1), mkU32(1))));
+
+   /* Update len2 and addr2, unless len2 == 0. */
+   put_gpr_dw0(r2,
+               mkite(binop(Iop_CmpEQ32, mkexpr(len2), mkU32(0)),
+                     mkexpr(addr2),
+                     binop(Iop_Add64, mkexpr(addr2), mkU64(1))));
+
+   /* When updating len2 we must not modify bits (r2+1)[0:39] */
+   put_gpr_w1(r2 + 1,
+              mkite(binop(Iop_CmpEQ32, mkexpr(len2), mkU32(0)),
+                    binop(Iop_And32, mkexpr(r2p1), mkU32(0xFF000000u)),
+                    binop(Iop_Sub32, mkexpr(r2p1), mkU32(1))));
+
+   always_goto_and_chase(guest_IA_curr_instr);
+
+   return "clcl";
+}
+
+static HChar *
+s390_irgen_CLCLE(UChar r1, UChar r3, IRTemp pad2)
+{
+   IRTemp addr1, addr3, addr1_load, addr3_load, len1, len3, single1, single3;
+
+   addr1 = newTemp(Ity_I64);
+   addr3 = newTemp(Ity_I64);
+   addr1_load = newTemp(Ity_I64);
+   addr3_load = newTemp(Ity_I64);
+   len1 = newTemp(Ity_I64);
+   len3 = newTemp(Ity_I64);
+   single1 = newTemp(Ity_I8);
+   single3 = newTemp(Ity_I8);
+
+   assign(addr1, get_gpr_dw0(r1));
+   assign(len1, get_gpr_dw0(r1 + 1));
+   assign(addr3, get_gpr_dw0(r3));
+   assign(len3, get_gpr_dw0(r3 + 1));
+
+   /* len1 == 0 and len3 == 0? Exit */
+   s390_cc_set(0);
+   if_condition_goto(binop(Iop_CmpEQ64,binop(Iop_Or64, mkexpr(len1),
+                                             mkexpr(len3)), mkU64(0)),
+                     guest_IA_next_instr);
+
+   /* A mux evaluates both arms, so we must not load from addr1 when the pad
+      byte is to be used instead. The pad has no address, so in that case we
+      load from the instruction itself; the value is discarded at runtime. */
+   assign(addr1_load,
+          mkite(binop(Iop_CmpEQ64, mkexpr(len1), mkU64(0)),
+                mkU64(guest_IA_curr_instr), mkexpr(addr1)));
+
+   /* same for addr3 */
+   assign(addr3_load,
+          mkite(binop(Iop_CmpEQ64, mkexpr(len3), mkU64(0)),
+                mkU64(guest_IA_curr_instr), mkexpr(addr3)));
+
+   assign(single1,
+          mkite(binop(Iop_CmpEQ64, mkexpr(len1), mkU64(0)),
+                unop(Iop_64to8, mkexpr(pad2)),
+                load(Ity_I8, mkexpr(addr1_load))));
+
+   assign(single3,
+          mkite(binop(Iop_CmpEQ64, mkexpr(len3), mkU64(0)),
+                unop(Iop_64to8, mkexpr(pad2)),
+                load(Ity_I8, mkexpr(addr3_load))));
+
+   s390_cc_thunk_put2(S390_CC_OP_UNSIGNED_COMPARE, single1, single3, False);
+   /* Do the current bytes differ? */
+   if_condition_goto(binop(Iop_CmpNE8, mkexpr(single1), mkexpr(single3)),
+                     guest_IA_next_instr);
+
+   /* If a length is 0, we must not change that length or its address */
+   put_gpr_dw0(r1,
+               mkite(binop(Iop_CmpEQ64, mkexpr(len1), mkU64(0)),
+                     mkexpr(addr1),
+                     binop(Iop_Add64, mkexpr(addr1), mkU64(1))));
+
+   put_gpr_dw0(r1 + 1,
+               mkite(binop(Iop_CmpEQ64, mkexpr(len1), mkU64(0)),
+                     mkU64(0), binop(Iop_Sub64, mkexpr(len1), mkU64(1))));
+
+   put_gpr_dw0(r3,
+               mkite(binop(Iop_CmpEQ64, mkexpr(len3), mkU64(0)),
+                     mkexpr(addr3),
+                     binop(Iop_Add64, mkexpr(addr3), mkU64(1))));
+
+   put_gpr_dw0(r3 + 1,
+               mkite(binop(Iop_CmpEQ64, mkexpr(len3), mkU64(0)),
+                     mkU64(0), binop(Iop_Sub64, mkexpr(len3), mkU64(1))));
+
+   /* The architecture requires that we exit with CC3 after a machine-specific
+      number of bytes. We do that when (len1 + len3) % 4096 == 0 */
+   s390_cc_set(3);
+   if_condition_goto(binop(Iop_CmpEQ64,
+                           binop(Iop_And64,
+                                 binop(Iop_Add64, mkexpr(len1), mkexpr(len3)),
+                                 mkU64(0xfff)),
+                           mkU64(0)),
+                     guest_IA_next_instr);
+
+   always_goto_and_chase(guest_IA_curr_instr);
+
+   return "clcle";
+}
+
+static void
+s390_irgen_XC_EX(IRTemp length, IRTemp start1, IRTemp start2)
+{
+   IRTemp old1 = newTemp(Ity_I8);
+   IRTemp old2 = newTemp(Ity_I8);
+   IRTemp new1 = newTemp(Ity_I8);
+   IRTemp counter = newTemp(Ity_I32);
+   IRTemp addr1 = newTemp(Ity_I64);
+
+   assign(counter, get_counter_w0());
+
+   assign(addr1, binop(Iop_Add64, mkexpr(start1),
+                       unop(Iop_32Uto64, mkexpr(counter))));
+
+   assign(old1, load(Ity_I8, mkexpr(addr1)));
+   assign(old2, load(Ity_I8, binop(Iop_Add64, mkexpr(start2),
+                                   unop(Iop_32Uto64,mkexpr(counter)))));
+   assign(new1, binop(Iop_Xor8, mkexpr(old1), mkexpr(old2)));
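+   /* x XOR x is 0, so exactly overlapping operands store 0; the mkite below
+      makes that explicit.  OR-accumulating the result bytes into counter
+      word 1 records whether any byte was nonzero, which determines the
+      condition code at the end. */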
+
+   store(mkexpr(addr1),
+         mkite(binop(Iop_CmpEQ64, mkexpr(start1), mkexpr(start2)),
+               mkU8(0), mkexpr(new1)));
+   put_counter_w1(binop(Iop_Or32, unop(Iop_8Uto32, mkexpr(new1)),
+                        get_counter_w1()));
+
+   /* Check for end of field */
+   put_counter_w0(binop(Iop_Add32, mkexpr(counter), mkU32(1)));
+   if_condition_goto(binop(Iop_CmpNE32, mkexpr(counter), mkexpr(length)),
+                     guest_IA_curr_instr);
+   s390_cc_thunk_put1(S390_CC_OP_BITWISE, mktemp(Ity_I32, get_counter_w1()),
+                      False);
+   put_counter_dw0(mkU64(0));
+}
+
+
+static void
+s390_irgen_CLC_EX(IRTemp length, IRTemp start1, IRTemp start2)
+{
+   IRTemp current1 = newTemp(Ity_I8);
+   IRTemp current2 = newTemp(Ity_I8);
+   IRTemp counter = newTemp(Ity_I64);
+
+   assign(counter, get_counter_dw0());
+   put_counter_dw0(mkU64(0));
+
+   assign(current1, load(Ity_I8, binop(Iop_Add64, mkexpr(start1),
+                                       mkexpr(counter))));
+   assign(current2, load(Ity_I8, binop(Iop_Add64, mkexpr(start2),
+                                       mkexpr(counter))));
+   s390_cc_thunk_put2(S390_CC_OP_UNSIGNED_COMPARE, current1, current2,
+                      False);
+
+   /* Do the current bytes differ? */
+   if_condition_goto(binop(Iop_CmpNE8, mkexpr(current1), mkexpr(current2)),
+                     guest_IA_next_instr);
+
+   /* Check for end of field */
+   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
+   if_condition_goto(binop(Iop_CmpNE64, mkexpr(counter), mkexpr(length)),
+                     guest_IA_curr_instr);
+   put_counter_dw0(mkU64(0));
+}
+
+static void
+s390_irgen_MVC_EX(IRTemp length, IRTemp start1, IRTemp start2)
+{
+   IRTemp counter = newTemp(Ity_I64);
+
+   assign(counter, get_counter_dw0());
+
+   store(binop(Iop_Add64, mkexpr(start1), mkexpr(counter)),
+         load(Ity_I8, binop(Iop_Add64, mkexpr(start2), mkexpr(counter))));
+
+   /* Check for end of field */
+   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
+   if_condition_goto(binop(Iop_CmpNE64, mkexpr(counter), mkexpr(length)),
+                     guest_IA_curr_instr);
+   put_counter_dw0(mkU64(0));
+}
+
+
+
+static void
+s390_irgen_EX_SS(UChar r, IRTemp addr2,
+void (*irgen)(IRTemp length, IRTemp start1, IRTemp start2), int lensize)
+{
+   struct SS {
+      unsigned int op :  8;
+      unsigned int l  :  8;
+      unsigned int b1 :  4;
+      unsigned int d1 : 12;
+      unsigned int b2 :  4;
+      unsigned int d2 : 12;
+   };
+   union {
+      struct SS dec;
+      unsigned long bytes;
+   } ss;
+   IRTemp cond;
+   IRDirty *d;
+   IRTemp torun;
+
+   IRTemp start1 = newTemp(Ity_I64);
+   IRTemp start2 = newTemp(Ity_I64);
+   IRTemp len = newTemp(lensize == 64 ? Ity_I64 : Ity_I32);
+   cond = newTemp(Ity_I1);
+   torun = newTemp(Ity_I64);
+
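+   /* EX of an SS-format target: the 8 bytes at the target address were
+      cached in last_execute_target when this translation was made.  At run
+      time, check that the target bytes still match; if not, the dirty
+      helper saves the new bytes and the translation is invalidated so it
+      gets redone with the new target. */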
+   assign(torun, load(Ity_I64, mkexpr(addr2)));
+   /* Start with a check that the saved code is still correct */
+   assign(cond, binop(Iop_CmpNE64, mkexpr(torun), mkU64(last_execute_target)));
+   /* If not, save the new value */
+   d = unsafeIRDirty_0_N (0, "s390x_dirtyhelper_EX", &s390x_dirtyhelper_EX,
+                          mkIRExprVec_1(mkexpr(torun)));
+   d->guard = mkexpr(cond);
+   stmt(IRStmt_Dirty(d));
+
+   /* and restart */
+   stmt(IRStmt_Put(OFFB_TISTART, mkU64(guest_IA_curr_instr)));
+   stmt(IRStmt_Put(OFFB_TILEN, mkU64(4)));
+   stmt(IRStmt_Exit(mkexpr(cond), Ijk_TInval,
+        IRConst_U64(guest_IA_curr_instr)));
+
+   ss.bytes = last_execute_target;
+   assign(start1, binop(Iop_Add64, mkU64(ss.dec.d1),
+          ss.dec.b1 != 0 ? get_gpr_dw0(ss.dec.b1) : mkU64(0)));
+   assign(start2, binop(Iop_Add64, mkU64(ss.dec.d2),
+          ss.dec.b2 != 0 ? get_gpr_dw0(ss.dec.b2) : mkU64(0)));
+   assign(len, unop(lensize == 64 ? Iop_8Uto64 : Iop_8Uto32, binop(Iop_Or8,
+          r != 0 ? get_gpr_b7(r): mkU8(0), mkU8(ss.dec.l))));
+   irgen(len, start1, start2);
+   last_execute_target = 0;
+}
+
+static HChar *
+s390_irgen_EX(UChar r1, IRTemp addr2)
+{
+   switch(last_execute_target & 0xff00000000000000ULL) {
+   case 0:
+   {
+      /* no code information yet */
+      IRDirty *d;
+
+      /* ... so save the code ... */
+      d = unsafeIRDirty_0_N (0, "s390x_dirtyhelper_EX", &s390x_dirtyhelper_EX,
+                             mkIRExprVec_1(load(Ity_I64, mkexpr(addr2))));
+      stmt(IRStmt_Dirty(d));
+      /* and restart */
+      stmt(IRStmt_Put(OFFB_TISTART, mkU64(guest_IA_curr_instr)));
+      stmt(IRStmt_Put(OFFB_TILEN, mkU64(4)));
+      stmt(IRStmt_Exit(IRExpr_Const(IRConst_U1(True)), Ijk_TInval,
+           IRConst_U64(guest_IA_curr_instr)));
+      /* we know that this will be invalidated */
+      irsb->next = mkU64(guest_IA_next_instr);
+      dis_res->whatNext = Dis_StopHere;
+      break;
+   }
+
+   case 0xd200000000000000ULL:
+      /* special case MVC */
+      s390_irgen_EX_SS(r1, addr2, s390_irgen_MVC_EX, 64);
+      return "mvc via ex";
+
+   case 0xd500000000000000ULL:
+      /* special case CLC */
+      s390_irgen_EX_SS(r1, addr2, s390_irgen_CLC_EX, 64);
+      return "clc via ex";
+
+   case 0xd700000000000000ULL:
+      /* special case XC */
+      s390_irgen_EX_SS(r1, addr2, s390_irgen_XC_EX, 32);
+      return "xc via ex";
+
+   default:
+   {
+      /* Everything else gets a self-checking prefix that also checks the
+         register content */
+      IRDirty *d;
+      UChar *bytes;
+      IRTemp cond;
+      IRTemp orperand;
+      IRTemp torun;
+
+      cond = newTemp(Ity_I1);
+      orperand = newTemp(Ity_I64);
+      torun = newTemp(Ity_I64);
+
+      if (r1 == 0)
+         assign(orperand, mkU64(0));
+      else
+         assign(orperand, unop(Iop_8Uto64,get_gpr_b7(r1)));
+      /* This code is going to be translated */
+      assign(torun, binop(Iop_Or64, load(Ity_I64, mkexpr(addr2)),
+             binop(Iop_Shl64, mkexpr(orperand), mkU8(48))));
+
+      /* Start with a check that the saved code is still correct */
+      assign(cond, binop(Iop_CmpNE64, mkexpr(torun),
+             mkU64(last_execute_target)));
+      /* If not, save the new value */
+      d = unsafeIRDirty_0_N (0, "s390x_dirtyhelper_EX", &s390x_dirtyhelper_EX,
+                             mkIRExprVec_1(mkexpr(torun)));
+      d->guard = mkexpr(cond);
+      stmt(IRStmt_Dirty(d));
+
+      /* and restart */
+      stmt(IRStmt_Put(OFFB_TISTART, mkU64(guest_IA_curr_instr)));
+      stmt(IRStmt_Put(OFFB_TILEN, mkU64(4)));
+      stmt(IRStmt_Exit(mkexpr(cond), Ijk_TInval,
+           IRConst_U64(guest_IA_curr_instr)));
+
+      /* Now comes the actual translation */
+      bytes = (UChar *) &last_execute_target;
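+      /* The two topmost bits of the first opcode byte encode the
+         instruction length: 00 -> 2 bytes, 01/10 -> 4 bytes, 11 -> 6
+         bytes; that is what the expression below computes. */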
+      s390_decode_and_irgen(bytes, ((((bytes[0] >> 6) + 1) >> 1) + 1) << 1,
+                            dis_res);
+      if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+         vex_printf("    which was executed by\n");
+      /* don't create useless translations for the next execute */
+      last_execute_target = 0;
+   }
+   }
+   return "ex";
+}
+
+static HChar *
+s390_irgen_EXRL(UChar r1, UInt offset)
+{
+   IRTemp addr = newTemp(Ity_I64);
+   /* we might save one round trip because we know the target */
+   if (!last_execute_target)
+      last_execute_target = *(ULong *)(HWord)
+                             (guest_IA_curr_instr + offset * 2UL);
+   assign(addr, mkU64(guest_IA_curr_instr + offset * 2UL));
+   s390_irgen_EX(r1, addr);
+   return "exrl";
+}
+
+static HChar *
+s390_irgen_IPM(UChar r1)
+{
+   // As long as we don't support SPM, let's just assume a program mask of 0
+   put_gpr_b4(r1, unop(Iop_32to8, binop(Iop_Or32, mkU32(0 /* program mask */),
+                       binop(Iop_Shl32, s390_call_calculate_cc(), mkU8(4)))));
+
+   return "ipm";
+}
+
+
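+/* SRST scans the string starting at the address in r2 for the delimiter
+   byte held in the low byte of GPR 0; r1 holds the address past the end
+   of the string. As with the real instruction, we may stop early with
+   CC=3 after a CPU-determined amount of work -- here 256 bytes per
+   translation, tracked in the counter pseudo-register. */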
+static HChar *
+s390_irgen_SRST(UChar r1, UChar r2)
+{
+   IRTemp address = newTemp(Ity_I64);
+   IRTemp next = newTemp(Ity_I64);
+   IRTemp delim = newTemp(Ity_I8);
+   IRTemp counter = newTemp(Ity_I64);
+   IRTemp byte = newTemp(Ity_I8);
+
+   assign(address, get_gpr_dw0(r2));
+   assign(next, get_gpr_dw0(r1));
+
+   assign(counter, get_counter_dw0());
+   put_counter_dw0(mkU64(0));
+
+   // address == next?  CC=2, leave r1 and r2 unchanged
+   s390_cc_set(2);
+   put_gpr_dw0(r2, binop(Iop_Sub64, mkexpr(address), mkexpr(counter)));
+   if_condition_goto(binop(Iop_CmpEQ64, mkexpr(address), mkexpr(next)),
+                     guest_IA_next_instr);
+
+   assign(byte, load(Ity_I8, mkexpr(address)));
+   assign(delim, get_gpr_b7(0));
+
+   // byte = delim? CC=1, R1=address
+   s390_cc_set(1);
+   put_gpr_dw0(r1,  mkexpr(address));
+   if_condition_goto(binop(Iop_CmpEQ8, mkexpr(delim), mkexpr(byte)),
+                     guest_IA_next_instr);
+
+   // else: all equal, no end yet, loop
+   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
+   put_gpr_dw0(r1, mkexpr(next));
+   put_gpr_dw0(r2, binop(Iop_Add64, mkexpr(address), mkU64(1)));
+   stmt(IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)),
+                    Ijk_Boring, IRConst_U64(guest_IA_curr_instr)));
+   // >= 256 bytes done CC=3
+   s390_cc_set(3);
+   put_counter_dw0(mkU64(0));
+
+   return "srst";
+}
+
+static HChar *
+s390_irgen_CLST(UChar r1, UChar r2)
+{
+   IRTemp address1 = newTemp(Ity_I64);
+   IRTemp address2 = newTemp(Ity_I64);
+   IRTemp end = newTemp(Ity_I8);
+   IRTemp counter = newTemp(Ity_I64);
+   IRTemp byte1 = newTemp(Ity_I8);
+   IRTemp byte2 = newTemp(Ity_I8);
+
+   assign(address1, get_gpr_dw0(r1));
+   assign(address2, get_gpr_dw0(r2));
+   assign(end, get_gpr_b7(0));
+   assign(counter, get_counter_dw0());
+   put_counter_dw0(mkU64(0));
+   assign(byte1, load(Ity_I8, mkexpr(address1)));
+   assign(byte2, load(Ity_I8, mkexpr(address2)));
+
+   // end in both? all equal, reset r1 and r2 to start values
+   s390_cc_set(0);
+   put_gpr_dw0(r1, binop(Iop_Sub64, mkexpr(address1), mkexpr(counter)));
+   put_gpr_dw0(r2, binop(Iop_Sub64, mkexpr(address2), mkexpr(counter)));
+   if_condition_goto(binop(Iop_CmpEQ8, mkU8(0),
+                           binop(Iop_Or8,
+                                 binop(Iop_Xor8, mkexpr(byte1), mkexpr(end)),
+                                 binop(Iop_Xor8, mkexpr(byte2), mkexpr(end)))),
+                     guest_IA_next_instr);
+
+   put_gpr_dw0(r1, mkexpr(address1));
+   put_gpr_dw0(r2, mkexpr(address2));
+
+   // End found in string1
+   s390_cc_set(1);
+   if_condition_goto(binop(Iop_CmpEQ8, mkexpr(end), mkexpr(byte1)),
+                     guest_IA_next_instr);
+
+   // End found in string2
+   s390_cc_set(2);
+   if_condition_goto(binop(Iop_CmpEQ8, mkexpr(end), mkexpr(byte2)),
+                     guest_IA_next_instr);
+
+   // string1 < string2
+   s390_cc_set(1);
+   if_condition_goto(binop(Iop_CmpLT32U, unop(Iop_8Uto32, mkexpr(byte1)),
+                           unop(Iop_8Uto32, mkexpr(byte2))),
+                     guest_IA_next_instr);
+
+   // string2 < string1
+   s390_cc_set(2);
+   if_condition_goto(binop(Iop_CmpLT32U, unop(Iop_8Uto32, mkexpr(byte2)),
+                           unop(Iop_8Uto32, mkexpr(byte1))),
+                     guest_IA_next_instr);
+
+   // else: all equal, no end yet, loop
+   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
+   put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), mkU64(1)));
+   put_gpr_dw0(r2, binop(Iop_Add64, get_gpr_dw0(r2), mkU64(1)));
+   stmt(IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)),
+                    Ijk_Boring, IRConst_U64(guest_IA_curr_instr)));
+   // >= 256 bytes done CC=3
+   s390_cc_set(3);
+   put_counter_dw0(mkU64(0));
+
+   return "clst";
+}
+
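+/* In the load/store-multiple family the register number wraps around from
+   15 to 0 (reg %= 16), so e.g. LM r14,r1 loads r14, r15, r0 and r1. The
+   loop below walks the registers, bumping the address by the operand size
+   each time. */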
+static void
+s390_irgen_load_multiple_32bit(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar reg;
+   IRTemp addr = newTemp(Ity_I64);
+
+   assign(addr, mkexpr(op2addr));
+   reg = r1;
+   do {
+      IRTemp old = addr;
+
+      reg %= 16;
+      put_gpr_w1(reg, load(Ity_I32, mkexpr(addr)));
+      addr = newTemp(Ity_I64);
+      assign(addr, binop(Iop_Add64, mkexpr(old), mkU64(4)));
+      reg++;
+   } while (reg != (r3 + 1));
+}
+
+static HChar *
+s390_irgen_LM(UChar r1, UChar r3, IRTemp op2addr)
+{
+   s390_irgen_load_multiple_32bit(r1, r3, op2addr);
+
+   return "lm";
+}
+
+static HChar *
+s390_irgen_LMY(UChar r1, UChar r3, IRTemp op2addr)
+{
+   s390_irgen_load_multiple_32bit(r1, r3, op2addr);
+
+   return "lmy";
+}
+
+static HChar *
+s390_irgen_LMH(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar reg;
+   IRTemp addr = newTemp(Ity_I64);
+
+   assign(addr, mkexpr(op2addr));
+   reg = r1;
+   do {
+      IRTemp old = addr;
+
+      reg %= 16;
+      put_gpr_w0(reg, load(Ity_I32, mkexpr(addr)));
+      addr = newTemp(Ity_I64);
+      assign(addr, binop(Iop_Add64, mkexpr(old), mkU64(4)));
+      reg++;
+   } while (reg != (r3 + 1));
+
+   return "lmh";
+}
+
+static HChar *
+s390_irgen_LMG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar reg;
+   IRTemp addr = newTemp(Ity_I64);
+
+   assign(addr, mkexpr(op2addr));
+   reg = r1;
+   do {
+      IRTemp old = addr;
+
+      reg %= 16;
+      put_gpr_dw0(reg, load(Ity_I64, mkexpr(addr)));
+      addr = newTemp(Ity_I64);
+      assign(addr, binop(Iop_Add64, mkexpr(old), mkU64(8)));
+      reg++;
+   } while (reg != (r3 + 1));
+
+   return "lmg";
+}
+
+static void
+s390_irgen_store_multiple_32bit(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar reg;
+   IRTemp addr = newTemp(Ity_I64);
+
+   assign(addr, mkexpr(op2addr));
+   reg = r1;
+   do {
+      IRTemp old = addr;
+
+      reg %= 16;
+      store(mkexpr(addr), get_gpr_w1(reg));
+      addr = newTemp(Ity_I64);
+      assign(addr, binop(Iop_Add64, mkexpr(old), mkU64(4)));
+      reg++;
+   } while (reg != (r3 + 1));
+}
+
+static HChar *
+s390_irgen_STM(UChar r1, UChar r3, IRTemp op2addr)
+{
+   s390_irgen_store_multiple_32bit(r1, r3, op2addr);
+
+   return "stm";
+}
+
+static HChar *
+s390_irgen_STMY(UChar r1, UChar r3, IRTemp op2addr)
+{
+   s390_irgen_store_multiple_32bit(r1, r3, op2addr);
+
+   return "stmy";
+}
+
+static HChar *
+s390_irgen_STMH(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar reg;
+   IRTemp addr = newTemp(Ity_I64);
+
+   assign(addr, mkexpr(op2addr));
+   reg = r1;
+   do {
+      IRTemp old = addr;
+
+      reg %= 16;
+      store(mkexpr(addr), get_gpr_w0(reg));
+      addr = newTemp(Ity_I64);
+      assign(addr, binop(Iop_Add64, mkexpr(old), mkU64(4)));
+      reg++;
+   } while (reg != (r3 + 1));
+
+   return "stmh";
+}
+
+static HChar *
+s390_irgen_STMG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar reg;
+   IRTemp addr = newTemp(Ity_I64);
+
+   assign(addr, mkexpr(op2addr));
+   reg = r1;
+   do {
+      IRTemp old = addr;
+
+      reg %= 16;
+      store(mkexpr(addr), get_gpr_dw0(reg));
+      addr = newTemp(Ity_I64);
+      assign(addr, binop(Iop_Add64, mkexpr(old), mkU64(8)));
+      reg++;
+   } while (reg != (r3 + 1));
+
+   return "stmg";
+}
+
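+/* Common implementation of XC, NC and OC. One byte is combined per
+   iteration; the counter pseudo-register's word 0 drives the loop while
+   word 1 accumulates the OR of all result bytes, from which the condition
+   code (all zero / not all zero) is derived at the end. */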
+static void
+s390_irgen_XONC(IROp op, UChar length, IRTemp start1, IRTemp start2)
+{
+   IRTemp old1 = newTemp(Ity_I8);
+   IRTemp old2 = newTemp(Ity_I8);
+   IRTemp new1 = newTemp(Ity_I8);
+   IRTemp counter = newTemp(Ity_I32);
+   IRTemp addr1 = newTemp(Ity_I64);
+
+   assign(counter, get_counter_w0());
+
+   assign(addr1, binop(Iop_Add64, mkexpr(start1),
+                       unop(Iop_32Uto64, mkexpr(counter))));
+
+   assign(old1, load(Ity_I8, mkexpr(addr1)));
+   assign(old2, load(Ity_I8, binop(Iop_Add64, mkexpr(start2),
+                                   unop(Iop_32Uto64,mkexpr(counter)))));
+   assign(new1, binop(op, mkexpr(old1), mkexpr(old2)));
+
+   /* Special case: xc is used to zero memory */
+   if (op == Iop_Xor8) {
+      store(mkexpr(addr1),
+            mkite(binop(Iop_CmpEQ64, mkexpr(start1), mkexpr(start2)),
+                  mkU8(0), mkexpr(new1)));
+   } else
+      store(mkexpr(addr1), mkexpr(new1));
+   put_counter_w1(binop(Iop_Or32, unop(Iop_8Uto32, mkexpr(new1)),
+                        get_counter_w1()));
+
+   /* Check for end of field */
+   put_counter_w0(binop(Iop_Add32, mkexpr(counter), mkU32(1)));
+   if_condition_goto(binop(Iop_CmpNE32, mkexpr(counter), mkU32(length)),
+                     guest_IA_curr_instr);
+   s390_cc_thunk_put1(S390_CC_OP_BITWISE, mktemp(Ity_I32, get_counter_w1()),
+                      False);
+   put_counter_dw0(mkU64(0));
+}
+
+static HChar *
+s390_irgen_XC(UChar length, IRTemp start1, IRTemp start2)
+{
+   s390_irgen_XONC(Iop_Xor8, length, start1, start2);
+
+   return "xc";
+}
+
+static void
+s390_irgen_XC_sameloc(UChar length, UChar b, UShort d)
+{
+   IRTemp counter = newTemp(Ity_I32);
+   IRTemp start = newTemp(Ity_I64);
+   IRTemp addr  = newTemp(Ity_I64);
+
+   assign(start,
+          binop(Iop_Add64, mkU64(d), b != 0 ? get_gpr_dw0(b) : mkU64(0)));
+
+   if (length < 8) {
+      UInt i;
+
+      for (i = 0; i <= length; ++i) {
+         store(binop(Iop_Add64, mkexpr(start), mkU64(i)), mkU8(0));
+      }
+   } else {
+     assign(counter, get_counter_w0());
+
+     assign(addr, binop(Iop_Add64, mkexpr(start),
+                        unop(Iop_32Uto64, mkexpr(counter))));
+
+     store(mkexpr(addr), mkU8(0));
+
+     /* Check for end of field */
+     put_counter_w0(binop(Iop_Add32, mkexpr(counter), mkU32(1)));
+     if_condition_goto(binop(Iop_CmpNE32, mkexpr(counter), mkU32(length)),
+                       guest_IA_curr_instr);
+
+     /* Reset counter */
+     put_counter_dw0(mkU64(0));
+   }
+
+   s390_cc_thunk_put1(S390_CC_OP_BITWISE, mktemp(Ity_I32, mkU32(0)), False);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC3(MNM, UDLB, UDXB), "xc", d, length, b, d, 0, b);
+}
+
+static HChar *
+s390_irgen_NC(UChar length, IRTemp start1, IRTemp start2)
+{
+   s390_irgen_XONC(Iop_And8, length, start1, start2);
+
+   return "nc";
+}
+
+static HChar *
+s390_irgen_OC(UChar length, IRTemp start1, IRTemp start2)
+{
+   s390_irgen_XONC(Iop_Or8, length, start1, start2);
+
+   return "oc";
+}
+
+
+static HChar *
+s390_irgen_MVC(UChar length, IRTemp start1, IRTemp start2)
+{
+   IRTemp counter = newTemp(Ity_I64);
+
+   assign(counter, get_counter_dw0());
+
+   store(binop(Iop_Add64, mkexpr(start1), mkexpr(counter)),
+         load(Ity_I8, binop(Iop_Add64, mkexpr(start2), mkexpr(counter))));
+
+   /* Check for end of field */
+   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
+   if_condition_goto(binop(Iop_CmpNE64, mkexpr(counter), mkU64(length)),
+                     guest_IA_curr_instr);
+   put_counter_dw0(mkU64(0));
+
+   return "mvc";
+}
+
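+/* MVCL operates on two even/odd register pairs: r1 holds the destination
+   address and the low 24 bits of r1+1 the destination length; r2 holds the
+   source address, the low 24 bits of r2+1 the source length, and byte 4 of
+   r2+1 the pad byte. One byte is moved per iteration. */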
+static HChar *
+s390_irgen_MVCL(UChar r1, UChar r2)
+{
+   IRTemp addr1 = newTemp(Ity_I64);
+   IRTemp addr2 = newTemp(Ity_I64);
+   IRTemp addr2_load = newTemp(Ity_I64);
+   IRTemp r1p1 = newTemp(Ity_I32);   /* contents of r1 + 1 */
+   IRTemp r2p1 = newTemp(Ity_I32);   /* contents of r2 + 1 */
+   IRTemp len1 = newTemp(Ity_I32);
+   IRTemp len2 = newTemp(Ity_I32);
+   IRTemp pad = newTemp(Ity_I8);
+   IRTemp single = newTemp(Ity_I8);
+
+   assign(addr1, get_gpr_dw0(r1));
+   assign(r1p1, get_gpr_w1(r1 + 1));
+   assign(len1, binop(Iop_And32, mkexpr(r1p1), mkU32(0x00ffffff)));
+   assign(addr2, get_gpr_dw0(r2));
+   assign(r2p1, get_gpr_w1(r2 + 1));
+   assign(len2, binop(Iop_And32, mkexpr(r2p1), mkU32(0x00ffffff)));
+   assign(pad, get_gpr_b4(r2 + 1));
+
+   /* len1 == 0 ? */
+   s390_cc_thunk_put2(S390_CC_OP_UNSIGNED_COMPARE, len1, len2, False);
+   if_condition_goto(binop(Iop_CmpEQ32, mkexpr(len1), mkU32(0)),
+                     guest_IA_next_instr);
+
+   /* Check for destructive overlap:
+      addr1 > addr2 && addr2 + len1 > addr1 && (addr2 + len2) > addr1 */
+   s390_cc_set(3);
+   IRTemp cond1 = newTemp(Ity_I32);
+   assign(cond1, unop(Iop_1Uto32,
+                      binop(Iop_CmpLT64U, mkexpr(addr2), mkexpr(addr1))));
+   IRTemp cond2 = newTemp(Ity_I32);
+   assign(cond2, unop(Iop_1Uto32,
+                      binop(Iop_CmpLT64U, mkexpr(addr1),
+                            binop(Iop_Add64, mkexpr(addr2),
+                                  unop(Iop_32Uto64, mkexpr(len1))))));
+   IRTemp cond3 = newTemp(Ity_I32);
+   assign(cond3, unop(Iop_1Uto32,
+                      binop(Iop_CmpLT64U, 
+                            mkexpr(addr1),
+                            binop(Iop_Add64, mkexpr(addr2),
+                                  unop(Iop_32Uto64, mkexpr(len2))))));
+
+   if_condition_goto(binop(Iop_CmpEQ32,
+                           binop(Iop_And32,
+                                 binop(Iop_And32, mkexpr(cond1), mkexpr(cond2)),
+                                 mkexpr(cond3)),
+                           mkU32(1)),
+                     guest_IA_next_instr);
+
+   /* See s390_irgen_CLCL for explanation why we cannot load directly
+      and need two steps. */
+   assign(addr2_load,
+          mkite(binop(Iop_CmpEQ32, mkexpr(len2), mkU32(0)),
+                mkU64(guest_IA_curr_instr), mkexpr(addr2)));
+   assign(single,
+          mkite(binop(Iop_CmpEQ32, mkexpr(len2), mkU32(0)),
+                mkexpr(pad), load(Ity_I8, mkexpr(addr2_load))));
+
+   store(mkexpr(addr1), mkexpr(single));
+
+   /* Update addr1 and len1 */
+   put_gpr_dw0(r1, binop(Iop_Add64, mkexpr(addr1), mkU64(1)));
+   put_gpr_w1(r1 + 1, binop(Iop_Sub32, mkexpr(r1p1), mkU32(1)));
+
+   /* Update addr2 and len2 */
+   put_gpr_dw0(r2,
+               mkite(binop(Iop_CmpEQ32, mkexpr(len2), mkU32(0)),
+                     mkexpr(addr2),
+                     binop(Iop_Add64, mkexpr(addr2), mkU64(1))));
+
+   /* When updating len2 we must not modify bits (r2+1)[0:39] */
+   put_gpr_w1(r2 + 1,
+              mkite(binop(Iop_CmpEQ32, mkexpr(len2), mkU32(0)),
+                    binop(Iop_And32, mkexpr(r2p1), mkU32(0xFF000000u)),
+                    binop(Iop_Sub32, mkexpr(r2p1), mkU32(1))));
+
+   s390_cc_thunk_put2(S390_CC_OP_UNSIGNED_COMPARE, len1, len2, False);
+   if_condition_goto(binop(Iop_CmpNE32, mkexpr(len1), mkU32(1)),
+                     guest_IA_curr_instr);
+
+   return "mvcl";
+}
+
+
+static HChar *
+s390_irgen_MVCLE(UChar r1, UChar r3, IRTemp pad2)
+{
+   IRTemp addr1, addr3, addr3_load, len1, len3, single;
+
+   addr1 = newTemp(Ity_I64);
+   addr3 = newTemp(Ity_I64);
+   addr3_load = newTemp(Ity_I64);
+   len1 = newTemp(Ity_I64);
+   len3 = newTemp(Ity_I64);
+   single = newTemp(Ity_I8);
+
+   assign(addr1, get_gpr_dw0(r1));
+   assign(len1, get_gpr_dw0(r1 + 1));
+   assign(addr3, get_gpr_dw0(r3));
+   assign(len3, get_gpr_dw0(r3 + 1));
+
+   // len1 == 0 ?
+   s390_cc_thunk_put2(S390_CC_OP_UNSIGNED_COMPARE, len1, len3, False);
+   if_condition_goto(binop(Iop_CmpEQ64,mkexpr(len1), mkU64(0)),
+                     guest_IA_next_instr);
+
+   /* This is a hack to prevent mvcle from reading from addr3 when it
+      should read the pad byte instead. Since the pad byte has no address,
+      we read from the instruction itself; that value is discarded anyway. */
+   assign(addr3_load,
+          mkite(binop(Iop_CmpEQ64, mkexpr(len3), mkU64(0)),
+                mkU64(guest_IA_curr_instr), mkexpr(addr3)));
+
+   assign(single,
+          mkite(binop(Iop_CmpEQ64, mkexpr(len3), mkU64(0)),
+                unop(Iop_64to8, mkexpr(pad2)),
+                load(Ity_I8, mkexpr(addr3_load))));
+   store(mkexpr(addr1), mkexpr(single));
+
+   put_gpr_dw0(r1, binop(Iop_Add64, mkexpr(addr1), mkU64(1)));
+
+   put_gpr_dw0(r1 + 1, binop(Iop_Sub64, mkexpr(len1), mkU64(1)));
+
+   put_gpr_dw0(r3,
+               mkite(binop(Iop_CmpEQ64, mkexpr(len3), mkU64(0)),
+                     mkexpr(addr3),
+                     binop(Iop_Add64, mkexpr(addr3), mkU64(1))));
+
+   put_gpr_dw0(r3 + 1,
+               mkite(binop(Iop_CmpEQ64, mkexpr(len3), mkU64(0)),
+                     mkU64(0), binop(Iop_Sub64, mkexpr(len3), mkU64(1))));
+
+   /* We should set CC=3 (faked by overflow add) and leave after
+      a maximum of ~4096 bytes have been processed. This is simpler:
+      we leave whenever (len1 % 4096) == 0 */
+   s390_cc_thunk_put2(S390_CC_OP_UNSIGNED_ADD_64, mktemp(Ity_I64, mkU64(-1ULL)),
+                      mktemp(Ity_I64, mkU64(-1ULL)), False);
+   if_condition_goto(binop(Iop_CmpEQ64,
+                           binop(Iop_And64, mkexpr(len1), mkU64(0xfff)),
+                           mkU64(0)),
+                     guest_IA_next_instr);
+
+   s390_cc_thunk_put2(S390_CC_OP_UNSIGNED_COMPARE, len1, len3, False);
+   if_condition_goto(binop(Iop_CmpNE64, mkexpr(len1), mkU64(1)),
+                     guest_IA_curr_instr);
+
+   return "mvcle";
+}
+
+static HChar *
+s390_irgen_MVST(UChar r1, UChar r2)
+{
+   IRTemp addr1 = newTemp(Ity_I64);
+   IRTemp addr2 = newTemp(Ity_I64);
+   IRTemp end = newTemp(Ity_I8);
+   IRTemp byte = newTemp(Ity_I8);
+   IRTemp counter = newTemp(Ity_I64);
+
+   assign(addr1, get_gpr_dw0(r1));
+   assign(addr2, get_gpr_dw0(r2));
+   assign(counter, get_counter_dw0());
+   assign(end, get_gpr_b7(0));
+   assign(byte, load(Ity_I8, binop(Iop_Add64, mkexpr(addr2),mkexpr(counter))));
+   store(binop(Iop_Add64,mkexpr(addr1),mkexpr(counter)), mkexpr(byte));
+
+   // The number of bytes moved is CPU-determined; we treat it as unlimited
+   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
+   if_condition_goto(binop(Iop_CmpNE8, mkexpr(end), mkexpr(byte)),
+                     guest_IA_curr_instr);
+
+   // and always set cc=1 at the end and update r1
+   s390_cc_set(1);
+   put_gpr_dw0(r1, binop(Iop_Add64, mkexpr(addr1), mkexpr(counter)));
+   put_counter_dw0(mkU64(0));
+
+   return "mvst";
+}
+
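+/* Helpers for the divide instructions. The VEX DivMod operations yield a
+   result twice as wide as the quotient, with the remainder in the high
+   half and the quotient in the low half; these helpers unpack that into
+   the even (remainder) and odd (quotient) register of the r1 pair. */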
+static void
+s390_irgen_divide_64to32(IROp op, UChar r1, IRTemp op2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op1, binop(Iop_32HLto64,
+                     get_gpr_w1(r1),         // high 32 bits
+                     get_gpr_w1(r1 + 1)));   // low  32 bits
+   assign(result, binop(op, mkexpr(op1), mkexpr(op2)));
+   put_gpr_w1(r1, unop(Iop_64HIto32, mkexpr(result)));   // remainder
+   put_gpr_w1(r1 + 1, unop(Iop_64to32, mkexpr(result))); // quotient
+}
+
+static void
+s390_irgen_divide_128to64(IROp op, UChar r1, IRTemp op2)
+{
+   IRTemp op1 = newTemp(Ity_I128);
+   IRTemp result = newTemp(Ity_I128);
+
+   assign(op1, binop(Iop_64HLto128,
+                     get_gpr_dw0(r1),         // high 64 bits
+                     get_gpr_dw0(r1 + 1)));   // low  64 bits
+   assign(result, binop(op, mkexpr(op1), mkexpr(op2)));
+   put_gpr_dw0(r1, unop(Iop_128HIto64, mkexpr(result)));   // remainder
+   put_gpr_dw0(r1 + 1, unop(Iop_128to64, mkexpr(result))); // quotient
+}
+
+static void
+s390_irgen_divide_64to64(IROp op, UChar r1, IRTemp op2)
+{
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I128);
+
+   assign(op1, get_gpr_dw0(r1 + 1));
+   assign(result, binop(op, mkexpr(op1), mkexpr(op2)));
+   put_gpr_dw0(r1, unop(Iop_128HIto64, mkexpr(result)));   // remainder
+   put_gpr_dw0(r1 + 1, unop(Iop_128to64, mkexpr(result))); // quotient
+}
+
+static HChar *
+s390_irgen_DR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+
+   s390_irgen_divide_64to32(Iop_DivModS64to32, r1, op2);
+
+   return "dr";
+}
+
+static HChar *
+s390_irgen_D(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+
+   s390_irgen_divide_64to32(Iop_DivModS64to32, r1, op2);
+
+   return "d";
+}
+
+static HChar *
+s390_irgen_DLR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+
+   s390_irgen_divide_64to32(Iop_DivModU64to32, r1, op2);
+
+   return "dr";
+}
+
+static HChar *
+s390_irgen_DL(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op2, load(Ity_I32, mkexpr(op2addr)));
+
+   s390_irgen_divide_64to32(Iop_DivModU64to32, r1, op2);
+
+   return "dl";
+}
+
+static HChar *
+s390_irgen_DLG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+
+   s390_irgen_divide_128to64(Iop_DivModU128to64, r1, op2);
+
+   return "dlg";
+}
+
+static HChar *
+s390_irgen_DLGR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+
+   s390_irgen_divide_128to64(Iop_DivModU128to64, r1, op2);
+
+   return "dlgr";
+}
+
+static HChar *
+s390_irgen_DSGR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+
+   s390_irgen_divide_64to64(Iop_DivModS64to64, r1, op2);
+
+   return "dsgr";
+}
+
+static HChar *
+s390_irgen_DSG(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, load(Ity_I64, mkexpr(op2addr)));
+
+   s390_irgen_divide_64to64(Iop_DivModS64to64, r1, op2);
+
+   return "dsg";
+}
+
+static HChar *
+s390_irgen_DSGFR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, unop(Iop_32Sto64, get_gpr_w1(r2)));
+
+   s390_irgen_divide_64to64(Iop_DivModS64to64, r1, op2);
+
+   return "dsgfr";
+}
+
+static HChar *
+s390_irgen_DSGF(UChar r1, IRTemp op2addr)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkexpr(op2addr))));
+
+   s390_irgen_divide_64to64(Iop_DivModS64to64, r1, op2);
+
+   return "dsgf";
+}
+
+static void
+s390_irgen_load_ar_multiple(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar reg;
+   IRTemp addr = newTemp(Ity_I64);
+
+   assign(addr, mkexpr(op2addr));
+   reg = r1;
+   do {
+      IRTemp old = addr;
+
+      reg %= 16;
+      put_ar_w0(reg, load(Ity_I32, mkexpr(addr)));
+      addr = newTemp(Ity_I64);
+      assign(addr, binop(Iop_Add64, mkexpr(old), mkU64(4)));
+      reg++;
+   } while (reg != (r3 + 1));
+}
+
+static HChar *
+s390_irgen_LAM(UChar r1, UChar r3, IRTemp op2addr)
+{
+   s390_irgen_load_ar_multiple(r1, r3, op2addr);
+
+   return "lam";
+}
+
+static HChar *
+s390_irgen_LAMY(UChar r1, UChar r3, IRTemp op2addr)
+{
+   s390_irgen_load_ar_multiple(r1, r3, op2addr);
+
+   return "lamy";
+}
+
+static void
+s390_irgen_store_ar_multiple(UChar r1, UChar r3, IRTemp op2addr)
+{
+   UChar reg;
+   IRTemp addr = newTemp(Ity_I64);
+
+   assign(addr, mkexpr(op2addr));
+   reg = r1;
+   do {
+      IRTemp old = addr;
+
+      reg %= 16;
+      store(mkexpr(addr), get_ar_w0(reg));
+      addr = newTemp(Ity_I64);
+      assign(addr, binop(Iop_Add64, mkexpr(old), mkU64(4)));
+      reg++;
+   } while (reg != (r3 + 1));
+}
+
+static HChar *
+s390_irgen_STAM(UChar r1, UChar r3, IRTemp op2addr)
+{
+   s390_irgen_store_ar_multiple(r1, r3, op2addr);
+
+   return "stam";
+}
+
+static HChar *
+s390_irgen_STAMY(UChar r1, UChar r3, IRTemp op2addr)
+{
+   s390_irgen_store_ar_multiple(r1, r3, op2addr);
+
+   return "stamy";
+}
+
+
+/* Implementation for 32-bit compare-and-swap */
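+/* On a failed compare the generated code exits with Ijk_Yield. Under
+   Valgrind's serialised thread scheduling this gives another thread a
+   chance to run, which helps spin loops built on CS make progress. */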
+static void
+s390_irgen_cas_32(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRCAS *cas;
+   IRTemp op1 = newTemp(Ity_I32);
+   IRTemp old_mem = newTemp(Ity_I32);
+   IRTemp op3 = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+   IRTemp nequal = newTemp(Ity_I1);
+
+   assign(op1, get_gpr_w1(r1));
+   assign(op3, get_gpr_w1(r3));
+
+   /* The first and second operands are compared. If they are equal,
+      the third operand is stored at the second-operand location. */
+   cas = mkIRCAS(IRTemp_INVALID, old_mem,
+                 Iend_BE, mkexpr(op2addr),
+                 NULL, mkexpr(op1), /* expected value */
+                 NULL, mkexpr(op3)  /* new value */);
+   stmt(IRStmt_CAS(cas));
+
+   /* Set CC. Operands compared equal -> 0, else 1. */
+   assign(result, binop(Iop_Sub32, mkexpr(op1), mkexpr(old_mem)));
+   s390_cc_thunk_put1(S390_CC_OP_BITWISE, result, False);
+
+   /* If operands were equal (cc == 0) just store the old value op1 in r1.
+      Otherwise, store the old_value from memory in r1 and yield. */
+   assign(nequal, binop(Iop_CmpNE32, s390_call_calculate_cc(), mkU32(0)));
+   put_gpr_w1(r1, mkite(mkexpr(nequal), mkexpr(old_mem), mkexpr(op1)));
+   stmt(IRStmt_Exit(mkexpr(nequal), Ijk_Yield,
+        IRConst_U64(guest_IA_next_instr)));
+}
+
+static HChar *
+s390_irgen_CS(UChar r1, UChar r3, IRTemp op2addr)
+{
+   s390_irgen_cas_32(r1, r3, op2addr);
+
+   return "cs";
+}
+
+static HChar *
+s390_irgen_CSY(UChar r1, UChar r3, IRTemp op2addr)
+{
+   s390_irgen_cas_32(r1, r3, op2addr);
+
+   return "csy";
+}
+
+static HChar *
+s390_irgen_CSG(UChar r1, UChar r3, IRTemp op2addr)
+{
+   IRCAS *cas;
+   IRTemp op1 = newTemp(Ity_I64);
+   IRTemp old_mem = newTemp(Ity_I64);
+   IRTemp op3 = newTemp(Ity_I64);
+   IRTemp result = newTemp(Ity_I64);
+   IRTemp nequal = newTemp(Ity_I1);
+
+   assign(op1, get_gpr_dw0(r1));
+   assign(op3, get_gpr_dw0(r3));
+
+   /* The first and second operands are compared. If they are equal,
+      the third operand is stored at the second-operand location. */
+   cas = mkIRCAS(IRTemp_INVALID, old_mem,
+                 Iend_BE, mkexpr(op2addr),
+                 NULL, mkexpr(op1), /* expected value */
+                 NULL, mkexpr(op3)  /* new value */);
+   stmt(IRStmt_CAS(cas));
+
+   /* Set CC. Operands compared equal -> 0, else 1. */
+   assign(result, binop(Iop_Sub64, mkexpr(op1), mkexpr(old_mem)));
+   s390_cc_thunk_put1(S390_CC_OP_BITWISE, result, False);
+
+   /* If operands were equal (cc == 0) just store the old value op1 in r1.
+      Otherwise, store the old_value from memory in r1 and yield. */
+   assign(nequal, binop(Iop_CmpNE32, s390_call_calculate_cc(), mkU32(0)));
+   put_gpr_dw0(r1, mkite(mkexpr(nequal), mkexpr(old_mem), mkexpr(op1)));
+   stmt(IRStmt_Exit(mkexpr(nequal), Ijk_Yield,
+        IRConst_U64(guest_IA_next_instr)));
+
+   return "csg";
+}
+
+
+/* Binary floating point */
+
+static HChar *
+s390_irgen_AXBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F128);
+   IRTemp op2 = newTemp(Ity_F128);
+   IRTemp result = newTemp(Ity_F128);
+
+   assign(op1, get_fpr_pair(r1));
+   assign(op2, get_fpr_pair(r2));
+   assign(result, triop(Iop_AddF128, mkU32(Irrm_NEAREST), mkexpr(op1),
+                        mkexpr(op2)));
+   put_fpr_pair(r1, mkexpr(result));
+
+   s390_cc_thunk_put1f128(S390_CC_OP_BFP_RESULT_128, result);
+
+   return "axbr";
+}
+
+/* The result of an Iop_CmpFxx operation is a condition code. It is
+   encoded using the values defined in type IRCmpFxxResult.
+   Before we can store the condition code into the guest state (or do
+   anything else with it for that matter) we need to convert it to
+   the encoding that s390 uses. This is what this function does.
+
+   s390     VEX                b6 b2 b0   cc.1  cc.0
+   0      0x40 EQ             1  0  0     0     0
+   1      0x01 LT             0  0  1     0     1
+   2      0x00 GT             0  0  0     1     0
+   3      0x45 Unordered      1  1  1     1     1
+
+   The following bits from the VEX encoding are interesting:
+   b0, b2, b6  with b0 being the LSB. We observe:
+
+   cc.0 = b0;
+   cc.1 = b2 | (~b0 & ~b6)
+
+   with cc being the s390 condition code.
+*/
+static IRExpr *
+convert_vex_fpcc_to_s390(IRTemp vex_cc)
+{
+   IRTemp cc0  = newTemp(Ity_I32);
+   IRTemp cc1  = newTemp(Ity_I32);
+   IRTemp b0   = newTemp(Ity_I32);
+   IRTemp b2   = newTemp(Ity_I32);
+   IRTemp b6   = newTemp(Ity_I32);
+
+   assign(b0, binop(Iop_And32, mkexpr(vex_cc), mkU32(1)));
+   assign(b2, binop(Iop_And32, binop(Iop_Shr32, mkexpr(vex_cc), mkU8(2)),
+                    mkU32(1)));
+   assign(b6, binop(Iop_And32, binop(Iop_Shr32, mkexpr(vex_cc), mkU8(6)),
+                    mkU32(1)));
+
+   assign(cc0, mkexpr(b0));
+   assign(cc1, binop(Iop_Or32, mkexpr(b2),
+                     binop(Iop_And32,
+                           binop(Iop_Sub32, mkU32(1), mkexpr(b0)), /* ~b0 */
+                           binop(Iop_Sub32, mkU32(1), mkexpr(b6))  /* ~b6 */
+                           )));
+
+   return binop(Iop_Or32, mkexpr(cc0), binop(Iop_Shl32, mkexpr(cc1), mkU8(1)));
+}
+
+static HChar *
+s390_irgen_CEBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F32);
+   IRTemp op2 = newTemp(Ity_F32);
+   IRTemp cc_vex  = newTemp(Ity_I32);
+   IRTemp cc_s390 = newTemp(Ity_I32);
+
+   assign(op1, get_fpr_w0(r1));
+   assign(op2, get_fpr_w0(r2));
+   assign(cc_vex, binop(Iop_CmpF32, mkexpr(op1), mkexpr(op2)));
+
+   assign(cc_s390, convert_vex_fpcc_to_s390(cc_vex));
+   s390_cc_thunk_put1(S390_CC_OP_SET, cc_s390, False);
+
+   return "cebr";
+}
+
+static HChar *
+s390_irgen_CDBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F64);
+   IRTemp op2 = newTemp(Ity_F64);
+   IRTemp cc_vex  = newTemp(Ity_I32);
+   IRTemp cc_s390 = newTemp(Ity_I32);
+
+   assign(op1, get_fpr_dw0(r1));
+   assign(op2, get_fpr_dw0(r2));
+   assign(cc_vex, binop(Iop_CmpF64, mkexpr(op1), mkexpr(op2)));
+
+   assign(cc_s390, convert_vex_fpcc_to_s390(cc_vex));
+   s390_cc_thunk_put1(S390_CC_OP_SET, cc_s390, False);
+
+   return "cdbr";
+}
+
+static HChar *
+s390_irgen_CXBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F128);
+   IRTemp op2 = newTemp(Ity_F128);
+   IRTemp cc_vex  = newTemp(Ity_I32);
+   IRTemp cc_s390 = newTemp(Ity_I32);
+
+   assign(op1, get_fpr_pair(r1));
+   assign(op2, get_fpr_pair(r2));
+   assign(cc_vex, binop(Iop_CmpF128, mkexpr(op1), mkexpr(op2)));
+
+   assign(cc_s390, convert_vex_fpcc_to_s390(cc_vex));
+   s390_cc_thunk_put1(S390_CC_OP_SET, cc_s390, False);
+
+   return "cxbr";
+}
+
+static HChar *
+s390_irgen_CEB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_F32);
+   IRTemp op2 = newTemp(Ity_F32);
+   IRTemp cc_vex  = newTemp(Ity_I32);
+   IRTemp cc_s390 = newTemp(Ity_I32);
+
+   assign(op1, get_fpr_w0(r1));
+   assign(op2, load(Ity_F32, mkexpr(op2addr)));
+   assign(cc_vex,  binop(Iop_CmpF32, mkexpr(op1), mkexpr(op2)));
+
+   assign(cc_s390, convert_vex_fpcc_to_s390(cc_vex));
+   s390_cc_thunk_put1(S390_CC_OP_SET, cc_s390, False);
+
+   return "ceb";
+}
+
+static HChar *
+s390_irgen_CDB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op1 = newTemp(Ity_F64);
+   IRTemp op2 = newTemp(Ity_F64);
+   IRTemp cc_vex  = newTemp(Ity_I32);
+   IRTemp cc_s390 = newTemp(Ity_I32);
+
+   assign(op1, get_fpr_dw0(r1));
+   assign(op2, load(Ity_F64, mkexpr(op2addr)));
+   assign(cc_vex, binop(Iop_CmpF64, mkexpr(op1), mkexpr(op2)));
+
+   assign(cc_s390, convert_vex_fpcc_to_s390(cc_vex));
+   s390_cc_thunk_put1(S390_CC_OP_SET, cc_s390, False);
+
+   return "cdb";
+}
+
+static HChar *
+s390_irgen_CXFBR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I32);
+
+   assign(op2, get_gpr_w1(r2));
+   put_fpr_pair(r1, unop(Iop_I32StoF128, mkexpr(op2)));
+
+   return "cxfbr";
+}
+
+static HChar *
+s390_irgen_CXGBR(UChar r1, UChar r2)
+{
+   IRTemp op2 = newTemp(Ity_I64);
+
+   assign(op2, get_gpr_dw0(r2));
+   put_fpr_pair(r1, unop(Iop_I64StoF128, mkexpr(op2)));
+
+   return "cxgbr";
+}
+
+static HChar *
+s390_irgen_CFXBR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op = newTemp(Ity_F128);
+   IRTemp result = newTemp(Ity_I32);
+
+   assign(op, get_fpr_pair(r2));
+   assign(result, binop(Iop_F128toI32S, mkU32(encode_rounding_mode(r3)),
+                        mkexpr(op)));
+   put_gpr_w1(r1, mkexpr(result));
+   s390_cc_thunk_put1f128(S390_CC_OP_BFP_128_TO_INT_32, op);
+
+   return "cfxbr";
+}
+
+static HChar *
+s390_irgen_CGXBR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp op = newTemp(Ity_F128);
+   IRTemp result = newTemp(Ity_I64);
+
+   assign(op, get_fpr_pair(r2));
+   assign(result, binop(Iop_F128toI64S, mkU32(encode_rounding_mode(r3)),
+                        mkexpr(op)));
+   put_gpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_put1f128(S390_CC_OP_BFP_128_TO_INT_64, op);
+
+   return "cgxbr";
+}
+
+static HChar *
+s390_irgen_DXBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F128);
+   IRTemp op2 = newTemp(Ity_F128);
+   IRTemp result = newTemp(Ity_F128);
+
+   assign(op1, get_fpr_pair(r1));
+   assign(op2, get_fpr_pair(r2));
+   assign(result, triop(Iop_DivF128, mkU32(Irrm_NEAREST), mkexpr(op1),
+                        mkexpr(op2)));
+   put_fpr_pair(r1, mkexpr(result));
+
+   return "dxbr";
+}
+
+static HChar *
+s390_irgen_LTXBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F128);
+
+   assign(result, get_fpr_pair(r2));
+   put_fpr_pair(r1, mkexpr(result));
+   s390_cc_thunk_put1f128(S390_CC_OP_BFP_RESULT_128, result);
+
+   return "ltxbr";
+}
+
+static HChar *
+s390_irgen_LCXBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F128);
+
+   assign(result, unop(Iop_NegF128, get_fpr_pair(r2)));
+   put_fpr_pair(r1, mkexpr(result));
+   s390_cc_thunk_put1f128(S390_CC_OP_BFP_RESULT_128, result);
+
+   return "lcxbr";
+}
+
+static HChar *
+s390_irgen_LXDBR(UChar r1, UChar r2)
+{
+   IRTemp op = newTemp(Ity_F64);
+
+   assign(op, get_fpr_dw0(r2));
+   put_fpr_pair(r1, unop(Iop_F64toF128, mkexpr(op)));
+
+   return "lxdbr";
+}
+
+static HChar *
+s390_irgen_LXEBR(UChar r1, UChar r2)
+{
+   IRTemp op = newTemp(Ity_F32);
+
+   assign(op, get_fpr_w0(r2));
+   put_fpr_pair(r1, unop(Iop_F32toF128, mkexpr(op)));
+
+   return "lxebr";
+}
+
+static HChar *
+s390_irgen_LXDB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op = newTemp(Ity_F64);
+
+   assign(op, load(Ity_F64, mkexpr(op2addr)));
+   put_fpr_pair(r1, unop(Iop_F64toF128, mkexpr(op)));
+
+   return "lxdb";
+}
+
+static HChar *
+s390_irgen_LXEB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op = newTemp(Ity_F32);
+
+   assign(op, load(Ity_F32, mkexpr(op2addr)));
+   put_fpr_pair(r1, unop(Iop_F32toF128, mkexpr(op)));
+
+   return "lxeb";
+}
+
+static HChar *
+s390_irgen_LNEBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(result, unop(Iop_NegF32, unop(Iop_AbsF32, get_fpr_w0(r2))));
+   put_fpr_w0(r1, mkexpr(result));
+   s390_cc_thunk_put1f(S390_CC_OP_BFP_RESULT_32, result);
+
+   return "lnebr";
+}
+
+static HChar *
+s390_irgen_LNDBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(result, unop(Iop_NegF64, unop(Iop_AbsF64, get_fpr_dw0(r2))));
+   put_fpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_put1f(S390_CC_OP_BFP_RESULT_64, result);
+
+   return "lndbr";
+}
+
+static HChar *
+s390_irgen_LNXBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F128);
+
+   assign(result, unop(Iop_NegF128, unop(Iop_AbsF128, get_fpr_pair(r2))));
+   put_fpr_pair(r1, mkexpr(result));
+   s390_cc_thunk_put1f128(S390_CC_OP_BFP_RESULT_128, result);
+
+   return "lnxbr";
+}
+
+static HChar *
+s390_irgen_LPEBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(result, unop(Iop_AbsF32, get_fpr_w0(r2)));
+   put_fpr_w0(r1, mkexpr(result));
+   s390_cc_thunk_put1f(S390_CC_OP_BFP_RESULT_32, result);
+
+   return "lpebr";
+}
+
+static HChar *
+s390_irgen_LPDBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(result, unop(Iop_AbsF64, get_fpr_dw0(r2)));
+   put_fpr_dw0(r1, mkexpr(result));
+   s390_cc_thunk_put1f(S390_CC_OP_BFP_RESULT_64, result);
+
+   return "lpdbr";
+}
+
+static HChar *
+s390_irgen_LPXBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F128);
+
+   assign(result, unop(Iop_AbsF128, get_fpr_pair(r2)));
+   put_fpr_pair(r1, mkexpr(result));
+   s390_cc_thunk_put1f128(S390_CC_OP_BFP_RESULT_128, result);
+
+   return "lpxbr";
+}
+
+static HChar *
+s390_irgen_LDXBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(result, binop(Iop_F128toF64, mkU32(Irrm_NEAREST), get_fpr_pair(r2)));
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "ldxbr";
+}
+
+static HChar *
+s390_irgen_LEXBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(result, binop(Iop_F128toF32, mkU32(Irrm_NEAREST), get_fpr_pair(r2)));
+   put_fpr_w0(r1, mkexpr(result));
+
+   return "lexbr";
+}
+
+static HChar *
+s390_irgen_MXBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F128);
+   IRTemp op2 = newTemp(Ity_F128);
+   IRTemp result = newTemp(Ity_F128);
+
+   assign(op1, get_fpr_pair(r1));
+   assign(op2, get_fpr_pair(r2));
+   assign(result, triop(Iop_MulF128, mkU32(Irrm_NEAREST), mkexpr(op1),
+                        mkexpr(op2)));
+   put_fpr_pair(r1, mkexpr(result));
+
+   return "mxbr";
+}
+
+static HChar *
+s390_irgen_MAEBR(UChar r1, UChar r3, UChar r2)
+{
+   put_fpr_w0(r1, qop(Iop_MAddF32, mkU32(Irrm_NEAREST),
+                      get_fpr_w0(r1), get_fpr_w0(r2), get_fpr_w0(r3)));
+
+   return "maebr";
+}
+
+static HChar *
+s390_irgen_MADBR(UChar r1, UChar r3, UChar r2)
+{
+   put_fpr_dw0(r1, qop(Iop_MAddF64, mkU32(Irrm_NEAREST),
+                       get_fpr_dw0(r1), get_fpr_dw0(r2), get_fpr_dw0(r3)));
+
+   return "madbr";
+}
+
+static HChar *
+s390_irgen_MAEB(UChar r3, IRTemp op2addr, UChar r1)
+{
+   IRExpr *op2 = load(Ity_F32, mkexpr(op2addr));
+
+   put_fpr_w0(r1, qop(Iop_MAddF32, mkU32(Irrm_NEAREST),
+                      get_fpr_w0(r1), op2, get_fpr_w0(r3)));
+
+   return "maeb";
+}
+
+static HChar *
+s390_irgen_MADB(UChar r3, IRTemp op2addr, UChar r1)
+{
+   IRExpr *op2 = load(Ity_F64, mkexpr(op2addr));
+
+   put_fpr_dw0(r1, qop(Iop_MAddF64, mkU32(Irrm_NEAREST),
+                       get_fpr_dw0(r1), op2, get_fpr_dw0(r3)));
+
+   return "madb";
+}
+
+static HChar *
+s390_irgen_MSEBR(UChar r1, UChar r3, UChar r2)
+{
+   put_fpr_w0(r1, qop(Iop_MSubF32, mkU32(Irrm_NEAREST),
+                      get_fpr_w0(r1), get_fpr_w0(r2), get_fpr_w0(r3)));
+
+   return "msebr";
+}
+
+static HChar *
+s390_irgen_MSDBR(UChar r1, UChar r3, UChar r2)
+{
+   put_fpr_dw0(r1, qop(Iop_MSubF64, mkU32(Irrm_NEAREST),
+                       get_fpr_dw0(r1), get_fpr_dw0(r2), get_fpr_dw0(r3)));
+
+   return "msdbr";
+}
+
+static HChar *
+s390_irgen_MSEB(UChar r3, IRTemp op2addr, UChar r1)
+{
+   IRExpr *op2 = load(Ity_F32, mkexpr(op2addr));
+
+   put_fpr_w0(r1, qop(Iop_MSubF32, mkU32(Irrm_NEAREST),
+                      get_fpr_w0(r1), op2, get_fpr_w0(r3)));
+
+   return "mseb";
+}
+
+static HChar *
+s390_irgen_MSDB(UChar r3, IRTemp op2addr, UChar r1)
+{
+   IRExpr *op2 = load(Ity_F64, mkexpr(op2addr));
+
+   put_fpr_dw0(r1, qop(Iop_MSubF64, mkU32(Irrm_NEAREST),
+                       get_fpr_dw0(r1), op2, get_fpr_dw0(r3)));
+
+   return "msdb";
+}
+
+static HChar *
+s390_irgen_SQEBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F32);
+
+   assign(result, binop(Iop_SqrtF32, mkU32(Irrm_NEAREST), get_fpr_w0(r2)));
+   put_fpr_w0(r1, mkexpr(result));
+
+   return "sqebr";
+}
+
+static HChar *
+s390_irgen_SQDBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(result, binop(Iop_SqrtF64, mkU32(Irrm_NEAREST), get_fpr_dw0(r2)));
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "sqdbr";
+}
+
+static HChar *
+s390_irgen_SQXBR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F128);
+
+   assign(result, binop(Iop_SqrtF128, mkU32(Irrm_NEAREST), get_fpr_pair(r2)));
+   put_fpr_pair(r1, mkexpr(result));
+
+   return "sqxbr";
+}
+
+static HChar *
+s390_irgen_SQEB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op = newTemp(Ity_F32);
+
+   assign(op, load(Ity_F32, mkexpr(op2addr)));
+   put_fpr_w0(r1, binop(Iop_SqrtF32, mkU32(Irrm_NEAREST), mkexpr(op)));
+
+   return "sqeb";
+}
+
+static HChar *
+s390_irgen_SQDB(UChar r1, IRTemp op2addr)
+{
+   IRTemp op = newTemp(Ity_F64);
+
+   assign(op, load(Ity_F64, mkexpr(op2addr)));
+   put_fpr_dw0(r1, binop(Iop_SqrtF64, mkU32(Irrm_NEAREST), mkexpr(op)));
+
+   return "sqdb";
+}
+
+static HChar *
+s390_irgen_SXBR(UChar r1, UChar r2)
+{
+   IRTemp op1 = newTemp(Ity_F128);
+   IRTemp op2 = newTemp(Ity_F128);
+   IRTemp result = newTemp(Ity_F128);
+
+   assign(op1, get_fpr_pair(r1));
+   assign(op2, get_fpr_pair(r2));
+   assign(result, triop(Iop_SubF128, mkU32(Irrm_NEAREST), mkexpr(op1),
+                        mkexpr(op2)));
+   put_fpr_pair(r1, mkexpr(result));
+   s390_cc_thunk_put1f128(S390_CC_OP_BFP_RESULT_128, result);
+
+   return "sxbr";
+}
+
+static HChar *
+s390_irgen_TCEB(UChar r1, IRTemp op2addr)
+{
+   IRTemp value = newTemp(Ity_F32);
+
+   assign(value, get_fpr_w0(r1));
+
+   s390_cc_thunk_putFZ(S390_CC_OP_BFP_TDC_32, value, op2addr);
+
+   return "tceb";
+}
+
+static HChar *
+s390_irgen_TCDB(UChar r1, IRTemp op2addr)
+{
+   IRTemp value = newTemp(Ity_F64);
+
+   assign(value, get_fpr_dw0(r1));
+
+   s390_cc_thunk_putFZ(S390_CC_OP_BFP_TDC_64, value, op2addr);
+
+   return "tcdb";
+}
+
+static HChar *
+s390_irgen_TCXB(UChar r1, IRTemp op2addr)
+{
+   IRTemp value = newTemp(Ity_F128);
+
+   assign(value, get_fpr_pair(r1));
+
+   s390_cc_thunk_put1f128Z(S390_CC_OP_BFP_TDC_128, value, op2addr);
+
+   return "tcxb";
+}
+
+static HChar *
+s390_irgen_LCDFR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(result, unop(Iop_NegF64, get_fpr_dw0(r2)));
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "lcdfr";
+}
+
+static HChar *
+s390_irgen_LNDFR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(result, unop(Iop_NegF64, unop(Iop_AbsF64, get_fpr_dw0(r2))));
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "lndfr";
+}
+
+static HChar *
+s390_irgen_LPDFR(UChar r1, UChar r2)
+{
+   IRTemp result = newTemp(Ity_F64);
+
+   assign(result, unop(Iop_AbsF64, get_fpr_dw0(r2)));
+   put_fpr_dw0(r1, mkexpr(result));
+
+   return "lpdfr";
+}
+
+static HChar *
+s390_irgen_LDGR(UChar r1, UChar r2)
+{
+   put_fpr_dw0(r1, unop(Iop_ReinterpI64asF64, get_gpr_dw0(r2)));
+
+   return "ldgr";
+}
+
+static HChar *
+s390_irgen_LGDR(UChar r1, UChar r2)
+{
+   put_gpr_dw0(r1, unop(Iop_ReinterpF64asI64, get_fpr_dw0(r2)));
+
+   return "lgdr";
+}
+
+
+static HChar *
+s390_irgen_CPSDR(UChar r3, UChar r1, UChar r2)
+{
+   IRTemp sign  = newTemp(Ity_I64);
+   IRTemp value = newTemp(Ity_I64);
+
+   assign(sign, binop(Iop_And64, unop(Iop_ReinterpF64asI64, get_fpr_dw0(r3)),
+                      mkU64(1ULL << 63)));
+   assign(value, binop(Iop_And64, unop(Iop_ReinterpF64asI64, get_fpr_dw0(r2)),
+                       mkU64((1ULL << 63) - 1)));
+   put_fpr_dw0(r1, unop(Iop_ReinterpI64asF64, binop(Iop_Or64, mkexpr(value),
+                                                    mkexpr(sign))));
+
+   return "cpsdr";
+}
+
+
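+/* CVB (convert to binary) and CVD (convert to decimal) are implemented by
+   executing the very same instruction on the host via a clean helper. The
+   non-s390x branch exists only so this file compiles on other hosts; it is
+   not expected to be reached at run time. */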
+static UInt
+s390_do_cvb(ULong decimal)
+{
+#if defined(VGA_s390x)
+   UInt binary;
+
+   __asm__ volatile (
+        "cvb %[result],%[input]\n\t"
+          : [result] "=d"(binary)
+          : [input] "m"(decimal)
+   );
+
+   return binary;
+#else
+   return 0;
+#endif
+}
+
+static IRExpr *
+s390_call_cvb(IRExpr *in)
+{
+   IRExpr **args, *call;
+
+   args = mkIRExprVec_1(in);
+   call = mkIRExprCCall(Ity_I32, 0 /*regparm*/,
+                        "s390_do_cvb", &s390_do_cvb, args);
+
+   /* Nothing is excluded from definedness checking. */
+   call->Iex.CCall.cee->mcx_mask = 0;
+
+   return call;
+}
+
+static HChar *
+s390_irgen_CVB(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, s390_call_cvb(load(Ity_I64, mkexpr(op2addr))));
+
+   return "cvb";
+}
+
+static HChar *
+s390_irgen_CVBY(UChar r1, IRTemp op2addr)
+{
+   put_gpr_w1(r1, s390_call_cvb(load(Ity_I64, mkexpr(op2addr))));
+
+   return "cvby";
+}
+
+
+static ULong
+s390_do_cvd(ULong binary_in)
+{
+#if defined(VGA_s390x)
+   UInt binary = binary_in & 0xffffffffULL;
+   ULong decimal;
+
+   __asm__ volatile (
+        "cvd %[input],%[result]\n\t"
+          : [result] "=m"(decimal)
+          : [input] "d"(binary)
+   );
+
+   return decimal;
+#else
+   return 0;
+#endif
+}
+
+static IRExpr *
+s390_call_cvd(IRExpr *in)
+{
+   IRExpr **args, *call;
+
+   args = mkIRExprVec_1(in);
+   call = mkIRExprCCall(Ity_I64, 0 /*regparm*/,
+                        "s390_do_cvd", &s390_do_cvd, args);
+
+   /* Nothing is excluded from definedness checking. */
+   call->Iex.CCall.cee->mcx_mask = 0;
+
+   return call;
+}
+
+static HChar *
+s390_irgen_CVD(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), s390_call_cvd(get_gpr_w1(r1)));
+
+   return "cvd";
+}
+
+static HChar *
+s390_irgen_CVDY(UChar r1, IRTemp op2addr)
+{
+   store(mkexpr(op2addr), s390_call_cvd(get_gpr_w1(r1)));
+
+   return "cvdy";
+}
+
+static HChar *
+s390_irgen_FLOGR(UChar r1, UChar r2)
+{
+   IRTemp input    = newTemp(Ity_I64);
+   IRTemp not_zero = newTemp(Ity_I64);
+   IRTemp tmpnum   = newTemp(Ity_I64);
+   IRTemp num      = newTemp(Ity_I64);
+   IRTemp shift_amount = newTemp(Ity_I8);
+
+   /* We use the "count leading zeroes" operator because the number of
+      leading zeroes is identical with the bit position of the first '1' bit.
+      However, that operator does not work when the input value is zero.
+      Therefore, we set the LSB of the input value to 1 and use Clz64 on
+      the modified value. If input == 0, then the result is 64. Otherwise,
+      the result of Clz64 is what we want. */
+
+   assign(input, get_gpr_dw0(r2));
+   assign(not_zero, binop(Iop_Or64, mkexpr(input), mkU64(1)));
+   assign(tmpnum, unop(Iop_Clz64, mkexpr(not_zero)));
+
+   /* num = (input == 0) ? 64 : tmpnum */
+   assign(num, mkite(binop(Iop_CmpEQ64, mkexpr(input), mkU64(0)),
+                     /* == 0 */ mkU64(64),
+                     /* != 0 */ mkexpr(tmpnum)));
+
+   put_gpr_dw0(r1, mkexpr(num));
+
+   /* Set the leftmost '1' bit of the input value to zero. The general scheme
+      is to first shift the input value by NUM + 1 bits to the left which
+      causes the leftmost '1' bit to disappear. Then we shift logically to
+      the right by NUM + 1 bits. Because the semantics of Iop_Shl64 and
+      Iop_Shr64 are undefined if the shift-amount is greater than or equal to
+      the width of the value-to-be-shifted, we need to special case
+      NUM + 1 >= 64. This is equivalent to INPUT != 0 && INPUT != 1.
+      For both such INPUT values the result will be 0. */
+
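+   /* Worked example: input == 0x8000000000000001 gives num == 0, so r1
+      receives 0 and r1+1 receives (input << 1) >> 1 == 1, i.e. the input
+      with its leftmost '1' bit cleared. */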
+   assign(shift_amount, unop(Iop_64to8, binop(Iop_Add64, mkexpr(num),
+                          mkU64(1))));
+
+   put_gpr_dw0(r1 + 1,
+               mkite(binop(Iop_CmpLE64U, mkexpr(input), mkU64(1)),
+                     /* == 0 || == 1*/ mkU64(0),
+                     /* otherwise */
+                     binop(Iop_Shr64,
+                           binop(Iop_Shl64, mkexpr(input),
+                                 mkexpr(shift_amount)),
+                           mkexpr(shift_amount))));
+
+   /* Compare the original value as an unsigned integer with 0. */
+   s390_cc_thunk_put2(S390_CC_OP_UNSIGNED_COMPARE, input,
+                      mktemp(Ity_I64, mkU64(0)), False);
+
+   return "flogr";
+}
+
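+/* The store-clock family is implemented with dirty helpers that read the
+   host TOD clock and write the result to guest memory. The mFx/mAddr/mSize
+   annotations describe that memory write so instrumenting tools can track
+   it. */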
+static HChar *
+s390_irgen_STCK(IRTemp op2addr)
+{
+   IRDirty *d;
+   IRTemp cc = newTemp(Ity_I64);
+
+   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_STCK",
+                         &s390x_dirtyhelper_STCK,
+                         mkIRExprVec_1(mkexpr(op2addr)));
+   d->mFx   = Ifx_Write;
+   d->mAddr = mkexpr(op2addr);
+   d->mSize = 8;
+   stmt(IRStmt_Dirty(d));
+   s390_cc_thunk_fill(mkU64(S390_CC_OP_SET),
+                      mkexpr(cc), mkU64(0), mkU64(0));
+   return "stck";
+}
+
+static HChar *
+s390_irgen_STCKF(IRTemp op2addr)
+{
+   IRDirty *d;
+   IRTemp cc = newTemp(Ity_I64);
+
+   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_STCKF",
+                         &s390x_dirtyhelper_STCKF,
+                         mkIRExprVec_1(mkexpr(op2addr)));
+   d->mFx   = Ifx_Write;
+   d->mAddr = mkexpr(op2addr);
+   d->mSize = 8;
+   stmt(IRStmt_Dirty(d));
+   s390_cc_thunk_fill(mkU64(S390_CC_OP_SET),
+                      mkexpr(cc), mkU64(0), mkU64(0));
+   return "stckf";
+}
+
+static HChar *
+s390_irgen_STCKE(IRTemp op2addr)
+{
+   IRDirty *d;
+   IRTemp cc = newTemp(Ity_I64);
+
+   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_STCKE",
+                         &s390x_dirtyhelper_STCKE,
+                         mkIRExprVec_1(mkexpr(op2addr)));
+   d->mFx   = Ifx_Write;
+   d->mAddr = mkexpr(op2addr);
+   d->mSize = 16;
+   stmt(IRStmt_Dirty(d));
+   s390_cc_thunk_fill(mkU64(S390_CC_OP_SET),
+                      mkexpr(cc), mkU64(0), mkU64(0));
+   return "stcke";
+}
+
+static HChar *
+s390_irgen_STFLE(IRTemp op2addr)
+{
+   IRDirty *d;
+   IRTemp cc = newTemp(Ity_I64);
+
+   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_STFLE",
+                         &s390x_dirtyhelper_STFLE,
+                         mkIRExprVec_1(mkexpr(op2addr)));
+
+   d->needsBBP = 1;  /* Need to pass pointer to guest state to helper */
+
+   d->fxState[0].fx     = Ifx_Modify;  /* read then write */
+   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_r0);
+   d->fxState[0].size   = sizeof(ULong);
+   d->nFxState = 1;
+
+   d->mAddr = mkexpr(op2addr);
+   /* Pretend all double words are written */
+   d->mSize = S390_NUM_FACILITY_DW * sizeof(ULong);
+   d->mFx   = Ifx_Write;
+
+   stmt(IRStmt_Dirty(d));
+
+   s390_cc_thunk_fill(mkU64(S390_CC_OP_SET), mkexpr(cc), mkU64(0), mkU64(0));
+
+   return "stfle";
+}
+
+static HChar *
+s390_irgen_CKSM(UChar r1,UChar r2)
+{
+   IRTemp addr = newTemp(Ity_I64);
+   IRTemp op = newTemp(Ity_I32);
+   IRTemp len = newTemp(Ity_I64);
+   IRTemp oldval = newTemp(Ity_I32);
+   IRTemp mask = newTemp(Ity_I32);
+   IRTemp newop = newTemp(Ity_I32);
+   IRTemp result = newTemp(Ity_I32);
+   IRTemp result1 = newTemp(Ity_I32);
+   IRTemp inc = newTemp(Ity_I64);
+
+   assign(oldval, get_gpr_w1(r1));
+   assign(addr, get_gpr_dw0(r2));
+   assign(len, get_gpr_dw0(r2+1));
+
+   /* Condition code is always zero. */
+   s390_cc_set(0);
+
+   /* If length is zero, there is no need to calculate the checksum */
+   if_condition_goto(binop(Iop_CmpEQ64, mkexpr(len), mkU64(0)),
+                     guest_IA_next_instr);
+
+   /* Assign the increment variable used to adjust address and length
+      later on. */
+   assign(inc, mkite(binop(Iop_CmpLT64U, mkexpr(len), mkU64(4)),
+                           mkexpr(len), mkU64(4)));
+
+   /* If length < 4 the final 4-byte 2nd operand value is computed by 
+      appending the remaining bytes to the right with 0. This is done
+      by AND'ing the 4 bytes loaded from memory with an appropriate
+      mask. If length >= 4, that mask is simply 0xffffffff. */
+
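+   /* For example, len == 1 yields 0xffffffff << (32 - 8) == 0xff000000,
+      keeping just the first byte of the big-endian word loaded below. */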
+   assign(mask, mkite(binop(Iop_CmpLT64U, mkexpr(len), mkU64(4)),
+                      /* Mask computation when len < 4:
+                         0xffffffff << (32 - (len % 4)*8) */
+                      binop(Iop_Shl32, mkU32(0xffffffff),
+                            unop(Iop_32to8,
+                                 binop(Iop_Sub32, mkU32(32),
+                                       binop(Iop_Shl32,
+                                             unop(Iop_64to32,
+                                                  binop(Iop_And64,
+                                                        mkexpr(len), mkU64(3))),
+                                             mkU8(3))))),
+                      mkU32(0xffffffff)));
+
+   assign(op, load(Ity_I32, mkexpr(addr)));
+   assign(newop, binop(Iop_And32, mkexpr(op), mkexpr(mask)));
+   assign(result, binop(Iop_Add32, mkexpr(newop), mkexpr(oldval)));
+
+   /* Checking for carry */
+   assign(result1, mkite(binop(Iop_CmpLT32U, mkexpr(result), mkexpr(newop)),
+                         binop(Iop_Add32, mkexpr(result), mkU32(1)),
+                         mkexpr(result)));
+
+   put_gpr_w1(r1, mkexpr(result1));
+   put_gpr_dw0(r2, binop(Iop_Add64, mkexpr(addr), mkexpr(inc)));
+   put_gpr_dw0(r2+1, binop(Iop_Sub64, mkexpr(len), mkexpr(inc)));
+
+   if_condition_goto(binop(Iop_CmpNE64, mkexpr(len), mkU64(0)),
+                     guest_IA_curr_instr);
+
+   return "cksm";
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Build IR for special instructions                    ---*/
+/*------------------------------------------------------------*/
+
+static void
+s390_irgen_client_request(void)
+{
+   if (0)
+      vex_printf("%%R3 = client_request ( %%R2 )\n");
+
+   irsb->next = mkU64((ULong)(guest_IA_curr_instr
+                              + S390_SPECIAL_OP_PREAMBLE_SIZE
+                              + S390_SPECIAL_OP_SIZE));
+   irsb->jumpkind = Ijk_ClientReq;
+
+   dis_res->whatNext = Dis_StopHere;
+}
+
+static void
+s390_irgen_guest_NRADDR(void)
+{
+   if (0)
+      vex_printf("%%R3 = guest_NRADDR\n");
+
+   put_gpr_dw0(3, IRExpr_Get(S390X_GUEST_OFFSET(guest_NRADDR), Ity_I64));
+}
+
+static void
+s390_irgen_call_noredir(void)
+{
+   /* Continue after special op */
+   put_gpr_dw0(14, mkU64(guest_IA_curr_instr
+                         + S390_SPECIAL_OP_PREAMBLE_SIZE
+                         + S390_SPECIAL_OP_SIZE));
+
+   /* The address is in REG1, all parameters are in the right (guest) places */
+   irsb->next     = get_gpr_dw0(1);
+   irsb->jumpkind = Ijk_NoRedir;
+
+   dis_res->whatNext = Dis_StopHere;
+}
+
+/* Force proper alignment for the structures below. */
+#pragma pack(1)
+
+
+static s390_decode_t
+s390_decode_2byte_and_irgen(UChar *bytes)
+{
+   typedef union {
+      struct {
+         unsigned int op : 16;
+      } E;
+      struct {
+         unsigned int op :  8;
+         unsigned int i  :  8;
+      } I;
+      struct {
+         unsigned int op :  8;
+         unsigned int r1 :  4;
+         unsigned int r2 :  4;
+      } RR;
+   } formats;
+   union {
+      formats fmt;
+      UShort value;
+   } ovl;
+
+   vassert(sizeof(formats) == 2);
+
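+   /* Copy the instruction bytes one at a time; this fills the overlay
+      in instruction order without an unaligned halfword load. */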
+   ((char *)(&ovl.value))[0] = bytes[0];
+   ((char *)(&ovl.value))[1] = bytes[1];
+
+   switch (ovl.value & 0xffff) {
+   case 0x0000: /* invalid opcode */
+      s390_format_RR_RR(s390_irgen_00, 0, 0); goto ok;
+   case 0x0101: /* PR */ goto unimplemented;
+   case 0x0102: /* UPT */ goto unimplemented;
+   case 0x0104: /* PTFF */ goto unimplemented;
+   case 0x0107: /* SCKPF */ goto unimplemented;
+   case 0x010a: /* PFPO */ goto unimplemented;
+   case 0x010b: /* TAM */ goto unimplemented;
+   case 0x010c: /* SAM24 */ goto unimplemented;
+   case 0x010d: /* SAM31 */ goto unimplemented;
+   case 0x010e: /* SAM64 */ goto unimplemented;
+   case 0x01ff: /* TRAP2 */ goto unimplemented;
+   }
+
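+   /* No match on the full 16-bit opcode; decode on the top byte alone,
+      which identifies the RR- and I-format instructions handled here. */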
+   switch ((ovl.value & 0xff00) >> 8) {
+   case 0x04: /* SPM */ goto unimplemented;
+   case 0x05: /* BALR */ goto unimplemented;
+   case 0x06: s390_format_RR_RR(s390_irgen_BCTR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x07: s390_format_RR(s390_irgen_BCR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                             goto ok;
+   case 0x0a: s390_format_I(s390_irgen_SVC, ovl.fmt.I.i);  goto ok;
+   case 0x0b: /* BSM */ goto unimplemented;
+   case 0x0c: /* BASSM */ goto unimplemented;
+   case 0x0d: s390_format_RR_RR(s390_irgen_BASR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x0e: s390_format_RR(s390_irgen_MVCL, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                             goto ok;
+   case 0x0f: s390_format_RR(s390_irgen_CLCL, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                             goto ok;
+   case 0x10: s390_format_RR_RR(s390_irgen_LPR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x11: s390_format_RR_RR(s390_irgen_LNR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x12: s390_format_RR_RR(s390_irgen_LTR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x13: s390_format_RR_RR(s390_irgen_LCR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x14: s390_format_RR_RR(s390_irgen_NR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x15: s390_format_RR_RR(s390_irgen_CLR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x16: s390_format_RR_RR(s390_irgen_OR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x17: s390_format_RR_RR(s390_irgen_XR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x18: s390_format_RR_RR(s390_irgen_LR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x19: s390_format_RR_RR(s390_irgen_CR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x1a: s390_format_RR_RR(s390_irgen_AR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x1b: s390_format_RR_RR(s390_irgen_SR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x1c: s390_format_RR_RR(s390_irgen_MR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x1d: s390_format_RR_RR(s390_irgen_DR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x1e: s390_format_RR_RR(s390_irgen_ALR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x1f: s390_format_RR_RR(s390_irgen_SLR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x20: /* LPDR */ goto unimplemented;
+   case 0x21: /* LNDR */ goto unimplemented;
+   case 0x22: /* LTDR */ goto unimplemented;
+   case 0x23: /* LCDR */ goto unimplemented;
+   case 0x24: /* HDR */ goto unimplemented;
+   case 0x25: /* LDXR */ goto unimplemented;
+   case 0x26: /* MXR */ goto unimplemented;
+   case 0x27: /* MXDR */ goto unimplemented;
+   case 0x28: s390_format_RR_FF(s390_irgen_LDR, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x29: /* CDR */ goto unimplemented;
+   case 0x2a: /* ADR */ goto unimplemented;
+   case 0x2b: /* SDR */ goto unimplemented;
+   case 0x2c: /* MDR */ goto unimplemented;
+   case 0x2d: /* DDR */ goto unimplemented;
+   case 0x2e: /* AWR */ goto unimplemented;
+   case 0x2f: /* SWR */ goto unimplemented;
+   case 0x30: /* LPER */ goto unimplemented;
+   case 0x31: /* LNER */ goto unimplemented;
+   case 0x32: /* LTER */ goto unimplemented;
+   case 0x33: /* LCER */ goto unimplemented;
+   case 0x34: /* HER */ goto unimplemented;
+   case 0x35: /* LEDR */ goto unimplemented;
+   case 0x36: /* AXR */ goto unimplemented;
+   case 0x37: /* SXR */ goto unimplemented;
+   case 0x38: s390_format_RR_FF(s390_irgen_LER, ovl.fmt.RR.r1, ovl.fmt.RR.r2);
+                                goto ok;
+   case 0x39: /* CER */ goto unimplemented;
+   case 0x3a: /* AER */ goto unimplemented;
+   case 0x3b: /* SER */ goto unimplemented;
+   case 0x3c: /* MDER */ goto unimplemented;
+   case 0x3d: /* DER */ goto unimplemented;
+   case 0x3e: /* AUR */ goto unimplemented;
+   case 0x3f: /* SUR */ goto unimplemented;
+   }
+
+   return S390_DECODE_UNKNOWN_INSN;
+
+ok:
+   return S390_DECODE_OK;
+
+unimplemented:
+   return S390_DECODE_UNIMPLEMENTED_INSN;
+}
+
+static s390_decode_t
+s390_decode_4byte_and_irgen(UChar *bytes)
+{
+   typedef union {
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int op2 :  4;
+         unsigned int i2  : 16;
+      } RI;
+      struct {
+         unsigned int op : 16;
+         unsigned int    :  8;
+         unsigned int r1 :  4;
+         unsigned int r2 :  4;
+      } RRE;
+      struct {
+         unsigned int op : 16;
+         unsigned int r1 :  4;
+         unsigned int    :  4;
+         unsigned int r3 :  4;
+         unsigned int r2 :  4;
+      } RRF;
+      struct {
+         unsigned int op : 16;
+         unsigned int r3 :  4;
+         unsigned int m4 :  4;
+         unsigned int r1 :  4;
+         unsigned int r2 :  4;
+      } RRF2;
+      struct {
+         unsigned int op : 16;
+         unsigned int r3 :  4;
+         unsigned int    :  4;
+         unsigned int r1 :  4;
+         unsigned int r2 :  4;
+      } RRF3;
+      struct {
+         unsigned int op : 16;
+         unsigned int r3 :  4;
+         unsigned int    :  4;
+         unsigned int r1 :  4;
+         unsigned int r2 :  4;
+      } RRR;
+      struct {
+         unsigned int op : 16;
+         unsigned int r3 :  4;
+         unsigned int    :  4;
+         unsigned int r1 :  4;
+         unsigned int r2 :  4;
+      } RRF4;
+      struct {
+         unsigned int op :  8;
+         unsigned int r1 :  4;
+         unsigned int r3 :  4;
+         unsigned int b2 :  4;
+         unsigned int d2 : 12;
+      } RS;
+      struct {
+         unsigned int op :  8;
+         unsigned int r1 :  4;
+         unsigned int r3 :  4;
+         unsigned int i2 : 16;
+      } RSI;
+      struct {
+         unsigned int op :  8;
+         unsigned int r1 :  4;
+         unsigned int x2 :  4;
+         unsigned int b2 :  4;
+         unsigned int d2 : 12;
+      } RX;
+      struct {
+         unsigned int op : 16;
+         unsigned int b2 :  4;
+         unsigned int d2 : 12;
+      } S;
+      struct {
+         unsigned int op :  8;
+         unsigned int i2 :  8;
+         unsigned int b1 :  4;
+         unsigned int d1 : 12;
+      } SI;
+   } formats;
+   union {
+      formats fmt;
+      UInt value;
+   } ovl;
+
+   vassert(sizeof(formats) == 4);
+
+   ((char *)(&ovl.value))[0] = bytes[0];
+   ((char *)(&ovl.value))[1] = bytes[1];
+   ((char *)(&ovl.value))[2] = bytes[2];
+   ((char *)(&ovl.value))[3] = bytes[3];
+
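+   /* The mask 0xff0f0000 keeps op1 and the 4-bit op2 field of the RI
+      format while zeroing out the r1 nibble in between. */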
+   switch ((ovl.value & 0xff0f0000) >> 16) {
+   case 0xa500: s390_format_RI_RU(s390_irgen_IIHH, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa501: s390_format_RI_RU(s390_irgen_IIHL, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa502: s390_format_RI_RU(s390_irgen_IILH, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa503: s390_format_RI_RU(s390_irgen_IILL, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa504: s390_format_RI_RU(s390_irgen_NIHH, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa505: s390_format_RI_RU(s390_irgen_NIHL, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa506: s390_format_RI_RU(s390_irgen_NILH, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa507: s390_format_RI_RU(s390_irgen_NILL, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa508: s390_format_RI_RU(s390_irgen_OIHH, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa509: s390_format_RI_RU(s390_irgen_OIHL, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa50a: s390_format_RI_RU(s390_irgen_OILH, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa50b: s390_format_RI_RU(s390_irgen_OILL, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa50c: s390_format_RI_RU(s390_irgen_LLIHH, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa50d: s390_format_RI_RU(s390_irgen_LLIHL, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa50e: s390_format_RI_RU(s390_irgen_LLILH, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa50f: s390_format_RI_RU(s390_irgen_LLILL, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa700: s390_format_RI_RU(s390_irgen_TMLH, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa701: s390_format_RI_RU(s390_irgen_TMLL, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa702: s390_format_RI_RU(s390_irgen_TMHH, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa703: s390_format_RI_RU(s390_irgen_TMHL, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa704: s390_format_RI(s390_irgen_BRC, ovl.fmt.RI.r1, ovl.fmt.RI.i2);
+                               goto ok;
+   case 0xa705: s390_format_RI_RP(s390_irgen_BRAS, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa706: s390_format_RI_RP(s390_irgen_BRCT, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa707: s390_format_RI_RP(s390_irgen_BRCTG, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa708: s390_format_RI_RI(s390_irgen_LHI, ovl.fmt.RI.r1, ovl.fmt.RI.i2);
+                                  goto ok;
+   case 0xa709: s390_format_RI_RI(s390_irgen_LGHI, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa70a: s390_format_RI_RI(s390_irgen_AHI, ovl.fmt.RI.r1, ovl.fmt.RI.i2);
+                                  goto ok;
+   case 0xa70b: s390_format_RI_RI(s390_irgen_AGHI, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa70c: s390_format_RI_RI(s390_irgen_MHI, ovl.fmt.RI.r1, ovl.fmt.RI.i2);
+                                  goto ok;
+   case 0xa70d: s390_format_RI_RI(s390_irgen_MGHI, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   case 0xa70e: s390_format_RI_RI(s390_irgen_CHI, ovl.fmt.RI.r1, ovl.fmt.RI.i2);
+                                  goto ok;
+   case 0xa70f: s390_format_RI_RI(s390_irgen_CGHI, ovl.fmt.RI.r1,
+                                  ovl.fmt.RI.i2);  goto ok;
+   }
+
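+   /* Next, the instructions identified by a full 16-bit opcode (RRE,
+      RRF, S and related formats). */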
+   switch ((ovl.value & 0xffff0000) >> 16) {
+   case 0x8000: /* SSM */ goto unimplemented;
+   case 0x8200: /* LPSW */ goto unimplemented;
+   case 0x9300: s390_format_S_RD(s390_irgen_TS, ovl.fmt.S.b2, ovl.fmt.S.d2);
+                                 goto ok;
+   case 0xb202: /* STIDP */ goto unimplemented;
+   case 0xb204: /* SCK */ goto unimplemented;
+   case 0xb205: s390_format_S_RD(s390_irgen_STCK, ovl.fmt.S.b2, ovl.fmt.S.d2);
+                                 goto ok;
+   case 0xb206: /* SCKC */ goto unimplemented;
+   case 0xb207: /* STCKC */ goto unimplemented;
+   case 0xb208: /* SPT */ goto unimplemented;
+   case 0xb209: /* STPT */ goto unimplemented;
+   case 0xb20a: /* SPKA */ goto unimplemented;
+   case 0xb20b: /* IPK */ goto unimplemented;
+   case 0xb20d: /* PTLB */ goto unimplemented;
+   case 0xb210: /* SPX */ goto unimplemented;
+   case 0xb211: /* STPX */ goto unimplemented;
+   case 0xb212: /* STAP */ goto unimplemented;
+   case 0xb214: /* SIE */ goto unimplemented;
+   case 0xb218: /* PC */ goto unimplemented;
+   case 0xb219: /* SAC */ goto unimplemented;
+   case 0xb21a: /* CFC */ goto unimplemented;
+   case 0xb221: /* IPTE */ goto unimplemented;
+   case 0xb222: s390_format_RRE_R0(s390_irgen_IPM, ovl.fmt.RRE.r1);  goto ok;
+   case 0xb223: /* IVSK */ goto unimplemented;
+   case 0xb224: /* IAC */ goto unimplemented;
+   case 0xb225: /* SSAR */ goto unimplemented;
+   case 0xb226: /* EPAR */ goto unimplemented;
+   case 0xb227: /* ESAR */ goto unimplemented;
+   case 0xb228: /* PT */ goto unimplemented;
+   case 0xb229: /* ISKE */ goto unimplemented;
+   case 0xb22a: /* RRBE */ goto unimplemented;
+   case 0xb22b: /* SSKE */ goto unimplemented;
+   case 0xb22c: /* TB */ goto unimplemented;
+   case 0xb22d: /* DXR */ goto unimplemented;
+   case 0xb22e: /* PGIN */ goto unimplemented;
+   case 0xb22f: /* PGOUT */ goto unimplemented;
+   case 0xb230: /* CSCH */ goto unimplemented;
+   case 0xb231: /* HSCH */ goto unimplemented;
+   case 0xb232: /* MSCH */ goto unimplemented;
+   case 0xb233: /* SSCH */ goto unimplemented;
+   case 0xb234: /* STSCH */ goto unimplemented;
+   case 0xb235: /* TSCH */ goto unimplemented;
+   case 0xb236: /* TPI */ goto unimplemented;
+   case 0xb237: /* SAL */ goto unimplemented;
+   case 0xb238: /* RSCH */ goto unimplemented;
+   case 0xb239: /* STCRW */ goto unimplemented;
+   case 0xb23a: /* STCPS */ goto unimplemented;
+   case 0xb23b: /* RCHP */ goto unimplemented;
+   case 0xb23c: /* SCHM */ goto unimplemented;
+   case 0xb240: /* BAKR */ goto unimplemented;
+   case 0xb241: s390_format_RRE(s390_irgen_CKSM, ovl.fmt.RRE.r1,
+                                ovl.fmt.RRE.r2);  goto ok;
+   case 0xb244: /* SQDR */ goto unimplemented;
+   case 0xb245: /* SQER */ goto unimplemented;
+   case 0xb246: /* STURA */ goto unimplemented;
+   case 0xb247: /* MSTA */ goto unimplemented;
+   case 0xb248: /* PALB */ goto unimplemented;
+   case 0xb249: /* EREG */ goto unimplemented;
+   case 0xb24a: /* ESTA */ goto unimplemented;
+   case 0xb24b: /* LURA */ goto unimplemented;
+   case 0xb24c: /* TAR */ goto unimplemented;
+   case 0xb24d: s390_format_RRE(s390_irgen_CPYA, ovl.fmt.RRE.r1,
+                                ovl.fmt.RRE.r2);  goto ok;
+   case 0xb24e: s390_format_RRE(s390_irgen_SAR, ovl.fmt.RRE.r1, ovl.fmt.RRE.r2);
+                                goto ok;
+   case 0xb24f: s390_format_RRE(s390_irgen_EAR, ovl.fmt.RRE.r1, ovl.fmt.RRE.r2);
+                                goto ok;
+   case 0xb250: /* CSP */ goto unimplemented;
+   case 0xb252: s390_format_RRE_RR(s390_irgen_MSR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb254: /* MVPG */ goto unimplemented;
+   case 0xb255: s390_format_RRE_RR(s390_irgen_MVST, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb257: /* CUSE */ goto unimplemented;
+   case 0xb258: /* BSG */ goto unimplemented;
+   case 0xb25a: /* BSA */ goto unimplemented;
+   case 0xb25d: s390_format_RRE_RR(s390_irgen_CLST, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb25e: s390_format_RRE_RR(s390_irgen_SRST, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb263: /* CMPSC */ goto unimplemented;
+   case 0xb274: /* SIGA */ goto unimplemented;
+   case 0xb276: /* XSCH */ goto unimplemented;
+   case 0xb277: /* RP */ goto unimplemented;
+   case 0xb278: s390_format_S_RD(s390_irgen_STCKE, ovl.fmt.S.b2, ovl.fmt.S.d2);
+                                 goto ok;
+   case 0xb279: /* SACF */ goto unimplemented;
+   case 0xb27c: s390_format_S_RD(s390_irgen_STCKF, ovl.fmt.S.b2, ovl.fmt.S.d2);
+                                 goto ok;
+   case 0xb27d: /* STSI */ goto unimplemented;
+   case 0xb299: s390_format_S_RD(s390_irgen_SRNM, ovl.fmt.S.b2, ovl.fmt.S.d2);
+                                 goto ok;
+   case 0xb29c: s390_format_S_RD(s390_irgen_STFPC, ovl.fmt.S.b2, ovl.fmt.S.d2);
+                                 goto ok;
+   case 0xb29d: s390_format_S_RD(s390_irgen_LFPC, ovl.fmt.S.b2, ovl.fmt.S.d2);
+                                 goto ok;
+   case 0xb2a5: /* TRE */ goto unimplemented;
+   case 0xb2a6: /* CU21 */ goto unimplemented;
+   case 0xb2a7: /* CU12 */ goto unimplemented;
+   case 0xb2b0: s390_format_S_RD(s390_irgen_STFLE, ovl.fmt.S.b2, ovl.fmt.S.d2);
+                                 goto ok;
+   case 0xb2b1: /* STFL */ goto unimplemented;
+   case 0xb2b2: /* LPSWE */ goto unimplemented;
+   case 0xb2b8: /* SRNMB */ goto unimplemented;
+   case 0xb2b9: /* SRNMT */ goto unimplemented;
+   case 0xb2bd: /* LFAS */ goto unimplemented;
+   case 0xb2ff: /* TRAP4 */ goto unimplemented;
+   case 0xb300: s390_format_RRE_FF(s390_irgen_LPEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb301: s390_format_RRE_FF(s390_irgen_LNEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb302: s390_format_RRE_FF(s390_irgen_LTEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb303: s390_format_RRE_FF(s390_irgen_LCEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb304: s390_format_RRE_FF(s390_irgen_LDEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb305: s390_format_RRE_FF(s390_irgen_LXDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb306: s390_format_RRE_FF(s390_irgen_LXEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb307: /* MXDBR */ goto unimplemented;
+   case 0xb308: /* KEBR */ goto unimplemented;
+   case 0xb309: s390_format_RRE_FF(s390_irgen_CEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb30a: s390_format_RRE_FF(s390_irgen_AEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb30b: s390_format_RRE_FF(s390_irgen_SEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb30c: /* MDEBR */ goto unimplemented;
+   case 0xb30d: s390_format_RRE_FF(s390_irgen_DEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb30e: s390_format_RRF_F0FF(s390_irgen_MAEBR, ovl.fmt.RRF.r1,
+                                     ovl.fmt.RRF.r3, ovl.fmt.RRF.r2);  goto ok;
+   case 0xb30f: s390_format_RRF_F0FF(s390_irgen_MSEBR, ovl.fmt.RRF.r1,
+                                     ovl.fmt.RRF.r3, ovl.fmt.RRF.r2);  goto ok;
+   case 0xb310: s390_format_RRE_FF(s390_irgen_LPDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb311: s390_format_RRE_FF(s390_irgen_LNDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb312: s390_format_RRE_FF(s390_irgen_LTDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb313: s390_format_RRE_FF(s390_irgen_LCDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb314: s390_format_RRE_FF(s390_irgen_SQEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb315: s390_format_RRE_FF(s390_irgen_SQDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb316: s390_format_RRE_FF(s390_irgen_SQXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb317: s390_format_RRE_FF(s390_irgen_MEEBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb318: /* KDBR */ goto unimplemented;
+   case 0xb319: s390_format_RRE_FF(s390_irgen_CDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb31a: s390_format_RRE_FF(s390_irgen_ADBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb31b: s390_format_RRE_FF(s390_irgen_SDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb31c: s390_format_RRE_FF(s390_irgen_MDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb31d: s390_format_RRE_FF(s390_irgen_DDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb31e: s390_format_RRF_F0FF(s390_irgen_MADBR, ovl.fmt.RRF.r1,
+                                     ovl.fmt.RRF.r3, ovl.fmt.RRF.r2);  goto ok;
+   case 0xb31f: s390_format_RRF_F0FF(s390_irgen_MSDBR, ovl.fmt.RRF.r1,
+                                     ovl.fmt.RRF.r3, ovl.fmt.RRF.r2);  goto ok;
+   case 0xb324: /* LDER */ goto unimplemented;
+   case 0xb325: /* LXDR */ goto unimplemented;
+   case 0xb326: /* LXER */ goto unimplemented;
+   case 0xb32e: /* MAER */ goto unimplemented;
+   case 0xb32f: /* MSER */ goto unimplemented;
+   case 0xb336: /* SQXR */ goto unimplemented;
+   case 0xb337: /* MEER */ goto unimplemented;
+   case 0xb338: /* MAYLR */ goto unimplemented;
+   case 0xb339: /* MYLR */ goto unimplemented;
+   case 0xb33a: /* MAYR */ goto unimplemented;
+   case 0xb33b: /* MYR */ goto unimplemented;
+   case 0xb33c: /* MAYHR */ goto unimplemented;
+   case 0xb33d: /* MYHR */ goto unimplemented;
+   case 0xb33e: /* MADR */ goto unimplemented;
+   case 0xb33f: /* MSDR */ goto unimplemented;
+   case 0xb340: s390_format_RRE_FF(s390_irgen_LPXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb341: s390_format_RRE_FF(s390_irgen_LNXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb342: s390_format_RRE_FF(s390_irgen_LTXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb343: s390_format_RRE_FF(s390_irgen_LCXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb344: s390_format_RRE_FF(s390_irgen_LEDBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb345: s390_format_RRE_FF(s390_irgen_LDXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb346: s390_format_RRE_FF(s390_irgen_LEXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb347: /* FIXBR */ goto unimplemented;
+   case 0xb348: /* KXBR */ goto unimplemented;
+   case 0xb349: s390_format_RRE_FF(s390_irgen_CXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb34a: s390_format_RRE_FF(s390_irgen_AXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb34b: s390_format_RRE_FF(s390_irgen_SXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb34c: s390_format_RRE_FF(s390_irgen_MXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb34d: s390_format_RRE_FF(s390_irgen_DXBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb350: /* TBEDR */ goto unimplemented;
+   case 0xb351: /* TBDR */ goto unimplemented;
+   case 0xb353: /* DIEBR */ goto unimplemented;
+   case 0xb357: /* FIEBR */ goto unimplemented;
+   case 0xb358: /* THDER */ goto unimplemented;
+   case 0xb359: /* THDR */ goto unimplemented;
+   case 0xb35b: /* DIDBR */ goto unimplemented;
+   case 0xb35f: /* FIDBR */ goto unimplemented;
+   case 0xb360: /* LPXR */ goto unimplemented;
+   case 0xb361: /* LNXR */ goto unimplemented;
+   case 0xb362: /* LTXR */ goto unimplemented;
+   case 0xb363: /* LCXR */ goto unimplemented;
+   case 0xb365: s390_format_RRE_FF(s390_irgen_LXR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb366: /* LEXR */ goto unimplemented;
+   case 0xb367: /* FIXR */ goto unimplemented;
+   case 0xb369: /* CXR */ goto unimplemented;
+   case 0xb370: s390_format_RRE_FF(s390_irgen_LPDFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb371: s390_format_RRE_FF(s390_irgen_LNDFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb372: s390_format_RRF_F0FF2(s390_irgen_CPSDR, ovl.fmt.RRF3.r3,
+                                      ovl.fmt.RRF3.r1, ovl.fmt.RRF3.r2);
+                                      goto ok;
+   case 0xb373: s390_format_RRE_FF(s390_irgen_LCDFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb374: s390_format_RRE_F0(s390_irgen_LZER, ovl.fmt.RRE.r1);  goto ok;
+   case 0xb375: s390_format_RRE_F0(s390_irgen_LZDR, ovl.fmt.RRE.r1);  goto ok;
+   case 0xb376: s390_format_RRE_F0(s390_irgen_LZXR, ovl.fmt.RRE.r1);  goto ok;
+   case 0xb377: /* FIER */ goto unimplemented;
+   case 0xb37f: /* FIDR */ goto unimplemented;
+   case 0xb384: s390_format_RRE_R0(s390_irgen_SFPC, ovl.fmt.RRE.r1);  goto ok;
+   case 0xb385: /* SFASR */ goto unimplemented;
+   case 0xb38c: s390_format_RRE_R0(s390_irgen_EFPC, ovl.fmt.RRE.r1);  goto ok;
+   case 0xb390: /* CELFBR */ goto unimplemented;
+   case 0xb391: /* CDLFBR */ goto unimplemented;
+   case 0xb392: /* CXLFBR */ goto unimplemented;
+   case 0xb394: s390_format_RRE_FR(s390_irgen_CEFBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb395: s390_format_RRE_FR(s390_irgen_CDFBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb396: s390_format_RRE_FR(s390_irgen_CXFBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb398: s390_format_RRF_U0RF(s390_irgen_CFEBR, ovl.fmt.RRF3.r3,
+                                     ovl.fmt.RRF3.r1, ovl.fmt.RRF3.r2);
+                                     goto ok;
+   case 0xb399: s390_format_RRF_U0RF(s390_irgen_CFDBR, ovl.fmt.RRF3.r3,
+                                     ovl.fmt.RRF3.r1, ovl.fmt.RRF3.r2);
+                                     goto ok;
+   case 0xb39a: s390_format_RRF_U0RF(s390_irgen_CFXBR, ovl.fmt.RRF3.r3,
+                                     ovl.fmt.RRF3.r1, ovl.fmt.RRF3.r2);
+                                     goto ok;
+   case 0xb3a0: /* CELGBR */ goto unimplemented;
+   case 0xb3a1: /* CDLGBR */ goto unimplemented;
+   case 0xb3a2: /* CXLGBR */ goto unimplemented;
+   case 0xb3a4: s390_format_RRE_FR(s390_irgen_CEGBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb3a5: s390_format_RRE_FR(s390_irgen_CDGBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb3a6: s390_format_RRE_FR(s390_irgen_CXGBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb3a8: s390_format_RRF_U0RF(s390_irgen_CGEBR, ovl.fmt.RRF3.r3,
+                                     ovl.fmt.RRF3.r1, ovl.fmt.RRF3.r2);
+                                     goto ok;
+   case 0xb3a9: s390_format_RRF_U0RF(s390_irgen_CGDBR, ovl.fmt.RRF3.r3,
+                                     ovl.fmt.RRF3.r1, ovl.fmt.RRF3.r2);
+                                     goto ok;
+   case 0xb3aa: s390_format_RRF_U0RF(s390_irgen_CGXBR, ovl.fmt.RRF3.r3,
+                                     ovl.fmt.RRF3.r1, ovl.fmt.RRF3.r2);
+                                     goto ok;
+   case 0xb3b4: /* CEFR */ goto unimplemented;
+   case 0xb3b5: /* CDFR */ goto unimplemented;
+   case 0xb3b6: /* CXFR */ goto unimplemented;
+   case 0xb3b8: /* CFER */ goto unimplemented;
+   case 0xb3b9: /* CFDR */ goto unimplemented;
+   case 0xb3ba: /* CFXR */ goto unimplemented;
+   case 0xb3c1: s390_format_RRE_FR(s390_irgen_LDGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb3c4: /* CEGR */ goto unimplemented;
+   case 0xb3c5: /* CDGR */ goto unimplemented;
+   case 0xb3c6: /* CXGR */ goto unimplemented;
+   case 0xb3c8: /* CGER */ goto unimplemented;
+   case 0xb3c9: /* CGDR */ goto unimplemented;
+   case 0xb3ca: /* CGXR */ goto unimplemented;
+   case 0xb3cd: s390_format_RRE_RF(s390_irgen_LGDR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb3d0: /* MDTR */ goto unimplemented;
+   case 0xb3d1: /* DDTR */ goto unimplemented;
+   case 0xb3d2: /* ADTR */ goto unimplemented;
+   case 0xb3d3: /* SDTR */ goto unimplemented;
+   case 0xb3d4: /* LDETR */ goto unimplemented;
+   case 0xb3d5: /* LEDTR */ goto unimplemented;
+   case 0xb3d6: /* LTDTR */ goto unimplemented;
+   case 0xb3d7: /* FIDTR */ goto unimplemented;
+   case 0xb3d8: /* MXTR */ goto unimplemented;
+   case 0xb3d9: /* DXTR */ goto unimplemented;
+   case 0xb3da: /* AXTR */ goto unimplemented;
+   case 0xb3db: /* SXTR */ goto unimplemented;
+   case 0xb3dc: /* LXDTR */ goto unimplemented;
+   case 0xb3dd: /* LDXTR */ goto unimplemented;
+   case 0xb3de: /* LTXTR */ goto unimplemented;
+   case 0xb3df: /* FIXTR */ goto unimplemented;
+   case 0xb3e0: /* KDTR */ goto unimplemented;
+   case 0xb3e1: /* CGDTR */ goto unimplemented;
+   case 0xb3e2: /* CUDTR */ goto unimplemented;
+   case 0xb3e3: /* CSDTR */ goto unimplemented;
+   case 0xb3e4: /* CDTR */ goto unimplemented;
+   case 0xb3e5: /* EEDTR */ goto unimplemented;
+   case 0xb3e7: /* ESDTR */ goto unimplemented;
+   case 0xb3e8: /* KXTR */ goto unimplemented;
+   case 0xb3e9: /* CGXTR */ goto unimplemented;
+   case 0xb3ea: /* CUXTR */ goto unimplemented;
+   case 0xb3eb: /* CSXTR */ goto unimplemented;
+   case 0xb3ec: /* CXTR */ goto unimplemented;
+   case 0xb3ed: /* EEXTR */ goto unimplemented;
+   case 0xb3ef: /* ESXTR */ goto unimplemented;
+   case 0xb3f1: /* CDGTR */ goto unimplemented;
+   case 0xb3f2: /* CDUTR */ goto unimplemented;
+   case 0xb3f3: /* CDSTR */ goto unimplemented;
+   case 0xb3f4: /* CEDTR */ goto unimplemented;
+   case 0xb3f5: /* QADTR */ goto unimplemented;
+   case 0xb3f6: /* IEDTR */ goto unimplemented;
+   case 0xb3f7: /* RRDTR */ goto unimplemented;
+   case 0xb3f9: /* CXGTR */ goto unimplemented;
+   case 0xb3fa: /* CXUTR */ goto unimplemented;
+   case 0xb3fb: /* CXSTR */ goto unimplemented;
+   case 0xb3fc: /* CEXTR */ goto unimplemented;
+   case 0xb3fd: /* QAXTR */ goto unimplemented;
+   case 0xb3fe: /* IEXTR */ goto unimplemented;
+   case 0xb3ff: /* RRXTR */ goto unimplemented;
+   case 0xb900: s390_format_RRE_RR(s390_irgen_LPGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb901: s390_format_RRE_RR(s390_irgen_LNGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb902: s390_format_RRE_RR(s390_irgen_LTGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb903: s390_format_RRE_RR(s390_irgen_LCGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb904: s390_format_RRE_RR(s390_irgen_LGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb905: /* LURAG */ goto unimplemented;
+   case 0xb906: s390_format_RRE_RR(s390_irgen_LGBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb907: s390_format_RRE_RR(s390_irgen_LGHR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb908: s390_format_RRE_RR(s390_irgen_AGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb909: s390_format_RRE_RR(s390_irgen_SGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb90a: s390_format_RRE_RR(s390_irgen_ALGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb90b: s390_format_RRE_RR(s390_irgen_SLGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb90c: s390_format_RRE_RR(s390_irgen_MSGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb90d: s390_format_RRE_RR(s390_irgen_DSGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb90e: /* EREGG */ goto unimplemented;
+   case 0xb90f: s390_format_RRE_RR(s390_irgen_LRVGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb910: s390_format_RRE_RR(s390_irgen_LPGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb911: s390_format_RRE_RR(s390_irgen_LNGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb912: s390_format_RRE_RR(s390_irgen_LTGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb913: s390_format_RRE_RR(s390_irgen_LCGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb914: s390_format_RRE_RR(s390_irgen_LGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb916: s390_format_RRE_RR(s390_irgen_LLGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb917: s390_format_RRE_RR(s390_irgen_LLGTR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb918: s390_format_RRE_RR(s390_irgen_AGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb919: s390_format_RRE_RR(s390_irgen_SGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb91a: s390_format_RRE_RR(s390_irgen_ALGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb91b: s390_format_RRE_RR(s390_irgen_SLGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb91c: s390_format_RRE_RR(s390_irgen_MSGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb91d: s390_format_RRE_RR(s390_irgen_DSGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb91e: /* KMAC */ goto unimplemented;
+   case 0xb91f: s390_format_RRE_RR(s390_irgen_LRVR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb920: s390_format_RRE_RR(s390_irgen_CGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb921: s390_format_RRE_RR(s390_irgen_CLGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb925: /* STURG */ goto unimplemented;
+   case 0xb926: s390_format_RRE_RR(s390_irgen_LBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb927: s390_format_RRE_RR(s390_irgen_LHR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb928: /* PCKMO */ goto unimplemented;
+   case 0xb92b: /* KMO */ goto unimplemented;
+   case 0xb92c: /* PCC */ goto unimplemented;
+   case 0xb92d: /* KMCTR */ goto unimplemented;
+   case 0xb92e: /* KM */ goto unimplemented;
+   case 0xb92f: /* KMC */ goto unimplemented;
+   case 0xb930: s390_format_RRE_RR(s390_irgen_CGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb931: s390_format_RRE_RR(s390_irgen_CLGFR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb93e: /* KIMD */ goto unimplemented;
+   case 0xb93f: /* KLMD */ goto unimplemented;
+   case 0xb941: /* CFDTR */ goto unimplemented;
+   case 0xb942: /* CLGDTR */ goto unimplemented;
+   case 0xb943: /* CLFDTR */ goto unimplemented;
+   case 0xb946: s390_format_RRE_RR(s390_irgen_BCTGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb949: /* CFXTR */ goto unimplemented;
+   case 0xb94a: /* CLGXTR */ goto unimplemented;
+   case 0xb94b: /* CLFXTR */ goto unimplemented;
+   case 0xb951: /* CDFTR */ goto unimplemented;
+   case 0xb952: /* CDLGTR */ goto unimplemented;
+   case 0xb953: /* CDLFTR */ goto unimplemented;
+   case 0xb959: /* CXFTR */ goto unimplemented;
+   case 0xb95a: /* CXLGTR */ goto unimplemented;
+   case 0xb95b: /* CXLFTR */ goto unimplemented;
+   case 0xb960: /* CGRT */ goto unimplemented;
+   case 0xb961: /* CLGRT */ goto unimplemented;
+   case 0xb972: /* CRT */ goto unimplemented;
+   case 0xb973: /* CLRT */ goto unimplemented;
+   case 0xb980: s390_format_RRE_RR(s390_irgen_NGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb981: s390_format_RRE_RR(s390_irgen_OGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb982: s390_format_RRE_RR(s390_irgen_XGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb983: s390_format_RRE_RR(s390_irgen_FLOGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb984: s390_format_RRE_RR(s390_irgen_LLGCR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb985: s390_format_RRE_RR(s390_irgen_LLGHR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb986: s390_format_RRE_RR(s390_irgen_MLGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb987: s390_format_RRE_RR(s390_irgen_DLGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb988: s390_format_RRE_RR(s390_irgen_ALCGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb989: s390_format_RRE_RR(s390_irgen_SLBGR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb98a: /* CSPG */ goto unimplemented;
+   case 0xb98d: /* EPSW */ goto unimplemented;
+   case 0xb98e: /* IDTE */ goto unimplemented;
+   case 0xb990: /* TRTT */ goto unimplemented;
+   case 0xb991: /* TRTO */ goto unimplemented;
+   case 0xb992: /* TROT */ goto unimplemented;
+   case 0xb993: /* TROO */ goto unimplemented;
+   case 0xb994: s390_format_RRE_RR(s390_irgen_LLCR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb995: s390_format_RRE_RR(s390_irgen_LLHR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb996: s390_format_RRE_RR(s390_irgen_MLR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb997: s390_format_RRE_RR(s390_irgen_DLR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb998: s390_format_RRE_RR(s390_irgen_ALCR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb999: s390_format_RRE_RR(s390_irgen_SLBR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb99a: /* EPAIR */ goto unimplemented;
+   case 0xb99b: /* ESAIR */ goto unimplemented;
+   case 0xb99d: /* ESEA */ goto unimplemented;
+   case 0xb99e: /* PTI */ goto unimplemented;
+   case 0xb99f: /* SSAIR */ goto unimplemented;
+   case 0xb9a2: /* PTF */ goto unimplemented;
+   case 0xb9aa: /* LPTEA */ goto unimplemented;
+   case 0xb9ae: /* RRBM */ goto unimplemented;
+   case 0xb9af: /* PFMF */ goto unimplemented;
+   case 0xb9b0: /* CU14 */ goto unimplemented;
+   case 0xb9b1: /* CU24 */ goto unimplemented;
+   case 0xb9b2: /* CU41 */ goto unimplemented;
+   case 0xb9b3: /* CU42 */ goto unimplemented;
+   case 0xb9bd: /* TRTRE */ goto unimplemented;
+   case 0xb9be: /* SRSTU */ goto unimplemented;
+   case 0xb9bf: /* TRTE */ goto unimplemented;
+   case 0xb9c8: s390_format_RRF_R0RR2(s390_irgen_AHHHR, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9c9: s390_format_RRF_R0RR2(s390_irgen_SHHHR, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9ca: s390_format_RRF_R0RR2(s390_irgen_ALHHHR, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9cb: s390_format_RRF_R0RR2(s390_irgen_SLHHHR, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9cd: s390_format_RRE_RR(s390_irgen_CHHR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb9cf: s390_format_RRE_RR(s390_irgen_CLHHR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb9d8: s390_format_RRF_R0RR2(s390_irgen_AHHLR, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9d9: s390_format_RRF_R0RR2(s390_irgen_SHHLR, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9da: s390_format_RRF_R0RR2(s390_irgen_ALHHLR, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9db: s390_format_RRF_R0RR2(s390_irgen_SLHHLR, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9dd: s390_format_RRE_RR(s390_irgen_CHLR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb9df: s390_format_RRE_RR(s390_irgen_CLHLR, ovl.fmt.RRE.r1,
+                                   ovl.fmt.RRE.r2);  goto ok;
+   case 0xb9e1: /* POPCNT */ goto unimplemented;
+   case 0xb9e2: s390_format_RRF_U0RR(s390_irgen_LOCGR, ovl.fmt.RRF3.r3,
+                                     ovl.fmt.RRF3.r1, ovl.fmt.RRF3.r2,
+                                     S390_XMNM_LOCGR);  goto ok;
+   case 0xb9e4: s390_format_RRF_R0RR2(s390_irgen_NGRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9e6: s390_format_RRF_R0RR2(s390_irgen_OGRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9e7: s390_format_RRF_R0RR2(s390_irgen_XGRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9e8: s390_format_RRF_R0RR2(s390_irgen_AGRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9e9: s390_format_RRF_R0RR2(s390_irgen_SGRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9ea: s390_format_RRF_R0RR2(s390_irgen_ALGRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9eb: s390_format_RRF_R0RR2(s390_irgen_SLGRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9f2: s390_format_RRF_U0RR(s390_irgen_LOCR, ovl.fmt.RRF3.r3,
+                                     ovl.fmt.RRF3.r1, ovl.fmt.RRF3.r2,
+                                     S390_XMNM_LOCR);  goto ok;
+   case 0xb9f4: s390_format_RRF_R0RR2(s390_irgen_NRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9f6: s390_format_RRF_R0RR2(s390_irgen_ORK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9f7: s390_format_RRF_R0RR2(s390_irgen_XRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9f8: s390_format_RRF_R0RR2(s390_irgen_ARK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9f9: s390_format_RRF_R0RR2(s390_irgen_SRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9fa: s390_format_RRF_R0RR2(s390_irgen_ALRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   case 0xb9fb: s390_format_RRF_R0RR2(s390_irgen_SLRK, ovl.fmt.RRF4.r3,
+                                      ovl.fmt.RRF4.r1, ovl.fmt.RRF4.r2);
+                                      goto ok;
+   }
+
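+   /* Finally, the classic instructions identified by the top byte alone
+      (RX, RS, RSI and SI formats). */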
+   switch ((ovl.value & 0xff000000) >> 24) {
+   case 0x40: s390_format_RX_RRRD(s390_irgen_STH, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x41: s390_format_RX_RRRD(s390_irgen_LA, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x42: s390_format_RX_RRRD(s390_irgen_STC, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x43: s390_format_RX_RRRD(s390_irgen_IC, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x44: s390_format_RX_RRRD(s390_irgen_EX, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x45: /* BAL */ goto unimplemented;
+   case 0x46: s390_format_RX_RRRD(s390_irgen_BCT, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x47: s390_format_RX(s390_irgen_BC, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                             ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x48: s390_format_RX_RRRD(s390_irgen_LH, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x49: s390_format_RX_RRRD(s390_irgen_CH, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x4a: s390_format_RX_RRRD(s390_irgen_AH, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x4b: s390_format_RX_RRRD(s390_irgen_SH, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x4c: s390_format_RX_RRRD(s390_irgen_MH, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x4d: s390_format_RX_RRRD(s390_irgen_BAS, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x4e: s390_format_RX_RRRD(s390_irgen_CVD, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x4f: s390_format_RX_RRRD(s390_irgen_CVB, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x50: s390_format_RX_RRRD(s390_irgen_ST, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x51: s390_format_RX_RRRD(s390_irgen_LAE, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x54: s390_format_RX_RRRD(s390_irgen_N, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x55: s390_format_RX_RRRD(s390_irgen_CL, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x56: s390_format_RX_RRRD(s390_irgen_O, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x57: s390_format_RX_RRRD(s390_irgen_X, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x58: s390_format_RX_RRRD(s390_irgen_L, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x59: s390_format_RX_RRRD(s390_irgen_C, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x5a: s390_format_RX_RRRD(s390_irgen_A, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x5b: s390_format_RX_RRRD(s390_irgen_S, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x5c: s390_format_RX_RRRD(s390_irgen_M, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x5d: s390_format_RX_RRRD(s390_irgen_D, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x5e: s390_format_RX_RRRD(s390_irgen_AL, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x5f: s390_format_RX_RRRD(s390_irgen_SL, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x60: s390_format_RX_FRRD(s390_irgen_STD, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x67: /* MXD */ goto unimplemented;
+   case 0x68: s390_format_RX_FRRD(s390_irgen_LD, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x69: /* CD */ goto unimplemented;
+   case 0x6a: /* AD */ goto unimplemented;
+   case 0x6b: /* SD */ goto unimplemented;
+   case 0x6c: /* MD */ goto unimplemented;
+   case 0x6d: /* DD */ goto unimplemented;
+   case 0x6e: /* AW */ goto unimplemented;
+   case 0x6f: /* SW */ goto unimplemented;
+   case 0x70: s390_format_RX_FRRD(s390_irgen_STE, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x71: s390_format_RX_RRRD(s390_irgen_MS, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x78: s390_format_RX_FRRD(s390_irgen_LE, ovl.fmt.RX.r1, ovl.fmt.RX.x2,
+                                  ovl.fmt.RX.b2, ovl.fmt.RX.d2);  goto ok;
+   case 0x79: /* CE */ goto unimplemented;
+   case 0x7a: /* AE */ goto unimplemented;
+   case 0x7b: /* SE */ goto unimplemented;
+   case 0x7c: /* MDE */ goto unimplemented;
+   case 0x7d: /* DE */ goto unimplemented;
+   case 0x7e: /* AU */ goto unimplemented;
+   case 0x7f: /* SU */ goto unimplemented;
+   case 0x83: /* DIAG */ goto unimplemented;
+   case 0x84: s390_format_RSI_RRP(s390_irgen_BRXH, ovl.fmt.RSI.r1,
+                                  ovl.fmt.RSI.r3, ovl.fmt.RSI.i2);  goto ok;
+   case 0x85: s390_format_RSI_RRP(s390_irgen_BRXLE, ovl.fmt.RSI.r1,
+                                  ovl.fmt.RSI.r3, ovl.fmt.RSI.i2);  goto ok;
+   case 0x86: s390_format_RS_RRRD(s390_irgen_BXH, ovl.fmt.RS.r1, ovl.fmt.RS.r3,
+                                  ovl.fmt.RS.b2, ovl.fmt.RS.d2);  goto ok;
+   case 0x87: s390_format_RS_RRRD(s390_irgen_BXLE, ovl.fmt.RS.r1, ovl.fmt.RS.r3,
+                                  ovl.fmt.RS.b2, ovl.fmt.RS.d2);  goto ok;
+   case 0x88: s390_format_RS_R0RD(s390_irgen_SRL, ovl.fmt.RS.r1, ovl.fmt.RS.b2,
+                                  ovl.fmt.RS.d2);  goto ok;
+   case 0x89: s390_format_RS_R0RD(s390_irgen_SLL, ovl.fmt.RS.r1, ovl.fmt.RS.b2,
+                                  ovl.fmt.RS.d2);  goto ok;
+   case 0x8a: s390_format_RS_R0RD(s390_irgen_SRA, ovl.fmt.RS.r1, ovl.fmt.RS.b2,
+                                  ovl.fmt.RS.d2);  goto ok;
+   case 0x8b: s390_format_RS_R0RD(s390_irgen_SLA, ovl.fmt.RS.r1, ovl.fmt.RS.b2,
+                                  ovl.fmt.RS.d2);  goto ok;
+   case 0x8c: s390_format_RS_R0RD(s390_irgen_SRDL, ovl.fmt.RS.r1, ovl.fmt.RS.b2,
+                                  ovl.fmt.RS.d2);  goto ok;
+   case 0x8d: s390_format_RS_R0RD(s390_irgen_SLDL, ovl.fmt.RS.r1, ovl.fmt.RS.b2,
+                                  ovl.fmt.RS.d2);  goto ok;
+   case 0x8e: s390_format_RS_R0RD(s390_irgen_SRDA, ovl.fmt.RS.r1, ovl.fmt.RS.b2,
+                                  ovl.fmt.RS.d2);  goto ok;
+   case 0x8f: s390_format_RS_R0RD(s390_irgen_SLDA, ovl.fmt.RS.r1, ovl.fmt.RS.b2,
+                                  ovl.fmt.RS.d2);  goto ok;
+   case 0x90: s390_format_RS_RRRD(s390_irgen_STM, ovl.fmt.RS.r1, ovl.fmt.RS.r3,
+                                  ovl.fmt.RS.b2, ovl.fmt.RS.d2);  goto ok;
+   case 0x91: s390_format_SI_URD(s390_irgen_TM, ovl.fmt.SI.i2, ovl.fmt.SI.b1,
+                                 ovl.fmt.SI.d1);  goto ok;
+   case 0x92: s390_format_SI_URD(s390_irgen_MVI, ovl.fmt.SI.i2, ovl.fmt.SI.b1,
+                                 ovl.fmt.SI.d1);  goto ok;
+   case 0x94: s390_format_SI_URD(s390_irgen_NI, ovl.fmt.SI.i2, ovl.fmt.SI.b1,
+                                 ovl.fmt.SI.d1);  goto ok;
+   case 0x95: s390_format_SI_URD(s390_irgen_CLI, ovl.fmt.SI.i2, ovl.fmt.SI.b1,
+                                 ovl.fmt.SI.d1);  goto ok;
+   case 0x96: s390_format_SI_URD(s390_irgen_OI, ovl.fmt.SI.i2, ovl.fmt.SI.b1,
+                                 ovl.fmt.SI.d1);  goto ok;
+   case 0x97: s390_format_SI_URD(s390_irgen_XI, ovl.fmt.SI.i2, ovl.fmt.SI.b1,
+                                 ovl.fmt.SI.d1);  goto ok;
+   case 0x98: s390_format_RS_RRRD(s390_irgen_LM, ovl.fmt.RS.r1, ovl.fmt.RS.r3,
+                                  ovl.fmt.RS.b2, ovl.fmt.RS.d2);  goto ok;
+   case 0x99: /* TRACE */ goto unimplemented;
+   case 0x9a: s390_format_RS_AARD(s390_irgen_LAM, ovl.fmt.RS.r1, ovl.fmt.RS.r3,
+                                  ovl.fmt.RS.b2, ovl.fmt.RS.d2);  goto ok;
+   case 0x9b: s390_format_RS_AARD(s390_irgen_STAM, ovl.fmt.RS.r1, ovl.fmt.RS.r3,
+                                  ovl.fmt.RS.b2, ovl.fmt.RS.d2);  goto ok;
+   case 0xa8: s390_format_RS_RRRD(s390_irgen_MVCLE, ovl.fmt.RS.r1,
+                                  ovl.fmt.RS.r3, ovl.fmt.RS.b2, ovl.fmt.RS.d2);
+                                  goto ok;
+   case 0xa9: s390_format_RS_RRRD(s390_irgen_CLCLE, ovl.fmt.RS.r1,
+                                  ovl.fmt.RS.r3, ovl.fmt.RS.b2, ovl.fmt.RS.d2);
+                                  goto ok;
+   case 0xac: /* STNSM */ goto unimplemented;
+   case 0xad: /* STOSM */ goto unimplemented;
+   case 0xae: /* SIGP */ goto unimplemented;
+   case 0xaf: /* MC */ goto unimplemented;
+   case 0xb1: /* LRA */ goto unimplemented;
+   case 0xb6: /* STCTL */ goto unimplemented;
+   case 0xb7: /* LCTL */ goto unimplemented;
+   case 0xba: s390_format_RS_RRRD(s390_irgen_CS, ovl.fmt.RS.r1, ovl.fmt.RS.r3,
+                                  ovl.fmt.RS.b2, ovl.fmt.RS.d2);  goto ok;
+   case 0xbb: /* CDS */ goto unimplemented;
+   case 0xbd: s390_format_RS_RURD(s390_irgen_CLM, ovl.fmt.RS.r1, ovl.fmt.RS.r3,
+                                  ovl.fmt.RS.b2, ovl.fmt.RS.d2);  goto ok;
+   case 0xbe: s390_format_RS_RURD(s390_irgen_STCM, ovl.fmt.RS.r1, ovl.fmt.RS.r3,
+                                  ovl.fmt.RS.b2, ovl.fmt.RS.d2);  goto ok;
+   case 0xbf: s390_format_RS_RURD(s390_irgen_ICM, ovl.fmt.RS.r1, ovl.fmt.RS.r3,
+                                  ovl.fmt.RS.b2, ovl.fmt.RS.d2);  goto ok;
+   }
+
+   return S390_DECODE_UNKNOWN_INSN;
+
+ok:
+   return S390_DECODE_OK;
+
+unimplemented:
+   return S390_DECODE_UNIMPLEMENTED_INSN;
+}
+
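+/* Decode and translate a 6-byte insn.  Byte 0 is always an opcode
+   (op1); many 6-byte formats carry a second opcode (op2) in the last
+   byte, and the switch below keys on that (op1, op2) pair.  Operands
+   are then extracted through the format-specific bitfield views
+   defined inside.
+
+   Worked example, for illustration only: the byte sequence
+
+      e3 20 30 08 00 04
+
+   is LG %r2,8(%r3).  Viewed as RXY it gives r1 = 2, x2 = 0, b2 = 3,
+   dl2 = 0x008, dh2 = 0, and the switch key below evaluates to
+   0xe30000000004ULL -- the LG case. */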
+static s390_decode_t
+s390_decode_6byte_and_irgen(UChar *bytes)
+{
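+   /* One bitfield view per instruction format that 6-byte insns use
+      (RIE and variants, RIL, RIS, RRS, RSL, RSY, RXE, RXF, RXY, SIY,
+      SS and variants, SSE, SSF, SIL).  Field names follow the
+      Principles of Operation; each view covers all 48 insn bits. */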
+   typedef union {
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int r3  :  4;
+         unsigned int i2  : 16;
+         unsigned int     :  8;
+         unsigned int op2 :  8;
+      } RIE;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int r2  :  4;
+         unsigned int i3  :  8;
+         unsigned int i4  :  8;
+         unsigned int i5  :  8;
+         unsigned int op2 :  8;
+      } RIE_RRUUU;
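+      /* RIEv1 and RIEv3 below are decoder-local names for further RIE
+         layout variants, differing in which fields are immediates and
+         which are masks. */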
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int     :  4;
+         unsigned int i2  : 16;
+         unsigned int m3  :  4;
+         unsigned int     :  4;
+         unsigned int op2 :  8;
+      } RIEv1;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int r2  :  4;
+         unsigned int i4  : 16;
+         unsigned int m3  :  4;
+         unsigned int     :  4;
+         unsigned int op2 :  8;
+      } RIE_RRPU;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int m3  :  4;
+         unsigned int i4  : 16;
+         unsigned int i2  :  8;
+         unsigned int op2 :  8;
+      } RIEv3;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int op2 :  4;
+         unsigned int i2  : 32;
+      } RIL;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int m3  :  4;
+         unsigned int b4  :  4;
+         unsigned int d4  : 12;
+         unsigned int i2  :  8;
+         unsigned int op2 :  8;
+      } RIS;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int r2  :  4;
+         unsigned int b4  :  4;
+         unsigned int d4  : 12;
+         unsigned int m3  :  4;
+         unsigned int     :  4;
+         unsigned int op2 :  8;
+      } RRS;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int l1  :  4;
+         unsigned int     :  4;
+         unsigned int b1  :  4;
+         unsigned int d1  : 12;
+         unsigned int     :  8;
+         unsigned int op2 :  8;
+      } RSL;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int r3  :  4;
+         unsigned int b2  :  4;
+         unsigned int dl2 : 12;
+         unsigned int dh2 :  8;
+         unsigned int op2 :  8;
+      } RSY;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int x2  :  4;
+         unsigned int b2  :  4;
+         unsigned int d2  : 12;
+         unsigned int     :  8;
+         unsigned int op2 :  8;
+      } RXE;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r3  :  4;
+         unsigned int x2  :  4;
+         unsigned int b2  :  4;
+         unsigned int d2  : 12;
+         unsigned int r1  :  4;
+         unsigned int     :  4;
+         unsigned int op2 :  8;
+      } RXF;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r1  :  4;
+         unsigned int x2  :  4;
+         unsigned int b2  :  4;
+         unsigned int dl2 : 12;
+         unsigned int dh2 :  8;
+         unsigned int op2 :  8;
+      } RXY;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int i2  :  8;
+         unsigned int b1  :  4;
+         unsigned int dl1 : 12;
+         unsigned int dh1 :  8;
+         unsigned int op2 :  8;
+      } SIY;
+      struct {
+         unsigned int op :  8;
+         unsigned int l  :  8;
+         unsigned int b1 :  4;
+         unsigned int d1 : 12;
+         unsigned int b2 :  4;
+         unsigned int d2 : 12;
+      } SS;
+      struct {
+         unsigned int op :  8;
+         unsigned int l1 :  4;
+         unsigned int l2 :  4;
+         unsigned int b1 :  4;
+         unsigned int d1 : 12;
+         unsigned int b2 :  4;
+         unsigned int d2 : 12;
+      } SS_LLRDRD;
+      struct {
+         unsigned int op :  8;
+         unsigned int r1 :  4;
+         unsigned int r3 :  4;
+         unsigned int b2 :  4;
+         unsigned int d2 : 12;
+         unsigned int b4 :  4;
+         unsigned int d4 : 12;
+      } SS_RRRDRD2;
+      struct {
+         unsigned int op : 16;
+         unsigned int b1 :  4;
+         unsigned int d1 : 12;
+         unsigned int b2 :  4;
+         unsigned int d2 : 12;
+      } SSE;
+      struct {
+         unsigned int op1 :  8;
+         unsigned int r3  :  4;
+         unsigned int op2 :  4;
+         unsigned int b1  :  4;
+         unsigned int d1  : 12;
+         unsigned int b2  :  4;
+         unsigned int d2  : 12;
+      } SSF;
+      struct {
+         unsigned int op : 16;
+         unsigned int b1 :  4;
+         unsigned int d1 : 12;
+         unsigned int i2 : 16;
+      } SIL;
+   } formats;
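+   /* Overlay of the raw insn image ('value', used for opcode dispatch)
+      and the format views ('fmt', used for operand extraction). */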
+   union {
+      formats fmt;
+      ULong value;
+   } ovl;
+
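+   /* Layout check: the format views must occupy exactly the six insn
+      bytes.  This relies on the compiler packing the bitfields with no
+      padding, which is assumed to hold for the s390x target this front
+      end is built for. */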
+   vassert(sizeof(formats) == 6);
+
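+   /* Copy the insn into the high end of 'value' -- the host is
+      big-endian s390 -- and zero-fill the remaining two bytes. */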
+   ((char *)(&ovl.value))[0] = bytes[0];
+   ((char *)(&ovl.value))[1] = bytes[1];
+   ((char *)(&ovl.value))[2] = bytes[2];
+   ((char *)(&ovl.value))[3] = bytes[3];
+   ((char *)(&ovl.value))[4] = bytes[4];
+   ((char *)(&ovl.value))[5] = bytes[5];
+   ((char *)(&ovl.value))[6] = 0x0;
+   ((char *)(&ovl.value))[7] = 0x0;
+
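+   /* Key on the first and last insn bytes: shift out the two zero fill
+      bytes, then mask off everything between op1 (insn bits 0-7) and
+      op2 (insn bits 40-47). */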
+   switch ((ovl.value >> 16) & 0xff00000000ffULL) {
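+   /* 0xe3..: RXY-format loads, stores and arithmetic; the displacement
+      is the signed 20-bit value dh2 << 12 | dl2. */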
+   case 0xe30000000002ULL: s390_format_RXY_RRRD(s390_irgen_LTG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000003ULL: /* LRAG */ goto unimplemented;
+   case 0xe30000000004ULL: s390_format_RXY_RRRD(s390_irgen_LG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000006ULL: s390_format_RXY_RRRD(s390_irgen_CVBY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000008ULL: s390_format_RXY_RRRD(s390_irgen_AG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000009ULL: s390_format_RXY_RRRD(s390_irgen_SG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000000aULL: s390_format_RXY_RRRD(s390_irgen_ALG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000000bULL: s390_format_RXY_RRRD(s390_irgen_SLG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000000cULL: s390_format_RXY_RRRD(s390_irgen_MSG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000000dULL: s390_format_RXY_RRRD(s390_irgen_DSG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000000eULL: /* CVBG */ goto unimplemented;
+   case 0xe3000000000fULL: s390_format_RXY_RRRD(s390_irgen_LRVG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000012ULL: s390_format_RXY_RRRD(s390_irgen_LT, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000013ULL: /* LRAY */ goto unimplemented;
+   case 0xe30000000014ULL: s390_format_RXY_RRRD(s390_irgen_LGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000015ULL: s390_format_RXY_RRRD(s390_irgen_LGH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000016ULL: s390_format_RXY_RRRD(s390_irgen_LLGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000017ULL: s390_format_RXY_RRRD(s390_irgen_LLGT, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000018ULL: s390_format_RXY_RRRD(s390_irgen_AGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000019ULL: s390_format_RXY_RRRD(s390_irgen_SGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000001aULL: s390_format_RXY_RRRD(s390_irgen_ALGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000001bULL: s390_format_RXY_RRRD(s390_irgen_SLGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000001cULL: s390_format_RXY_RRRD(s390_irgen_MSGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000001dULL: s390_format_RXY_RRRD(s390_irgen_DSGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000001eULL: s390_format_RXY_RRRD(s390_irgen_LRV, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000001fULL: s390_format_RXY_RRRD(s390_irgen_LRVH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000020ULL: s390_format_RXY_RRRD(s390_irgen_CG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000021ULL: s390_format_RXY_RRRD(s390_irgen_CLG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000024ULL: s390_format_RXY_RRRD(s390_irgen_STG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000026ULL: s390_format_RXY_RRRD(s390_irgen_CVDY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000002eULL: /* CVDG */ goto unimplemented;
+   case 0xe3000000002fULL: s390_format_RXY_RRRD(s390_irgen_STRVG,
+                                                ovl.fmt.RXY.r1, ovl.fmt.RXY.x2,
+                                                ovl.fmt.RXY.b2, ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000030ULL: s390_format_RXY_RRRD(s390_irgen_CGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000031ULL: s390_format_RXY_RRRD(s390_irgen_CLGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000032ULL: s390_format_RXY_RRRD(s390_irgen_LTGF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000034ULL: s390_format_RXY_RRRD(s390_irgen_CGH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000036ULL: s390_format_RXY_URRD(s390_irgen_PFD, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000003eULL: s390_format_RXY_RRRD(s390_irgen_STRV, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000003fULL: s390_format_RXY_RRRD(s390_irgen_STRVH,
+                                                ovl.fmt.RXY.r1, ovl.fmt.RXY.x2,
+                                                ovl.fmt.RXY.b2, ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000046ULL: s390_format_RXY_RRRD(s390_irgen_BCTG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000050ULL: s390_format_RXY_RRRD(s390_irgen_STY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000051ULL: s390_format_RXY_RRRD(s390_irgen_MSY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000054ULL: s390_format_RXY_RRRD(s390_irgen_NY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000055ULL: s390_format_RXY_RRRD(s390_irgen_CLY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000056ULL: s390_format_RXY_RRRD(s390_irgen_OY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000057ULL: s390_format_RXY_RRRD(s390_irgen_XY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000058ULL: s390_format_RXY_RRRD(s390_irgen_LY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000059ULL: s390_format_RXY_RRRD(s390_irgen_CY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000005aULL: s390_format_RXY_RRRD(s390_irgen_AY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000005bULL: s390_format_RXY_RRRD(s390_irgen_SY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000005cULL: s390_format_RXY_RRRD(s390_irgen_MFY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000005eULL: s390_format_RXY_RRRD(s390_irgen_ALY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000005fULL: s390_format_RXY_RRRD(s390_irgen_SLY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000070ULL: s390_format_RXY_RRRD(s390_irgen_STHY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000071ULL: s390_format_RXY_RRRD(s390_irgen_LAY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000072ULL: s390_format_RXY_RRRD(s390_irgen_STCY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000073ULL: s390_format_RXY_RRRD(s390_irgen_ICY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000075ULL: s390_format_RXY_RRRD(s390_irgen_LAEY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000076ULL: s390_format_RXY_RRRD(s390_irgen_LB, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000077ULL: s390_format_RXY_RRRD(s390_irgen_LGB, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000078ULL: s390_format_RXY_RRRD(s390_irgen_LHY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000079ULL: s390_format_RXY_RRRD(s390_irgen_CHY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000007aULL: s390_format_RXY_RRRD(s390_irgen_AHY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000007bULL: s390_format_RXY_RRRD(s390_irgen_SHY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000007cULL: s390_format_RXY_RRRD(s390_irgen_MHY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000080ULL: s390_format_RXY_RRRD(s390_irgen_NG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000081ULL: s390_format_RXY_RRRD(s390_irgen_OG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000082ULL: s390_format_RXY_RRRD(s390_irgen_XG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000086ULL: s390_format_RXY_RRRD(s390_irgen_MLG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000087ULL: s390_format_RXY_RRRD(s390_irgen_DLG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000088ULL: s390_format_RXY_RRRD(s390_irgen_ALCG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000089ULL: s390_format_RXY_RRRD(s390_irgen_SLBG, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000008eULL: s390_format_RXY_RRRD(s390_irgen_STPQ, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe3000000008fULL: s390_format_RXY_RRRD(s390_irgen_LPQ, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000090ULL: s390_format_RXY_RRRD(s390_irgen_LLGC, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000091ULL: s390_format_RXY_RRRD(s390_irgen_LLGH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000094ULL: s390_format_RXY_RRRD(s390_irgen_LLC, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000095ULL: s390_format_RXY_RRRD(s390_irgen_LLH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000096ULL: s390_format_RXY_RRRD(s390_irgen_ML, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000097ULL: s390_format_RXY_RRRD(s390_irgen_DL, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000098ULL: s390_format_RXY_RRRD(s390_irgen_ALC, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe30000000099ULL: s390_format_RXY_RRRD(s390_irgen_SLB, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe300000000c0ULL: s390_format_RXY_RRRD(s390_irgen_LBH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe300000000c2ULL: s390_format_RXY_RRRD(s390_irgen_LLCH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe300000000c3ULL: s390_format_RXY_RRRD(s390_irgen_STCH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe300000000c4ULL: s390_format_RXY_RRRD(s390_irgen_LHH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe300000000c6ULL: s390_format_RXY_RRRD(s390_irgen_LLHH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe300000000c7ULL: s390_format_RXY_RRRD(s390_irgen_STHH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe300000000caULL: s390_format_RXY_RRRD(s390_irgen_LFH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe300000000cbULL: s390_format_RXY_RRRD(s390_irgen_STFH, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe300000000cdULL: s390_format_RXY_RRRD(s390_irgen_CHF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xe300000000cfULL: s390_format_RXY_RRRD(s390_irgen_CLHF, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
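+   /* 0xeb..: RSY- and SIY-format ops: load/store multiple, shifts,
+      compare-and-swap, load/store on condition, interlocked-access
+      facility ops, and immediate-to-storage ops with long
+      displacements. */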
+   case 0xeb0000000004ULL: s390_format_RSY_RRRD(s390_irgen_LMG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000000aULL: s390_format_RSY_RRRD(s390_irgen_SRAG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000000bULL: s390_format_RSY_RRRD(s390_irgen_SLAG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000000cULL: s390_format_RSY_RRRD(s390_irgen_SRLG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000000dULL: s390_format_RSY_RRRD(s390_irgen_SLLG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000000fULL: /* TRACG */ goto unimplemented;
+   case 0xeb0000000014ULL: s390_format_RSY_RRRD(s390_irgen_CSY, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000001cULL: s390_format_RSY_RRRD(s390_irgen_RLLG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000001dULL: s390_format_RSY_RRRD(s390_irgen_RLL, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb0000000020ULL: s390_format_RSY_RURD(s390_irgen_CLMH, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb0000000021ULL: s390_format_RSY_RURD(s390_irgen_CLMY, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb0000000024ULL: s390_format_RSY_RRRD(s390_irgen_STMG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb0000000025ULL: /* STCTG */ goto unimplemented;
+   case 0xeb0000000026ULL: s390_format_RSY_RRRD(s390_irgen_STMH, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000002cULL: s390_format_RSY_RURD(s390_irgen_STCMH,
+                                                ovl.fmt.RSY.r1, ovl.fmt.RSY.r3,
+                                                ovl.fmt.RSY.b2, ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000002dULL: s390_format_RSY_RURD(s390_irgen_STCMY,
+                                                ovl.fmt.RSY.r1, ovl.fmt.RSY.r3,
+                                                ovl.fmt.RSY.b2, ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000002fULL: /* LCTLG */ goto unimplemented;
+   case 0xeb0000000030ULL: s390_format_RSY_RRRD(s390_irgen_CSG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb0000000031ULL: /* CDSY */ goto unimplemented;
+   case 0xeb000000003eULL: /* CDSG */ goto unimplemented;
+   case 0xeb0000000044ULL: s390_format_RSY_RRRD(s390_irgen_BXHG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb0000000045ULL: s390_format_RSY_RRRD(s390_irgen_BXLEG,
+                                                ovl.fmt.RSY.r1, ovl.fmt.RSY.r3,
+                                                ovl.fmt.RSY.b2, ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000004cULL: /* ECAG */ goto unimplemented;
+   case 0xeb0000000051ULL: s390_format_SIY_URD(s390_irgen_TMY, ovl.fmt.SIY.i2,
+                                               ovl.fmt.SIY.b1, ovl.fmt.SIY.dl1,
+                                               ovl.fmt.SIY.dh1);  goto ok;
+   case 0xeb0000000052ULL: s390_format_SIY_URD(s390_irgen_MVIY, ovl.fmt.SIY.i2,
+                                               ovl.fmt.SIY.b1, ovl.fmt.SIY.dl1,
+                                               ovl.fmt.SIY.dh1);  goto ok;
+   case 0xeb0000000054ULL: s390_format_SIY_URD(s390_irgen_NIY, ovl.fmt.SIY.i2,
+                                               ovl.fmt.SIY.b1, ovl.fmt.SIY.dl1,
+                                               ovl.fmt.SIY.dh1);  goto ok;
+   case 0xeb0000000055ULL: s390_format_SIY_URD(s390_irgen_CLIY, ovl.fmt.SIY.i2,
+                                               ovl.fmt.SIY.b1, ovl.fmt.SIY.dl1,
+                                               ovl.fmt.SIY.dh1);  goto ok;
+   case 0xeb0000000056ULL: s390_format_SIY_URD(s390_irgen_OIY, ovl.fmt.SIY.i2,
+                                               ovl.fmt.SIY.b1, ovl.fmt.SIY.dl1,
+                                               ovl.fmt.SIY.dh1);  goto ok;
+   case 0xeb0000000057ULL: s390_format_SIY_URD(s390_irgen_XIY, ovl.fmt.SIY.i2,
+                                               ovl.fmt.SIY.b1, ovl.fmt.SIY.dl1,
+                                               ovl.fmt.SIY.dh1);  goto ok;
+   case 0xeb000000006aULL: s390_format_SIY_IRD(s390_irgen_ASI, ovl.fmt.SIY.i2,
+                                               ovl.fmt.SIY.b1, ovl.fmt.SIY.dl1,
+                                               ovl.fmt.SIY.dh1);  goto ok;
+   case 0xeb000000006eULL: s390_format_SIY_IRD(s390_irgen_ALSI, ovl.fmt.SIY.i2,
+                                               ovl.fmt.SIY.b1, ovl.fmt.SIY.dl1,
+                                               ovl.fmt.SIY.dh1);  goto ok;
+   case 0xeb000000007aULL: s390_format_SIY_IRD(s390_irgen_AGSI, ovl.fmt.SIY.i2,
+                                               ovl.fmt.SIY.b1, ovl.fmt.SIY.dl1,
+                                               ovl.fmt.SIY.dh1);  goto ok;
+   case 0xeb000000007eULL: s390_format_SIY_IRD(s390_irgen_ALGSI, ovl.fmt.SIY.i2,
+                                               ovl.fmt.SIY.b1, ovl.fmt.SIY.dl1,
+                                               ovl.fmt.SIY.dh1);  goto ok;
+   case 0xeb0000000080ULL: s390_format_RSY_RURD(s390_irgen_ICMH, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb0000000081ULL: s390_format_RSY_RURD(s390_irgen_ICMY, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000008eULL: /* MVCLU */ goto unimplemented;
+   case 0xeb000000008fULL: /* CLCLU */ goto unimplemented;
+   case 0xeb0000000090ULL: s390_format_RSY_RRRD(s390_irgen_STMY, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb0000000096ULL: s390_format_RSY_RRRD(s390_irgen_LMH, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb0000000098ULL: s390_format_RSY_RRRD(s390_irgen_LMY, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000009aULL: s390_format_RSY_AARD(s390_irgen_LAMY, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb000000009bULL: s390_format_RSY_AARD(s390_irgen_STAMY,
+                                                ovl.fmt.RSY.r1, ovl.fmt.RSY.r3,
+                                                ovl.fmt.RSY.b2, ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000c0ULL: /* TP */ goto unimplemented;
+   case 0xeb00000000dcULL: s390_format_RSY_RRRD(s390_irgen_SRAK, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000ddULL: s390_format_RSY_RRRD(s390_irgen_SLAK, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000deULL: s390_format_RSY_RRRD(s390_irgen_SRLK, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000dfULL: s390_format_RSY_RRRD(s390_irgen_SLLK, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000e2ULL: s390_format_RSY_RDRM(s390_irgen_LOCG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2,
+                                                S390_XMNM_LOCG);  goto ok;
+   case 0xeb00000000e3ULL: s390_format_RSY_RDRM(s390_irgen_STOCG,
+                                                ovl.fmt.RSY.r1, ovl.fmt.RSY.r3,
+                                                ovl.fmt.RSY.b2, ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2,
+                                                S390_XMNM_STOCG);  goto ok;
+   case 0xeb00000000e4ULL: s390_format_RSY_RRRD(s390_irgen_LANG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000e6ULL: s390_format_RSY_RRRD(s390_irgen_LAOG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000e7ULL: s390_format_RSY_RRRD(s390_irgen_LAXG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000e8ULL: s390_format_RSY_RRRD(s390_irgen_LAAG, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000eaULL: s390_format_RSY_RRRD(s390_irgen_LAALG,
+                                                ovl.fmt.RSY.r1, ovl.fmt.RSY.r3,
+                                                ovl.fmt.RSY.b2, ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000f2ULL: s390_format_RSY_RDRM(s390_irgen_LOC, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2, S390_XMNM_LOC);
+                                                goto ok;
+   case 0xeb00000000f3ULL: s390_format_RSY_RDRM(s390_irgen_STOC, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2,
+                                                S390_XMNM_STOC);  goto ok;
+   case 0xeb00000000f4ULL: s390_format_RSY_RRRD(s390_irgen_LAN, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000f6ULL: s390_format_RSY_RRRD(s390_irgen_LAO, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000f7ULL: s390_format_RSY_RRRD(s390_irgen_LAX, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000f8ULL: s390_format_RSY_RRRD(s390_irgen_LAA, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
+   case 0xeb00000000faULL: s390_format_RSY_RRRD(s390_irgen_LAAL, ovl.fmt.RSY.r1,
+                                                ovl.fmt.RSY.r3, ovl.fmt.RSY.b2,
+                                                ovl.fmt.RSY.dl2,
+                                                ovl.fmt.RSY.dh2);  goto ok;
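+   /* 0xec..: RIE-, RRS- and RIS-format ops: branch on index relative,
+      rotate-then-{and,insert,or,xor}-selected-bits, compare-and-branch,
+      and distinct-operand add immediate. */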
+   case 0xec0000000044ULL: s390_format_RIE_RRP(s390_irgen_BRXHG, ovl.fmt.RIE.r1,
+                                               ovl.fmt.RIE.r3, ovl.fmt.RIE.i2);
+                                               goto ok;
+   case 0xec0000000045ULL: s390_format_RIE_RRP(s390_irgen_BRXLG, ovl.fmt.RIE.r1,
+                                               ovl.fmt.RIE.r3, ovl.fmt.RIE.i2);
+                                               goto ok;
+   case 0xec0000000051ULL: /* RISBLG */ goto unimplemented;
+   case 0xec0000000054ULL: s390_format_RIE_RRUUU(s390_irgen_RNSBG,
+                                                 ovl.fmt.RIE_RRUUU.r1,
+                                                 ovl.fmt.RIE_RRUUU.r2,
+                                                 ovl.fmt.RIE_RRUUU.i3,
+                                                 ovl.fmt.RIE_RRUUU.i4,
+                                                 ovl.fmt.RIE_RRUUU.i5);
+                                                 goto ok;
+   case 0xec0000000055ULL: s390_format_RIE_RRUUU(s390_irgen_RISBG,
+                                                 ovl.fmt.RIE_RRUUU.r1,
+                                                 ovl.fmt.RIE_RRUUU.r2,
+                                                 ovl.fmt.RIE_RRUUU.i3,
+                                                 ovl.fmt.RIE_RRUUU.i4,
+                                                 ovl.fmt.RIE_RRUUU.i5);
+                                                 goto ok;
+   case 0xec0000000056ULL: s390_format_RIE_RRUUU(s390_irgen_ROSBG,
+                                                 ovl.fmt.RIE_RRUUU.r1,
+                                                 ovl.fmt.RIE_RRUUU.r2,
+                                                 ovl.fmt.RIE_RRUUU.i3,
+                                                 ovl.fmt.RIE_RRUUU.i4,
+                                                 ovl.fmt.RIE_RRUUU.i5);
+                                                 goto ok;
+   case 0xec0000000057ULL: s390_format_RIE_RRUUU(s390_irgen_RXSBG,
+                                                 ovl.fmt.RIE_RRUUU.r1,
+                                                 ovl.fmt.RIE_RRUUU.r2,
+                                                 ovl.fmt.RIE_RRUUU.i3,
+                                                 ovl.fmt.RIE_RRUUU.i4,
+                                                 ovl.fmt.RIE_RRUUU.i5);
+                                                 goto ok;
+   case 0xec000000005dULL: /* RISBHG */ goto unimplemented;
+   case 0xec0000000064ULL: s390_format_RIE_RRPU(s390_irgen_CGRJ,
+                                                ovl.fmt.RIE_RRPU.r1,
+                                                ovl.fmt.RIE_RRPU.r2,
+                                                ovl.fmt.RIE_RRPU.i4,
+                                                ovl.fmt.RIE_RRPU.m3);  goto ok;
+   case 0xec0000000065ULL: s390_format_RIE_RRPU(s390_irgen_CLGRJ,
+                                                ovl.fmt.RIE_RRPU.r1,
+                                                ovl.fmt.RIE_RRPU.r2,
+                                                ovl.fmt.RIE_RRPU.i4,
+                                                ovl.fmt.RIE_RRPU.m3);  goto ok;
+   case 0xec0000000070ULL: /* CGIT */ goto unimplemented;
+   case 0xec0000000071ULL: /* CLGIT */ goto unimplemented;
+   case 0xec0000000072ULL: /* CIT */ goto unimplemented;
+   case 0xec0000000073ULL: /* CLFIT */ goto unimplemented;
+   case 0xec0000000076ULL: s390_format_RIE_RRPU(s390_irgen_CRJ,
+                                                ovl.fmt.RIE_RRPU.r1,
+                                                ovl.fmt.RIE_RRPU.r2,
+                                                ovl.fmt.RIE_RRPU.i4,
+                                                ovl.fmt.RIE_RRPU.m3);  goto ok;
+   case 0xec0000000077ULL: s390_format_RIE_RRPU(s390_irgen_CLRJ,
+                                                ovl.fmt.RIE_RRPU.r1,
+                                                ovl.fmt.RIE_RRPU.r2,
+                                                ovl.fmt.RIE_RRPU.i4,
+                                                ovl.fmt.RIE_RRPU.m3);  goto ok;
+   case 0xec000000007cULL: s390_format_RIE_RUPI(s390_irgen_CGIJ,
+                                                ovl.fmt.RIEv3.r1,
+                                                ovl.fmt.RIEv3.m3,
+                                                ovl.fmt.RIEv3.i4,
+                                                ovl.fmt.RIEv3.i2);  goto ok;
+   case 0xec000000007dULL: s390_format_RIE_RUPU(s390_irgen_CLGIJ,
+                                                ovl.fmt.RIEv3.r1,
+                                                ovl.fmt.RIEv3.m3,
+                                                ovl.fmt.RIEv3.i4,
+                                                ovl.fmt.RIEv3.i2);  goto ok;
+   case 0xec000000007eULL: s390_format_RIE_RUPI(s390_irgen_CIJ,
+                                                ovl.fmt.RIEv3.r1,
+                                                ovl.fmt.RIEv3.m3,
+                                                ovl.fmt.RIEv3.i4,
+                                                ovl.fmt.RIEv3.i2);  goto ok;
+   case 0xec000000007fULL: s390_format_RIE_RUPU(s390_irgen_CLIJ,
+                                                ovl.fmt.RIEv3.r1,
+                                                ovl.fmt.RIEv3.m3,
+                                                ovl.fmt.RIEv3.i4,
+                                                ovl.fmt.RIEv3.i2);  goto ok;
+   case 0xec00000000d8ULL: s390_format_RIE_RRI0(s390_irgen_AHIK, ovl.fmt.RIE.r1,
+                                                ovl.fmt.RIE.r3, ovl.fmt.RIE.i2);
+                                                goto ok;
+   case 0xec00000000d9ULL: s390_format_RIE_RRI0(s390_irgen_AGHIK,
+                                                ovl.fmt.RIE.r1, ovl.fmt.RIE.r3,
+                                                ovl.fmt.RIE.i2);  goto ok;
+   case 0xec00000000daULL: s390_format_RIE_RRI0(s390_irgen_ALHSIK,
+                                                ovl.fmt.RIE.r1, ovl.fmt.RIE.r3,
+                                                ovl.fmt.RIE.i2);  goto ok;
+   case 0xec00000000dbULL: s390_format_RIE_RRI0(s390_irgen_ALGHSIK,
+                                                ovl.fmt.RIE.r1, ovl.fmt.RIE.r3,
+                                                ovl.fmt.RIE.i2);  goto ok;
+   case 0xec00000000e4ULL: s390_format_RRS(s390_irgen_CGRB, ovl.fmt.RRS.r1,
+                                           ovl.fmt.RRS.r2, ovl.fmt.RRS.b4,
+                                           ovl.fmt.RRS.d4, ovl.fmt.RRS.m3);
+                                           goto ok;
+   case 0xec00000000e5ULL: s390_format_RRS(s390_irgen_CLGRB, ovl.fmt.RRS.r1,
+                                           ovl.fmt.RRS.r2, ovl.fmt.RRS.b4,
+                                           ovl.fmt.RRS.d4, ovl.fmt.RRS.m3);
+                                           goto ok;
+   case 0xec00000000f6ULL: s390_format_RRS(s390_irgen_CRB, ovl.fmt.RRS.r1,
+                                           ovl.fmt.RRS.r2, ovl.fmt.RRS.b4,
+                                           ovl.fmt.RRS.d4, ovl.fmt.RRS.m3);
+                                           goto ok;
+   case 0xec00000000f7ULL: s390_format_RRS(s390_irgen_CLRB, ovl.fmt.RRS.r1,
+                                           ovl.fmt.RRS.r2, ovl.fmt.RRS.b4,
+                                           ovl.fmt.RRS.d4, ovl.fmt.RRS.m3);
+                                           goto ok;
+   case 0xec00000000fcULL: s390_format_RIS_RURDI(s390_irgen_CGIB,
+                                                 ovl.fmt.RIS.r1, ovl.fmt.RIS.m3,
+                                                 ovl.fmt.RIS.b4, ovl.fmt.RIS.d4,
+                                                 ovl.fmt.RIS.i2);  goto ok;
+   case 0xec00000000fdULL: s390_format_RIS_RURDU(s390_irgen_CLGIB,
+                                                 ovl.fmt.RIS.r1, ovl.fmt.RIS.m3,
+                                                 ovl.fmt.RIS.b4, ovl.fmt.RIS.d4,
+                                                 ovl.fmt.RIS.i2);  goto ok;
+   case 0xec00000000feULL: s390_format_RIS_RURDI(s390_irgen_CIB, ovl.fmt.RIS.r1,
+                                                 ovl.fmt.RIS.m3, ovl.fmt.RIS.b4,
+                                                 ovl.fmt.RIS.d4,
+                                                 ovl.fmt.RIS.i2);  goto ok;
+   case 0xec00000000ffULL: s390_format_RIS_RURDU(s390_irgen_CLIB,
+                                                 ovl.fmt.RIS.r1, ovl.fmt.RIS.m3,
+                                                 ovl.fmt.RIS.b4, ovl.fmt.RIS.d4,
+                                                 ovl.fmt.RIS.i2);  goto ok;
+   case 0xed0000000004ULL: s390_format_RXE_FRRD(s390_irgen_LDEB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed0000000005ULL: s390_format_RXE_FRRD(s390_irgen_LXDB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed0000000006ULL: s390_format_RXE_FRRD(s390_irgen_LXEB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed0000000007ULL: /* MXDB */ goto unimplemented;
+   case 0xed0000000008ULL: /* KEB */ goto unimplemented;
+   case 0xed0000000009ULL: s390_format_RXE_FRRD(s390_irgen_CEB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed000000000aULL: s390_format_RXE_FRRD(s390_irgen_AEB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed000000000bULL: s390_format_RXE_FRRD(s390_irgen_SEB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed000000000cULL: /* MDEB */ goto unimplemented;
+   case 0xed000000000dULL: s390_format_RXE_FRRD(s390_irgen_DEB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed000000000eULL: s390_format_RXF_FRRDF(s390_irgen_MAEB,
+                                                 ovl.fmt.RXF.r3, ovl.fmt.RXF.x2,
+                                                 ovl.fmt.RXF.b2, ovl.fmt.RXF.d2,
+                                                 ovl.fmt.RXF.r1);  goto ok;
+   case 0xed000000000fULL: s390_format_RXF_FRRDF(s390_irgen_MSEB,
+                                                 ovl.fmt.RXF.r3, ovl.fmt.RXF.x2,
+                                                 ovl.fmt.RXF.b2, ovl.fmt.RXF.d2,
+                                                 ovl.fmt.RXF.r1);  goto ok;
+   case 0xed0000000010ULL: s390_format_RXE_FRRD(s390_irgen_TCEB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed0000000011ULL: s390_format_RXE_FRRD(s390_irgen_TCDB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed0000000012ULL: s390_format_RXE_FRRD(s390_irgen_TCXB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed0000000014ULL: s390_format_RXE_FRRD(s390_irgen_SQEB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed0000000015ULL: s390_format_RXE_FRRD(s390_irgen_SQDB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed0000000017ULL: s390_format_RXE_FRRD(s390_irgen_MEEB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed0000000018ULL: /* KDB */ goto unimplemented;
+   case 0xed0000000019ULL: s390_format_RXE_FRRD(s390_irgen_CDB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed000000001aULL: s390_format_RXE_FRRD(s390_irgen_ADB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed000000001bULL: s390_format_RXE_FRRD(s390_irgen_SDB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed000000001cULL: s390_format_RXE_FRRD(s390_irgen_MDB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed000000001dULL: s390_format_RXE_FRRD(s390_irgen_DDB, ovl.fmt.RXE.r1,
+                                                ovl.fmt.RXE.x2, ovl.fmt.RXE.b2,
+                                                ovl.fmt.RXE.d2);  goto ok;
+   case 0xed000000001eULL: s390_format_RXF_FRRDF(s390_irgen_MADB,
+                                                 ovl.fmt.RXF.r3, ovl.fmt.RXF.x2,
+                                                 ovl.fmt.RXF.b2, ovl.fmt.RXF.d2,
+                                                 ovl.fmt.RXF.r1);  goto ok;
+   case 0xed000000001fULL: s390_format_RXF_FRRDF(s390_irgen_MSDB,
+                                                 ovl.fmt.RXF.r3, ovl.fmt.RXF.x2,
+                                                 ovl.fmt.RXF.b2, ovl.fmt.RXF.d2,
+                                                 ovl.fmt.RXF.r1);  goto ok;
+   case 0xed0000000024ULL: /* LDE */ goto unimplemented;
+   case 0xed0000000025ULL: /* LXD */ goto unimplemented;
+   case 0xed0000000026ULL: /* LXE */ goto unimplemented;
+   case 0xed000000002eULL: /* MAE */ goto unimplemented;
+   case 0xed000000002fULL: /* MSE */ goto unimplemented;
+   case 0xed0000000034ULL: /* SQE */ goto unimplemented;
+   case 0xed0000000035ULL: /* SQD */ goto unimplemented;
+   case 0xed0000000037ULL: /* MEE */ goto unimplemented;
+   case 0xed0000000038ULL: /* MAYL */ goto unimplemented;
+   case 0xed0000000039ULL: /* MYL */ goto unimplemented;
+   case 0xed000000003aULL: /* MAY */ goto unimplemented;
+   case 0xed000000003bULL: /* MY */ goto unimplemented;
+   case 0xed000000003cULL: /* MAYH */ goto unimplemented;
+   case 0xed000000003dULL: /* MYH */ goto unimplemented;
+   case 0xed000000003eULL: /* MAD */ goto unimplemented;
+   case 0xed000000003fULL: /* MSD */ goto unimplemented;
+   case 0xed0000000040ULL: /* SLDT */ goto unimplemented;
+   case 0xed0000000041ULL: /* SRDT */ goto unimplemented;
+   case 0xed0000000048ULL: /* SLXT */ goto unimplemented;
+   case 0xed0000000049ULL: /* SRXT */ goto unimplemented;
+   case 0xed0000000050ULL: /* TDCET */ goto unimplemented;
+   case 0xed0000000051ULL: /* TDGET */ goto unimplemented;
+   case 0xed0000000054ULL: /* TDCDT */ goto unimplemented;
+   case 0xed0000000055ULL: /* TDGDT */ goto unimplemented;
+   case 0xed0000000058ULL: /* TDCXT */ goto unimplemented;
+   case 0xed0000000059ULL: /* TDGXT */ goto unimplemented;
+   case 0xed0000000064ULL: s390_format_RXY_FRRD(s390_irgen_LEY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xed0000000065ULL: s390_format_RXY_FRRD(s390_irgen_LDY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xed0000000066ULL: s390_format_RXY_FRRD(s390_irgen_STEY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   case 0xed0000000067ULL: s390_format_RXY_FRRD(s390_irgen_STDY, ovl.fmt.RXY.r1,
+                                                ovl.fmt.RXY.x2, ovl.fmt.RXY.b2,
+                                                ovl.fmt.RXY.dl2,
+                                                ovl.fmt.RXY.dh2);  goto ok;
+   }
+
+   switch (((ovl.value >> 16) & 0xff0f00000000ULL) >> 32) {
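+   /* The case key here is the first opcode byte together with the low
+      nibble of the second byte; the 0xff0f mask skips over the r1
+      field that RIL-style layouts keep in between. */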
+   case 0xc000ULL: s390_format_RIL_RP(s390_irgen_LARL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc001ULL: s390_format_RIL_RI(s390_irgen_LGFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc004ULL: s390_format_RIL(s390_irgen_BRCL, ovl.fmt.RIL.r1,
+                                   ovl.fmt.RIL.i2);  goto ok;
+   case 0xc005ULL: s390_format_RIL_RP(s390_irgen_BRASL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc006ULL: s390_format_RIL_RU(s390_irgen_XIHF, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc007ULL: s390_format_RIL_RU(s390_irgen_XILF, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc008ULL: s390_format_RIL_RU(s390_irgen_IIHF, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc009ULL: s390_format_RIL_RU(s390_irgen_IILF, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc00aULL: s390_format_RIL_RU(s390_irgen_NIHF, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc00bULL: s390_format_RIL_RU(s390_irgen_NILF, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc00cULL: s390_format_RIL_RU(s390_irgen_OIHF, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc00dULL: s390_format_RIL_RU(s390_irgen_OILF, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc00eULL: s390_format_RIL_RU(s390_irgen_LLIHF, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc00fULL: s390_format_RIL_RU(s390_irgen_LLILF, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc200ULL: s390_format_RIL_RI(s390_irgen_MSGFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc201ULL: s390_format_RIL_RI(s390_irgen_MSFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc204ULL: s390_format_RIL_RU(s390_irgen_SLGFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc205ULL: s390_format_RIL_RU(s390_irgen_SLFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc208ULL: s390_format_RIL_RI(s390_irgen_AGFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc209ULL: s390_format_RIL_RI(s390_irgen_AFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc20aULL: s390_format_RIL_RU(s390_irgen_ALGFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc20bULL: s390_format_RIL_RU(s390_irgen_ALFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc20cULL: s390_format_RIL_RI(s390_irgen_CGFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc20dULL: s390_format_RIL_RI(s390_irgen_CFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc20eULL: s390_format_RIL_RU(s390_irgen_CLGFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc20fULL: s390_format_RIL_RU(s390_irgen_CLFI, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc402ULL: s390_format_RIL_RP(s390_irgen_LLHRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc404ULL: s390_format_RIL_RP(s390_irgen_LGHRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc405ULL: s390_format_RIL_RP(s390_irgen_LHRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc406ULL: s390_format_RIL_RP(s390_irgen_LLGHRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc407ULL: s390_format_RIL_RP(s390_irgen_STHRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc408ULL: s390_format_RIL_RP(s390_irgen_LGRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc40bULL: s390_format_RIL_RP(s390_irgen_STGRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc40cULL: s390_format_RIL_RP(s390_irgen_LGFRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc40dULL: s390_format_RIL_RP(s390_irgen_LRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc40eULL: s390_format_RIL_RP(s390_irgen_LLGFRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc40fULL: s390_format_RIL_RP(s390_irgen_STRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc600ULL: s390_format_RIL_RP(s390_irgen_EXRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc602ULL: s390_format_RIL_UP(s390_irgen_PFDRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc604ULL: s390_format_RIL_RP(s390_irgen_CGHRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc605ULL: s390_format_RIL_RP(s390_irgen_CHRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc606ULL: s390_format_RIL_RP(s390_irgen_CLGHRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc607ULL: s390_format_RIL_RP(s390_irgen_CLHRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc608ULL: s390_format_RIL_RP(s390_irgen_CGRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc60aULL: s390_format_RIL_RP(s390_irgen_CLGRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc60cULL: s390_format_RIL_RP(s390_irgen_CGFRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc60dULL: s390_format_RIL_RP(s390_irgen_CRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc60eULL: s390_format_RIL_RP(s390_irgen_CLGFRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc60fULL: s390_format_RIL_RP(s390_irgen_CLRL, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xc800ULL: /* MVCOS */ goto unimplemented;
+   case 0xc801ULL: /* ECTG */ goto unimplemented;
+   case 0xc802ULL: /* CSST */ goto unimplemented;
+   case 0xc804ULL: /* LPD */ goto unimplemented;
+   case 0xc805ULL: /* LPDG */ goto unimplemented;
+   case 0xcc06ULL: /* BRCTH */ goto unimplemented;
+   case 0xcc08ULL: s390_format_RIL_RI(s390_irgen_AIH, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xcc0aULL: s390_format_RIL_RI(s390_irgen_ALSIH, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xcc0bULL: s390_format_RIL_RI(s390_irgen_ALSIHN, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xcc0dULL: s390_format_RIL_RI(s390_irgen_CIH, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   case 0xcc0fULL: s390_format_RIL_RU(s390_irgen_CLIH, ovl.fmt.RIL.r1,
+                                      ovl.fmt.RIL.i2);  goto ok;
+   }
+
+   switch (((ovl.value >> 16) & 0xff0000000000ULL) >> 40) {
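+   /* Here the key is just the first opcode byte (SS-style layouts
+      such as MVC/NC/CLC/OC/XC). */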
+   case 0xd0ULL: /* TRTR */ goto unimplemented;
+   case 0xd1ULL: /* MVN */ goto unimplemented;
+   case 0xd2ULL: s390_format_SS_L0RDRD(s390_irgen_MVC, ovl.fmt.SS.l,
+                                       ovl.fmt.SS.b1, ovl.fmt.SS.d1,
+                                       ovl.fmt.SS.b2, ovl.fmt.SS.d2);  goto ok;
+   case 0xd3ULL: /* MVZ */ goto unimplemented;
+   case 0xd4ULL: s390_format_SS_L0RDRD(s390_irgen_NC, ovl.fmt.SS.l,
+                                       ovl.fmt.SS.b1, ovl.fmt.SS.d1,
+                                       ovl.fmt.SS.b2, ovl.fmt.SS.d2);  goto ok;
+   case 0xd5ULL: s390_format_SS_L0RDRD(s390_irgen_CLC, ovl.fmt.SS.l,
+                                       ovl.fmt.SS.b1, ovl.fmt.SS.d1,
+                                       ovl.fmt.SS.b2, ovl.fmt.SS.d2);  goto ok;
+   case 0xd6ULL: s390_format_SS_L0RDRD(s390_irgen_OC, ovl.fmt.SS.l,
+                                       ovl.fmt.SS.b1, ovl.fmt.SS.d1,
+                                       ovl.fmt.SS.b2, ovl.fmt.SS.d2);  goto ok;
+   case 0xd7ULL:
+      if (ovl.fmt.SS.b1 == ovl.fmt.SS.b2 && ovl.fmt.SS.d1 == ovl.fmt.SS.d2)
+         s390_irgen_XC_sameloc(ovl.fmt.SS.l, ovl.fmt.SS.b1, ovl.fmt.SS.d1);
+      else
+         s390_format_SS_L0RDRD(s390_irgen_XC, ovl.fmt.SS.l,
+                               ovl.fmt.SS.b1, ovl.fmt.SS.d1,
+                               ovl.fmt.SS.b2, ovl.fmt.SS.d2);
+      goto ok;
+   case 0xd9ULL: /* MVCK */ goto unimplemented;
+   case 0xdaULL: /* MVCP */ goto unimplemented;
+   case 0xdbULL: /* MVCS */ goto unimplemented;
+   case 0xdcULL: /* TR */ goto unimplemented;
+   case 0xddULL: /* TRT */ goto unimplemented;
+   case 0xdeULL: /* ED */ goto unimplemented;
+   case 0xdfULL: /* EDMK */ goto unimplemented;
+   case 0xe1ULL: /* PKU */ goto unimplemented;
+   case 0xe2ULL: /* UNPKU */ goto unimplemented;
+   case 0xe8ULL: /* MVCIN */ goto unimplemented;
+   case 0xe9ULL: /* PKA */ goto unimplemented;
+   case 0xeaULL: /* UNPKA */ goto unimplemented;
+   case 0xeeULL: /* PLO */ goto unimplemented;
+   case 0xefULL: /* LMD */ goto unimplemented;
+   case 0xf0ULL: /* SRP */ goto unimplemented;
+   case 0xf1ULL: /* MVO */ goto unimplemented;
+   case 0xf2ULL: /* PACK */ goto unimplemented;
+   case 0xf3ULL: /* UNPK */ goto unimplemented;
+   case 0xf8ULL: /* ZAP */ goto unimplemented;
+   case 0xf9ULL: /* CP */ goto unimplemented;
+   case 0xfaULL: /* AP */ goto unimplemented;
+   case 0xfbULL: /* SP */ goto unimplemented;
+   case 0xfcULL: /* MP */ goto unimplemented;
+   case 0xfdULL: /* DP */ goto unimplemented;
+   }
+
+   switch (((ovl.value >> 16) & 0xffff00000000ULL) >> 32) {
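+   /* Here the key is the full 16-bit opcode held in the first two
+      bytes (SIL-style layouts). */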
+   case 0xe500ULL: /* LASP */ goto unimplemented;
+   case 0xe501ULL: /* TPROT */ goto unimplemented;
+   case 0xe502ULL: /* STRAG */ goto unimplemented;
+   case 0xe50eULL: /* MVCSK */ goto unimplemented;
+   case 0xe50fULL: /* MVCDK */ goto unimplemented;
+   case 0xe544ULL: s390_format_SIL_RDI(s390_irgen_MVHHI, ovl.fmt.SIL.b1,
+                                       ovl.fmt.SIL.d1, ovl.fmt.SIL.i2);
+                                       goto ok;
+   case 0xe548ULL: s390_format_SIL_RDI(s390_irgen_MVGHI, ovl.fmt.SIL.b1,
+                                       ovl.fmt.SIL.d1, ovl.fmt.SIL.i2);
+                                       goto ok;
+   case 0xe54cULL: s390_format_SIL_RDI(s390_irgen_MVHI, ovl.fmt.SIL.b1,
+                                       ovl.fmt.SIL.d1, ovl.fmt.SIL.i2);
+                                       goto ok;
+   case 0xe554ULL: s390_format_SIL_RDI(s390_irgen_CHHSI, ovl.fmt.SIL.b1,
+                                       ovl.fmt.SIL.d1, ovl.fmt.SIL.i2);
+                                       goto ok;
+   case 0xe555ULL: s390_format_SIL_RDU(s390_irgen_CLHHSI, ovl.fmt.SIL.b1,
+                                       ovl.fmt.SIL.d1, ovl.fmt.SIL.i2);
+                                       goto ok;
+   case 0xe558ULL: s390_format_SIL_RDI(s390_irgen_CGHSI, ovl.fmt.SIL.b1,
+                                       ovl.fmt.SIL.d1, ovl.fmt.SIL.i2);
+                                       goto ok;
+   case 0xe559ULL: s390_format_SIL_RDU(s390_irgen_CLGHSI, ovl.fmt.SIL.b1,
+                                       ovl.fmt.SIL.d1, ovl.fmt.SIL.i2);
+                                       goto ok;
+   case 0xe55cULL: s390_format_SIL_RDI(s390_irgen_CHSI, ovl.fmt.SIL.b1,
+                                       ovl.fmt.SIL.d1, ovl.fmt.SIL.i2);
+                                       goto ok;
+   case 0xe55dULL: s390_format_SIL_RDU(s390_irgen_CLFHSI, ovl.fmt.SIL.b1,
+                                       ovl.fmt.SIL.d1, ovl.fmt.SIL.i2);
+                                       goto ok;
+   }
+
+   return S390_DECODE_UNKNOWN_INSN;
+
+ok:
+   return S390_DECODE_OK;
+
+unimplemented:
+   return S390_DECODE_UNIMPLEMENTED_INSN;
+}
+
+/* Handle "special" instructions. */
+static s390_decode_t
+s390_decode_special_and_irgen(UChar *bytes)
+{
+   s390_decode_t status = S390_DECODE_OK;
+
+   /* Got a "Special" instruction preamble.  Which one is it? */
+   if (bytes[0] == 0x18 && bytes[1] == 0x22 /* lr %r2, %r2 */) {
+      s390_irgen_client_request();
+   } else if (bytes[0] == 0x18 && bytes[1] == 0x33 /* lr %r3, %r3 */) {
+      s390_irgen_guest_NRADDR();
+   } else if (bytes[0] == 0x18 && bytes[1] == 0x44 /* lr %r4, %r4 */) {
+      s390_irgen_call_noredir();
+   } else {
+      /* We don't know what it is. */
+      return S390_DECODE_UNKNOWN_SPECIAL_INSN;
+   }
+
+   dis_res->len = S390_SPECIAL_OP_PREAMBLE_SIZE + S390_SPECIAL_OP_SIZE;
+
+   return status;
+}
+
+
+/* Returns the number of bytes decoded, or 0 in case of failure. */
+static UInt
+s390_decode_and_irgen(UChar *bytes, UInt insn_length, DisResult *dres)
+{
+   s390_decode_t status;
+
+   dis_res = dres;
+
+   /* Spot the 8-byte preamble:   18ff lr r15,r15
+                                  1811 lr r1,r1
+                                  1822 lr r2,r2
+                                  1833 lr r3,r3 */
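+   /* This presumably mirrors the special-instruction preamble that the
+      client-request macros in valgrind.h emit for s390x. */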
+   if (bytes[ 0] == 0x18 && bytes[ 1] == 0xff && bytes[ 2] == 0x18 &&
+       bytes[ 3] == 0x11 && bytes[ 4] == 0x18 && bytes[ 5] == 0x22 &&
+       bytes[ 6] == 0x18 && bytes[ 7] == 0x33) {
+
+      /* Handle special instruction that follows that preamble. */
+      if (0) vex_printf("special function handling...\n");
+
+      insn_length = S390_SPECIAL_OP_PREAMBLE_SIZE + S390_SPECIAL_OP_SIZE;
+      guest_IA_next_instr = guest_IA_curr_instr + insn_length;
+
+      status =
+         s390_decode_special_and_irgen(bytes + S390_SPECIAL_OP_PREAMBLE_SIZE);
+   } else {
+      /* Handle normal instructions. */
+      switch (insn_length) {
+      case 2:
+         status = s390_decode_2byte_and_irgen(bytes);
+         break;
+
+      case 4:
+         status = s390_decode_4byte_and_irgen(bytes);
+         break;
+
+      case 6:
+         status = s390_decode_6byte_and_irgen(bytes);
+         break;
+
+      default:
+         status = S390_DECODE_ERROR;
+         break;
+      }
+   }
+   /* If the next instruction is EX (execute, opcode 0x44), stop here */
+   if (irsb->next == NULL && dis_res->whatNext == Dis_Continue
+       && bytes[insn_length] == 0x44) {
+      irsb->next = IRExpr_Const(IRConst_U64(guest_IA_next_instr));
+      dis_res->whatNext = Dis_StopHere;
+      dis_res->continueAt = 0;
+   }
+
+   if (status == S390_DECODE_OK) return insn_length;  /* OK */
+
+   /* Decoding failed somehow */
+   vex_printf("vex s390->IR: ");
+   switch (status) {
+   case S390_DECODE_UNKNOWN_INSN:
+      vex_printf("unknown insn: ");
+      break;
+
+   case S390_DECODE_UNIMPLEMENTED_INSN:
+      vex_printf("unimplemented insn: ");
+      break;
+
+   case S390_DECODE_UNKNOWN_SPECIAL_INSN:
+      vex_printf("unimplemented special insn: ");
+      break;
+
+   default:
+   case S390_DECODE_ERROR:
+      vex_printf("decoding error: ");
+      break;
+   }
+
+   vex_printf("%02x%02x", bytes[0], bytes[1]);
+   if (insn_length > 2) {
+      vex_printf(" %02x%02x", bytes[2], bytes[3]);
+   }
+   if (insn_length > 4) {
+      vex_printf(" %02x%02x", bytes[4], bytes[5]);
+   }
+   vex_printf("\n");
+
+   return 0;  /* Failed */
+}
+
+
+/* Generate an IRExpr for an address. */
+static __inline__ IRExpr *
+mkaddr_expr(Addr64 addr)
+{
+   return IRExpr_Const(IRConst_U64(addr));
+}
+
+
+/* Disassemble a single instruction INSN into IR. */
+static DisResult
+disInstr_S390_WRK(UChar *insn)
+{
+   UChar byte;
+   UInt  insn_length;
+   DisResult dres;
+
+   /* ---------------------------------------------------- */
+   /* --- Compute instruction length                    -- */
+   /* ---------------------------------------------------- */
+
+   /* Get the first byte of the insn. */
+   byte = insn[0];
+
+   /* The leftmost two bits (0:1) encode the length of the insn in bytes.
+      00 -> 2 bytes, 01 -> 4 bytes, 10 -> 4 bytes, 11 -> 6 bytes. */
+   insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
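+   /* Sanity check of the formula: byte>>6 == 0 gives ((1>>1)+1)<<1 == 2,
+      1 gives ((2>>1)+1)<<1 == 4, 2 gives ((3>>1)+1)<<1 == 4, and
+      3 gives ((4>>1)+1)<<1 == 6, matching the table above. */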
+
+   guest_IA_next_instr = guest_IA_curr_instr + insn_length;
+
+   /* ---------------------------------------------------- */
+   /* --- Initialise the DisResult data                 -- */
+   /* ---------------------------------------------------- */
+   dres.whatNext   = Dis_Continue;
+   dres.len        = insn_length;
+   dres.continueAt = 0;
+
+   /* fixs390: consider chasing of conditional jumps */
+
+   /* Normal and special instruction handling starts here. */
+   if (s390_decode_and_irgen(insn, insn_length, &dres) == 0) {
+      /* All decode failures end up here. The decoder has already issued an
+         error message.
+         Tell the dispatcher that this insn cannot be decoded, and so has
+         not been executed, and (is currently) the next to be executed.
+         IA should be up-to-date since it was set at the start of each
+         insn, but nevertheless be paranoid and update it again right
+         now. */
+      addStmtToIRSB(irsb, IRStmt_Put(S390X_GUEST_OFFSET(guest_IA),
+                                     mkaddr_expr(guest_IA_curr_instr)));
+
+      irsb->next = mkaddr_expr(guest_IA_next_instr);
+      irsb->jumpkind = Ijk_NoDecode;
+      dres.whatNext = Dis_StopHere;
+      dres.len = 0;
+   }
+
+   return dres;
+}
+
+
+/*------------------------------------------------------------*/
+/*--- Top-level fn                                         ---*/
+/*------------------------------------------------------------*/
+
+/* Disassemble a single instruction into IR.  The instruction
+   is located in host memory at &guest_code[delta]. */
+
+DisResult
+disInstr_S390(IRSB        *irsb_IN,
+              Bool         put_IP,
+              Bool       (*resteerOkFn)(void *, Addr64),
+              Bool         resteerCisOk,
+              void        *callback_opaque,
+              UChar       *guest_code,
+              Long         delta,
+              Addr64       guest_IP,
+              VexArch      guest_arch,
+              VexArchInfo *archinfo,
+              VexAbiInfo  *abiinfo,
+              Bool         host_bigendian)
+{
+   vassert(guest_arch == VexArchS390X);
+
+   /* The instruction decoder requires a big-endian machine. */
+   vassert(host_bigendian == True);
+
+   /* Set globals (see top of this file) */
+   guest_IA_curr_instr = guest_IP;
+   irsb = irsb_IN;
+   resteer_fn = resteerOkFn;
+   resteer_data = callback_opaque;
+
+   /* We may be asked to update the guest IA before going further. */
+   if (put_IP)
+      addStmtToIRSB(irsb, IRStmt_Put(S390X_GUEST_OFFSET(guest_IA),
+                                     mkaddr_expr(guest_IA_curr_instr)));
+
+   return disInstr_S390_WRK(guest_code + delta);
+}
+
+/*---------------------------------------------------------------*/
+/*--- end                                   guest_s390_toIR.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/main/VEX/priv/guest_x86_defs.h b/main/VEX/priv/guest_x86_defs.h
index 9633533..130d84d 100644
--- a/main/VEX/priv/guest_x86_defs.h
+++ b/main/VEX/priv/guest_x86_defs.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -86,7 +86,7 @@
                 UInt cc_op, UInt cc_dep1, UInt cc_dep2, UInt cc_ndep 
              );
 
-__attribute((regparm(3)))
+VEX_REGPARM(3)
 extern UInt  x86g_calculate_eflags_c ( 
                 UInt cc_op, UInt cc_dep1, UInt cc_dep2, UInt cc_ndep 
              );
diff --git a/main/VEX/priv/guest_x86_helpers.c b/main/VEX/priv/guest_x86_helpers.c
index c538c07..2bfe210 100644
--- a/main/VEX/priv/guest_x86_helpers.c
+++ b/main/VEX/priv/guest_x86_helpers.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -588,7 +588,7 @@
 
 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
 /* Calculate just the carry flag from the supplied thunk parameters. */
-__attribute((regparm(3)))
+VEX_REGPARM(3)
 UInt x86g_calculate_eflags_c ( UInt cc_op, 
                                UInt cc_dep1, 
                                UInt cc_dep2,
diff --git a/main/VEX/priv/guest_x86_toIR.c b/main/VEX/priv/guest_x86_toIR.c
index 9919fa6..363d66b 100644
--- a/main/VEX/priv/guest_x86_toIR.c
+++ b/main/VEX/priv/guest_x86_toIR.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -998,7 +998,10 @@
                                    widenUto32(mkexpr(resUS)))) );
    /* Set NDEP even though it isn't used.  This makes redundant-PUT
       elimination of previous stores to this field work better. */
-   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU32(0) ));
+   stmt( IRStmt_Put( OFFB_CC_NDEP,
+                     IRExpr_Mux0X( mkexpr(guard),
+                                   IRExpr_Get(OFFB_CC_NDEP,Ity_I32),
+                                   mkU32(0) )));
 }
 
 
@@ -2389,7 +2392,7 @@
    }
 
    isShift = False;
-   switch (gregOfRM(modrm)) { case 4: case 5: case 7: isShift = True; }
+   switch (gregOfRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
 
    isRotate = False;
    switch (gregOfRM(modrm)) { case 0: case 1: isRotate = True; }
@@ -2397,11 +2400,6 @@
    isRotateC = False;
    switch (gregOfRM(modrm)) { case 2: case 3: isRotateC = True; }
 
-   if (gregOfRM(modrm) == 6) {
-      *decode_OK = False;
-      return delta;
-   }
-
    if (!isShift && !isRotate && !isRotateC) {
       /*NOTREACHED*/
       vpanic("dis_Grp2(Reg): unhandled case(x86)");
@@ -2446,6 +2444,7 @@
       switch (gregOfRM(modrm)) { 
          case 4: op32 = Iop_Shl32; break;
          case 5: op32 = Iop_Shr32; break;
+         case 6: op32 = Iop_Shl32; break;
          case 7: op32 = Iop_Sar32; break;
          /*NOTREACHED*/
          default: vpanic("dis_Grp2:shift"); break;
@@ -5472,9 +5471,9 @@
       case 0x65: op = Iop_CmpGT16Sx4; break;
       case 0x66: op = Iop_CmpGT32Sx2; break;
 
-      case 0x6B: op = Iop_QNarrow32Sx2; eLeft = True; break;
-      case 0x63: op = Iop_QNarrow16Sx4; eLeft = True; break;
-      case 0x67: op = Iop_QNarrow16Ux4; eLeft = True; break;
+      case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
+      case 0x63: op = Iop_QNarrowBin16Sto8Sx8;  eLeft = True; break;
+      case 0x67: op = Iop_QNarrowBin16Sto8Ux8;  eLeft = True; break;
 
       case 0x68: op = Iop_InterleaveHI8x8;  eLeft = True; break;
       case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
@@ -8099,6 +8098,7 @@
 
       addr = disAMode ( &alen, sorb, delta+2, dis_buf );
       delta += 2+alen;
+      gen_SEGV_if_not_16_aligned(addr);
 
       DIP("fxsave %s\n", dis_buf);
 
@@ -8169,11 +8169,15 @@
 
       addr = disAMode ( &alen, sorb, delta+2, dis_buf );
       delta += 2+alen;
+      gen_SEGV_if_not_16_aligned(addr);
 
       DIP("fxrstor %s\n", dis_buf);
 
       /* Uses dirty helper: 
-            void x86g_do_FXRSTOR ( VexGuestX86State*, UInt ) */
+            VexEmWarn x86g_do_FXRSTOR ( VexGuestX86State*, UInt )
+         NOTE:
+            the VexEmWarn value is simply ignored (unlike for FRSTOR)
+      */
       d = unsafeIRDirty_0_N ( 
              0/*regparms*/, 
              "x86g_dirtyhelper_FXRSTOR", 
@@ -10528,21 +10532,24 @@
    /* 66 0F 6B = PACKSSDW */
    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x6B) {
       delta = dis_SSEint_E_to_G( sorb, delta+2, 
-                                 "packssdw", Iop_QNarrow32Sx4, True );
+                                 "packssdw",
+                                 Iop_QNarrowBin32Sto16Sx8, True );
       goto decode_success;
    }
 
    /* 66 0F 63 = PACKSSWB */
    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x63) {
       delta = dis_SSEint_E_to_G( sorb, delta+2, 
-                                 "packsswb", Iop_QNarrow16Sx8, True );
+                                 "packsswb",
+                                 Iop_QNarrowBin16Sto8Sx16, True );
       goto decode_success;
    }
 
    /* 66 0F 67 = PACKUSWB */
    if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x67) {
       delta = dis_SSEint_E_to_G( sorb, delta+2, 
-                                 "packuswb", Iop_QNarrow16Ux8, True );
+                                 "packuswb",
+                                 Iop_QNarrowBin16Sto8Ux16, True );
       goto decode_success;
    }
 
@@ -14999,11 +15006,12 @@
          break;
       }
 
+      case 0x0E: /* FEMMS */
       case 0x77: /* EMMS */
          if (sz != 4)
             goto decode_failure;
          do_EMMS_preamble();
-         DIP("emms\n");
+         DIP("{f}emms\n");
          break;
 
       /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
diff --git a/main/VEX/priv/host_amd64_defs.c b/main/VEX/priv/host_amd64_defs.c
index 01df33d..807ab4b 100644
--- a/main/VEX/priv/host_amd64_defs.c
+++ b/main/VEX/priv/host_amd64_defs.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -314,13 +314,16 @@
    return op;
 }
 
-void ppAMD64RMI ( AMD64RMI* op ) {
+static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
    switch (op->tag) {
       case Armi_Imm: 
          vex_printf("$0x%x", op->Armi.Imm.imm32);
          return;
-      case Armi_Reg: 
-         ppHRegAMD64(op->Armi.Reg.reg);
+      case Armi_Reg:
+         if (lo32)
+            ppHRegAMD64_lo32(op->Armi.Reg.reg);
+         else
+            ppHRegAMD64(op->Armi.Reg.reg);
          return;
       case Armi_Mem: 
          ppAMD64AMode(op->Armi.Mem.am);
@@ -329,6 +332,12 @@
          vpanic("ppAMD64RMI");
    }
 }
+void ppAMD64RMI ( AMD64RMI* op ) {
+   ppAMD64RMI_wrk(op, False/*!lo32*/);
+}
+void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
+   ppAMD64RMI_wrk(op, True/*lo32*/);
+}
 
 /* An AMD64RMI can only be used in a "read" context (what would it mean
    to write or modify a literal?) and so we enumerate its registers
@@ -679,6 +688,19 @@
    i->Ain.Lea64.dst   = dst;
    return i;
 }
+AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
+   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
+   i->tag            = Ain_Alu32R;
+   i->Ain.Alu32R.op  = op;
+   i->Ain.Alu32R.src = src;
+   i->Ain.Alu32R.dst = dst;
+   switch (op) {
+      case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
+      case Aalu_AND: case Aalu_OR:  case Aalu_XOR: break;
+      default: vassert(0);
+   }
+   return i;
+}
 AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
    AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));
    i->tag            = Ain_MulL;
@@ -1083,6 +1105,12 @@
          vex_printf(",");
          ppHRegAMD64(i->Ain.Lea64.dst);
          return;
+      case Ain_Alu32R:
+         vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
+         ppAMD64RMI_lo32(i->Ain.Alu32R.src);
+         vex_printf(",");
+         ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
+         return;
       case Ain_MulL:
          vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
          ppAMD64RM(i->Ain.MulL.src);
@@ -1423,6 +1451,15 @@
          addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
          addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
          return;
+      case Ain_Alu32R:
+         vassert(i->Ain.Alu32R.op != Aalu_MOV);
+         addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
+         if (i->Ain.Alu32R.op == Aalu_CMP) { 
+            addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
+            return;
+         }
+         addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
+         return;
       case Ain_MulL:
          addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
          addHRegUse(u, HRmModify, hregAMD64_RAX());
@@ -1719,6 +1756,10 @@
          mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
          mapReg(m, &i->Ain.Lea64.dst);
          return;
+      case Ain_Alu32R:
+         mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
+         mapReg(m, &i->Ain.Alu32R.dst);
+         return;
       case Ain_MulL:
          mapRegs_AMD64RM(m, i->Ain.MulL.src);
          return;
@@ -2303,7 +2344,9 @@
    imperative to emit position-independent code. */
 
 Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i, 
-                      Bool mode64, void* dispatch )
+                      Bool mode64,
+                      void* dispatch_unassisted,
+                      void* dispatch_assisted )
 {
    UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
    UInt   xtra;
@@ -2325,9 +2368,20 @@
    switch (i->tag) {
 
    case Ain_Imm64:
-      *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
-      *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
-      p = emit64(p, i->Ain.Imm64.imm64);
+      if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
+         /* Use the short form (load into 32 bit reg, + default
+            widening rule) for constants up to 0xFFFFF.  We could use
+            this form for the whole range 0 to 0x7FFFFFFF inclusive,
+            but limit it to a smaller range for verifiability
+            purposes. */
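+         /* For example, imm64 == 0x1234 with dst == %r10 emits
+            41 BA 34 12 00 00 ("movl $0x1234, %r10d"), and the write
+            to %r10d zero-extends into the full %r10. */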
+         if (1 & iregBit3(i->Ain.Imm64.dst))
+            *p++ = 0x41;
+         *p++ = 0xB8 + iregBits210(i->Ain.Imm64.dst);
+         p = emit32(p, (UInt)i->Ain.Imm64.imm64);
+      } else {
+         *p++ = toUChar(0x48 + (1 & iregBit3(i->Ain.Imm64.dst)));
+         *p++ = toUChar(0xB8 + iregBits210(i->Ain.Imm64.dst));
+         p = emit64(p, i->Ain.Imm64.imm64);
+      }
       goto done;
 
    case Ain_Alu64R:
@@ -2335,7 +2389,7 @@
       if (i->Ain.Alu64R.op == Aalu_MOV) {
          switch (i->Ain.Alu64R.src->tag) {
             case Armi_Imm:
-               if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFF)) {
+               if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
                   /* Actually we could use this form for constants in
                      the range 0 through 0x7FFFFFFF inclusive, but
                      limit it to a small range for verifiability
@@ -2573,6 +2627,69 @@
       p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
       goto done;
 
+   case Ain_Alu32R:
+      /* ADD/SUB/AND/OR/XOR/CMP */
+      opc = opc_rr = subopc_imm = opc_imma = 0;
+      switch (i->Ain.Alu32R.op) {
+         case Aalu_ADD: opc = 0x03; opc_rr = 0x01; 
+                        subopc_imm = 0; opc_imma = 0x05; break;
+         case Aalu_SUB: opc = 0x2B; opc_rr = 0x29; 
+                        subopc_imm = 5; opc_imma = 0x2D; break;
+         case Aalu_AND: opc = 0x23; opc_rr = 0x21; 
+                        subopc_imm = 4; opc_imma = 0x25; break;
+         case Aalu_XOR: opc = 0x33; opc_rr = 0x31; 
+                        subopc_imm = 6; opc_imma = 0x35; break;
+         case Aalu_OR:  opc = 0x0B; opc_rr = 0x09; 
+                        subopc_imm = 1; opc_imma = 0x0D; break;
+         case Aalu_CMP: opc = 0x3B; opc_rr = 0x39; 
+                        subopc_imm = 7; opc_imma = 0x3D; break;
+         default: goto bad;
+      }
+      switch (i->Ain.Alu32R.src->tag) {
+         case Armi_Imm:
+            if (i->Ain.Alu32R.dst == hregAMD64_RAX()
+                && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
+               goto bad; /* FIXME: awaiting test case */
+               *p++ = toUChar(opc_imma);
+               p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
+            } else
+            if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
+               rex  = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst ) );
+               if (rex != 0x40) *p++ = rex;
+               *p++ = 0x83; 
+               p    = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
+               *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
+            } else {
+               rex  = clearWBit( rexAMode_R( fake(0), i->Ain.Alu32R.dst) );
+               if (rex != 0x40) *p++ = rex;
+               *p++ = 0x81; 
+               p    = doAMode_R(p, fake(subopc_imm), i->Ain.Alu32R.dst);
+               p    = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
+            }
+            goto done;
+         case Armi_Reg:
+            rex  = clearWBit( 
+                   rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
+                               i->Ain.Alu32R.dst) );
+            if (rex != 0x40) *p++ = rex;
+            *p++ = toUChar(opc_rr);
+            p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
+                             i->Ain.Alu32R.dst);
+            goto done;
+         case Armi_Mem:
+            rex  = clearWBit(
+                   rexAMode_M( i->Ain.Alu32R.dst,
+                               i->Ain.Alu32R.src->Armi.Mem.am) );
+            if (rex != 0x40) *p++ = rex;
+            *p++ = toUChar(opc);
+            p = doAMode_M(p, i->Ain.Alu32R.dst,
+                             i->Ain.Alu32R.src->Armi.Mem.am);
+            goto done;
+         default: 
+            goto bad;
+      }
+      break;
+
    case Ain_MulL:
       subopc = i->Ain.MulL.syned ? 5 : 4;
       switch (i->Ain.MulL.src->tag)  {
@@ -2705,7 +2822,11 @@
       goto done;
    }
 
-   case Ain_Goto:
+   case Ain_Goto: {
+      void* dispatch_to_use = NULL;
+      vassert(dispatch_unassisted != NULL);
+      vassert(dispatch_assisted != NULL);
+
       /* Use ptmp for backpatching conditional jumps. */
       ptmp = NULL;
 
@@ -2721,7 +2842,10 @@
       /* If a non-boring, set %rbp (the guest state pointer)
          appropriately.  Since these numbers are all small positive
          integers, we can get away with "movl $N, %ebp" rather than
-         the longer "movq $N, %rbp". */
+         the longer "movq $N, %rbp".  Also, decide which dispatcher we
+         need to use. */
+      dispatch_to_use = dispatch_assisted;
+
       /* movl $magic_number, %ebp */
       switch (i->Ain.Goto.jk) {
          case Ijk_ClientReq: 
@@ -2763,6 +2887,7 @@
          case Ijk_Ret:
          case Ijk_Call:
          case Ijk_Boring:
+            dispatch_to_use = dispatch_unassisted;
             break;
          default: 
             ppIRJumpKind(i->Ain.Goto.jk);
@@ -2790,19 +2915,18 @@
          after the load of %rax since %rdx might be carrying the value
          destined for %rax immediately prior to this Ain_Goto. */
       vassert(sizeof(ULong) == sizeof(void*));
-      vassert(dispatch != NULL);
 
-      if (fitsIn32Bits(Ptr_to_ULong(dispatch))) {
+      if (fitsIn32Bits(Ptr_to_ULong(dispatch_to_use))) {
          /* movl sign-extend(imm32), %rdx */
          *p++ = 0x48;
          *p++ = 0xC7;
          *p++ = 0xC2;
-         p = emit32(p, (UInt)Ptr_to_ULong(dispatch));
+         p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use));
       } else {
          /* movabsq $imm64, %rdx */
          *p++ = 0x48;
          *p++ = 0xBA;
-         p = emit64(p, Ptr_to_ULong(dispatch));
+         p = emit64(p, Ptr_to_ULong(dispatch_to_use));
       }
       /* jmp *%rdx */
       *p++ = 0xFF;
@@ -2815,6 +2939,7 @@
          *ptmp = toUChar(delta-1);
       }
       goto done;
+   }
 
    case Ain_CMov64:
       vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
diff --git a/main/VEX/priv/host_amd64_defs.h b/main/VEX/priv/host_amd64_defs.h
index cf19bac..4e7ae05 100644
--- a/main/VEX/priv/host_amd64_defs.h
+++ b/main/VEX/priv/host_amd64_defs.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -189,7 +189,8 @@
 extern AMD64RMI* AMD64RMI_Reg ( HReg );
 extern AMD64RMI* AMD64RMI_Mem ( AMD64AMode* );
 
-extern void ppAMD64RMI ( AMD64RMI* );
+extern void ppAMD64RMI      ( AMD64RMI* );
+extern void ppAMD64RMI_lo32 ( AMD64RMI* );
 
 
 /* --------- Operand, which can be reg or immediate only. --------- */
@@ -359,6 +360,7 @@
       Ain_Test64,      /* 64-bit test (AND, set flags, discard result) */
       Ain_Unary64,     /* 64-bit not and neg */
       Ain_Lea64,       /* 64-bit compute EA into a reg */
+      Ain_Alu32R,      /* 32-bit add/sub/and/or/xor/cmp, dst=REG (a la Alu64R) */
       Ain_MulL,        /* widening multiply */
       Ain_Div,         /* div and mod */
 //..       Xin_Sh3232,    /* shldl or shrdl */
@@ -449,6 +451,12 @@
             AMD64AMode* am;
             HReg        dst;
          } Lea64;
+         /* 32-bit add/sub/and/or/xor/cmp, dst=REG (a la Alu64R) */
+         struct {
+            AMD64AluOp op;
+            AMD64RMI*  src;
+            HReg       dst;
+         } Alu32R;
          /* 64 x 64 -> 128 bit widening multiply: RDX:RAX = RAX *s/u
             r/m64 */
          struct {
@@ -676,6 +684,7 @@
 extern AMD64Instr* AMD64Instr_Alu64M     ( AMD64AluOp, AMD64RI*,  AMD64AMode* );
 extern AMD64Instr* AMD64Instr_Unary64    ( AMD64UnaryOp op, HReg dst );
 extern AMD64Instr* AMD64Instr_Lea64      ( AMD64AMode* am, HReg dst );
+extern AMD64Instr* AMD64Instr_Alu32R     ( AMD64AluOp, AMD64RMI*, HReg );
 extern AMD64Instr* AMD64Instr_Sh64       ( AMD64ShiftOp, UInt, HReg );
 extern AMD64Instr* AMD64Instr_Test64     ( UInt imm32, HReg dst );
 extern AMD64Instr* AMD64Instr_MulL       ( Bool syned, AMD64RM* );
@@ -734,7 +743,9 @@
 extern void         mapRegs_AMD64Instr     ( HRegRemap*, AMD64Instr*, Bool );
 extern Bool         isMove_AMD64Instr      ( AMD64Instr*, HReg*, HReg* );
 extern Int          emit_AMD64Instr        ( UChar* buf, Int nbuf, AMD64Instr*, 
-                                             Bool, void* dispatch );
+                                             Bool,
+                                             void* dispatch_unassisted,
+                                             void* dispatch_assisted );
 
 extern void genSpill_AMD64  ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                               HReg rreg, Int offset, Bool );
diff --git a/main/VEX/priv/host_amd64_isel.c b/main/VEX/priv/host_amd64_isel.c
index 8f8e4a3..bcd213f 100644
--- a/main/VEX/priv/host_amd64_isel.c
+++ b/main/VEX/priv/host_amd64_isel.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -863,7 +863,10 @@
    DECLARE_PATTERN(p_LDle16_then_16Uto64);
 
    IRType ty = typeOfIRExpr(env->type_env,e);
-   vassert(ty == Ity_I32 || Ity_I16 || Ity_I8);
+   switch (ty) {
+      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8: break;
+      default: vassert(0);
+   }
 
    switch (e->tag) {
 
@@ -1091,12 +1094,16 @@
          case Iop_QAdd16Ux4:
             fn = (HWord)h_generic_calc_QAdd16Ux4; break;
 
-         case Iop_QNarrow32Sx2:
-            fn = (HWord)h_generic_calc_QNarrow32Sx2; break;
-         case Iop_QNarrow16Sx4:
-            fn = (HWord)h_generic_calc_QNarrow16Sx4; break;
-         case Iop_QNarrow16Ux4:
-            fn = (HWord)h_generic_calc_QNarrow16Ux4; break;
+         case Iop_QNarrowBin32Sto16Sx4:
+            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; break;
+         case Iop_QNarrowBin16Sto8Sx8:
+            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
+         case Iop_QNarrowBin16Sto8Ux8:
+            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
+         case Iop_NarrowBin16to8x8:
+            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
+         case Iop_NarrowBin32to16x4:
+            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;
 
          case Iop_QSub8Sx8:
             fn = (HWord)h_generic_calc_QSub8Sx8; break;
@@ -1170,19 +1177,11 @@
       /* Handle misc other ops. */
 
       if (e->Iex.Binop.op == Iop_Max32U) {
-         /* This generates a truly rotten piece of code.  Just as well
-            it doesn't happen very often. */
-         HReg src1  = iselIntExpr_R(env, e->Iex.Binop.arg1);
-         HReg src1L = newVRegI(env);
-         HReg src2  = iselIntExpr_R(env, e->Iex.Binop.arg2);
-         HReg src2L = newVRegI(env);
-         HReg dst   = newVRegI(env);
-         addInstr(env, mk_iMOVsd_RR(src1,dst));
-         addInstr(env, mk_iMOVsd_RR(src1,src1L));
-         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src1L));
-         addInstr(env, mk_iMOVsd_RR(src2,src2L));
-         addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, src2L));
-         addInstr(env, AMD64Instr_Alu64R(Aalu_CMP, AMD64RMI_Reg(src2L), src1L));
+         HReg src1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
+         HReg dst  = newVRegI(env);
+         HReg src2 = iselIntExpr_R(env, e->Iex.Binop.arg2);
+         addInstr(env, mk_iMOVsd_RR(src1, dst));
+         addInstr(env, AMD64Instr_Alu32R(Aalu_CMP, AMD64RMI_Reg(src2), dst));
          addInstr(env, AMD64Instr_CMov64(Acc_B, AMD64RM_Reg(src2), dst));
          return dst;
       }
@@ -1419,6 +1418,36 @@
          }
       }
 
+      /* 32Uto64( Add32/Sub32/And32/Or32/Xor32(expr32, expr32) )
+         Use 32 bit arithmetic and let the default zero-extend rule
+         do the 32Uto64 for free. */
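+      /* For example, 32Uto64(Add32(x,y)) can then be a single "addl",
+         since a 32-bit ALU op on amd64 writes zeroes into the upper
+         half of its destination register anyway. */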
+      if (e->Iex.Unop.op == Iop_32Uto64 && e->Iex.Unop.arg->tag == Iex_Binop) {
+         IROp    opi  = e->Iex.Unop.arg->Iex.Binop.op; /* inner op */
+         IRExpr* argL = e->Iex.Unop.arg->Iex.Binop.arg1;
+         IRExpr* argR = e->Iex.Unop.arg->Iex.Binop.arg2;
+         AMD64AluOp aluOp = Aalu_INVALID;
+         switch (opi) {
+            case Iop_Add32: aluOp = Aalu_ADD; break;
+            case Iop_Sub32: aluOp = Aalu_SUB; break;
+            case Iop_And32: aluOp = Aalu_AND; break;
+            case Iop_Or32:  aluOp = Aalu_OR;  break;
+            case Iop_Xor32: aluOp = Aalu_XOR; break;
+            default: break;
+         }
+         if (aluOp != Aalu_INVALID) {
+            /* For commutative ops we assume any literal values are on
+               the second operand. */
+            HReg dst      = newVRegI(env);
+            HReg reg      = iselIntExpr_R(env, argL);
+            AMD64RMI* rmi = iselIntExpr_RMI(env, argR);
+            addInstr(env, mk_iMOVsd_RR(reg,dst));
+            addInstr(env, AMD64Instr_Alu32R(aluOp, rmi, dst));
+            return dst;
+         }
+         /* just fall through to normal handling for Iop_32Uto64 */
+      }
+
+      /* Fallback cases */
       switch (e->Iex.Unop.op) {
          case Iop_32Uto64:
          case Iop_32Sto64: {
@@ -2173,10 +2202,8 @@
    if (e->tag == Iex_Unop 
        && e->Iex.Unop.op == Iop_CmpNEZ32) {
       HReg      r1   = iselIntExpr_R(env, e->Iex.Unop.arg);
-      HReg      tmp  = newVRegI(env);
       AMD64RMI* rmi2 = AMD64RMI_Imm(0);
-      addInstr(env, AMD64Instr_MovxLQ(False, r1, tmp));
-      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,rmi2,tmp));
+      addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
       return Acc_NZ;
    }
 
@@ -2246,23 +2273,24 @@
       }
    }
 
-   /* CmpEQ32 / CmpNE32 */
+   /* CmpNE64(ccall, 64-bit constant) (--smc-check=all optimisation).
+      Saves a "movq %rax, %tmp" compared to the default route. */
    if (e->tag == Iex_Binop 
-       && (e->Iex.Binop.op == Iop_CmpEQ32
-           || e->Iex.Binop.op == Iop_CmpNE32
-           || e->Iex.Binop.op == Iop_CasCmpEQ32
-           || e->Iex.Binop.op == Iop_CasCmpNE32)) {
-      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
-      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
-      HReg      r    = newVRegI(env);
-      addInstr(env, mk_iMOVsd_RR(r1,r));
-      addInstr(env, AMD64Instr_Alu64R(Aalu_XOR,rmi2,r));
-      addInstr(env, AMD64Instr_Sh64(Ash_SHL, 32, r));
-      switch (e->Iex.Binop.op) {
-         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
-         case Iop_CmpNE32: case Iop_CasCmpNE32: return Acc_NZ;
-         default: vpanic("iselCondCode(amd64): CmpXX32");
-      }
+       && e->Iex.Binop.op == Iop_CmpNE64
+       && e->Iex.Binop.arg1->tag == Iex_CCall
+       && e->Iex.Binop.arg2->tag == Iex_Const) {
+      IRExpr* cal = e->Iex.Binop.arg1;
+      IRExpr* con = e->Iex.Binop.arg2;
+      HReg    tmp = newVRegI(env);
+      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
+      vassert(cal->Iex.CCall.retty == Ity_I64); /* else ill-typed IR */
+      vassert(con->Iex.Const.con->tag == Ico_U64);
+      /* Marshal args, do the call. */
+      doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args );
+      addInstr(env, AMD64Instr_Imm64(con->Iex.Const.con->Ico.U64, tmp));
+      addInstr(env, AMD64Instr_Alu64R(Aalu_CMP,
+                                      AMD64RMI_Reg(hregAMD64_RAX()), tmp));
+      return Acc_NZ;
    }
 
    /* Cmp*64*(x,y) */
@@ -2289,6 +2317,30 @@
       }
    }
 
+   /* Cmp*32*(x,y) */
+   if (e->tag == Iex_Binop 
+       && (e->Iex.Binop.op == Iop_CmpEQ32
+           || e->Iex.Binop.op == Iop_CmpNE32
+           || e->Iex.Binop.op == Iop_CmpLT32S
+           || e->Iex.Binop.op == Iop_CmpLT32U
+           || e->Iex.Binop.op == Iop_CmpLE32S
+           || e->Iex.Binop.op == Iop_CmpLE32U
+           || e->Iex.Binop.op == Iop_CasCmpEQ32
+           || e->Iex.Binop.op == Iop_CasCmpNE32)) {
+      HReg      r1   = iselIntExpr_R(env, e->Iex.Binop.arg1);
+      AMD64RMI* rmi2 = iselIntExpr_RMI(env, e->Iex.Binop.arg2);
+      addInstr(env, AMD64Instr_Alu32R(Aalu_CMP,rmi2,r1));
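+      /* Signed orderings map to L/LE, unsigned ones to B/BE
+         (below / below-or-equal), as in the Cmp*64* cases above. */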
+      switch (e->Iex.Binop.op) {
+         case Iop_CmpEQ32: case Iop_CasCmpEQ32: return Acc_Z;
+         case Iop_CmpNE32: case Iop_CasCmpNE32: return Acc_NZ;
+         case Iop_CmpLT32S: return Acc_L;
+         case Iop_CmpLT32U: return Acc_B;
+         case Iop_CmpLE32S: return Acc_LE;
+         case Iop_CmpLE32U: return Acc_BE;
+         default: vpanic("iselCondCode(amd64): CmpXX32");
+      }
+   }
+
    ppIRExpr(e);
    vpanic("iselCondCode(amd64)");
 }
@@ -3496,11 +3548,11 @@
          return dst;
       }
 
-      case Iop_QNarrow32Sx4: 
+      case Iop_QNarrowBin32Sto16Sx8: 
          op = Asse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
-      case Iop_QNarrow16Sx8: 
+      case Iop_QNarrowBin16Sto8Sx16: 
          op = Asse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
-      case Iop_QNarrow16Ux8: 
+      case Iop_QNarrowBin16Sto8Ux16: 
          op = Asse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
 
       case Iop_InterleaveHI8x16: 
@@ -3610,8 +3662,19 @@
                            goto do_SseAssistedBinary;
       case Iop_Min8Sx16:   fn = (HWord)h_generic_calc_Min8Sx16;
                            goto do_SseAssistedBinary;
+      case Iop_CmpEQ64x2:  fn = (HWord)h_generic_calc_CmpEQ64x2;
+                           goto do_SseAssistedBinary;
       case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                            goto do_SseAssistedBinary;
+      case Iop_QNarrowBin32Sto16Ux8:
+                           fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
+                           goto do_SseAssistedBinary;
+      case Iop_NarrowBin16to8x16:
+                           fn = (HWord)h_generic_calc_NarrowBin16to8x16;
+                           goto do_SseAssistedBinary;
+      case Iop_NarrowBin32to16x8:
+                           fn = (HWord)h_generic_calc_NarrowBin32to16x8;
+                           goto do_SseAssistedBinary;
       do_SseAssistedBinary: {
          /* RRRufff!  RRRufff code is what we're generating here.  Oh
             well. */
diff --git a/main/VEX/priv/host_arm_defs.c b/main/VEX/priv/host_arm_defs.c
index db1e9c9..fc3c02c 100644
--- a/main/VEX/priv/host_arm_defs.c
+++ b/main/VEX/priv/host_arm_defs.c
@@ -7,11 +7,11 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    NEON support is
-   Copyright (C) 2010-2010 Samsung Electronics
+   Copyright (C) 2010-2011 Samsung Electronics
    contributed by Dmitry Zhurikhin <zhur@ispras.ru>
               and Kirill Batuzov <batuzovk@ispras.ru>
 
@@ -588,7 +588,7 @@
          return x;
       case 10:
          x |= (x & 0x80) << 5;
-         x |= ~(x & 0x40) << 5;
+         x |= (~x & 0x40) << 5;
          x &= 0x187F; /* 0001 1000 0111 1111 */
          x |= (x & 0x40) << 4;
          x |= (x & 0x40) << 3;
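
The one-character fix above is an operator-precedence repair in the NEON immediate expansion: bit 11 of the expanded value must be the inverse of bit 6 of the 8-bit immediate. The old form complemented the already-masked value, so the shift smeared set bits across the word, and some of them survived the later `x &= 0x187F` mask. A small sketch of the difference between the two expressions:

#include <stdio.h>

int main(void) {
   unsigned x = 0x00;                     /* an imm8 with bit 6 clear */
   printf("%#010x\n", ~(x & 0x40) << 5);  /* old: 0xffffffe0, stray bits */
   printf("%#010x\n", (~x & 0x40) << 5);  /* new: 0x00000800, just bit 11 */
   return 0;
}
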
@@ -1206,14 +1206,14 @@
    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
    i->tag             = ARMin_LdrEX;
    i->ARMin.LdrEX.szB = szB;
-   vassert(szB == 8 || szB == 4 || szB == 1);
+   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
    return i;
 }
 ARMInstr* ARMInstr_StrEX ( Int szB ) {
    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
    i->tag             = ARMin_StrEX;
    i->ARMin.StrEX.szB = szB;
-   vassert(szB == 8 || szB == 4 || szB == 1);
+   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
    return i;
 }
 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
@@ -1338,6 +1338,11 @@
    i->tag      = ARMin_MFence;
    return i;
 }
+ARMInstr* ARMInstr_CLREX( void ) {
+   ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+   i->tag      = ARMin_CLREX;
+   return i;
+}
 
 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
@@ -1369,7 +1374,7 @@
    return i;
 }
 
-ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOp op, ARMNRS* dst, ARMNRS* src,
+ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
                              UInt size, Bool Q ) {
    ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
    i->tag                = ARMin_NUnaryS;
@@ -1603,24 +1608,28 @@
             vex_printf("r1:r0, r2, r3");
          }
          return;
-      case ARMin_LdrEX:
-         if (i->ARMin.LdrEX.szB == 8) {
-            vex_printf("ldrexd r2, r3, [r0]");
-         } else {
-            vex_printf("ldrex%s ", i->ARMin.LdrEX.szB == 1 ? "b"
-                                 : i->ARMin.LdrEX.szB == 2 ? "h" : "");
-            vex_printf("r2, [r0]");
-         }
+      case ARMin_LdrEX: {
+         HChar* sz = "";
+         switch (i->ARMin.LdrEX.szB) {
+            case 1: sz = "b"; break; case 2: sz = "h"; break;
+            case 8: sz = "d"; break; case 4: break;
+            default: vassert(0);
+         }      
+         vex_printf("ldrex%s %sr2, [r4]",
+                    sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
          return;
-      case ARMin_StrEX:
-         if (i->ARMin.StrEX.szB == 8) {
-            vex_printf("strexd r1, r2, r3, [r0]");
-         } else {
-            vex_printf("strex%s ", i->ARMin.StrEX.szB == 1 ? "b"
-                                 : i->ARMin.StrEX.szB == 2 ? "h" : "");
-            vex_printf("r1, r2, [r0]");
-         }
+      }
+      case ARMin_StrEX: {
+         HChar* sz = "";
+         switch (i->ARMin.StrEX.szB) {
+            case 1: sz = "b"; break; case 2: sz = "h"; break;
+            case 8: sz = "d"; break; case 4: break;
+            default: vassert(0);
+         }      
+         vex_printf("strex%s r0, %sr2, [r4]",
+                    sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
          return;
+      }
       case ARMin_VLdStD:
          if (i->ARMin.VLdStD.isLoad) {
             vex_printf("fldd  ");
@@ -1755,6 +1764,9 @@
          vex_printf("mfence (mcr 15,0,r0,c7,c10,4; 15,0,r0,c7,c10,5; "
                     "15,0,r0,c7,c5,4)");
          return;
+      case ARMin_CLREX:
+         vex_printf("clrex");
+         return;
       case ARMin_NLdStQ:
          if (i->ARMin.NLdStQ.isLoad)
             vex_printf("vld1.32 {");
@@ -1807,8 +1819,8 @@
          return;
       case ARMin_NUnaryS:
          vex_printf("%s%s%s  ",
-                    showARMNeonUnOpS(i->ARMin.NUnary.op),
-                    showARMNeonUnOpSDataType(i->ARMin.NUnary.op),
+                    showARMNeonUnOpS(i->ARMin.NUnaryS.op),
+                    showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
                     showARMNeonDataSize(i));
          ppARMNRS(i->ARMin.NUnaryS.dst);
          vex_printf(", ");
@@ -1997,14 +2009,14 @@
             addHRegUse(u, HRmWrite, hregARM_R1());
          return;
       case ARMin_LdrEX:
-         addHRegUse(u, HRmRead, hregARM_R0());
+         addHRegUse(u, HRmRead, hregARM_R4());
          addHRegUse(u, HRmWrite, hregARM_R2());
          if (i->ARMin.LdrEX.szB == 8)
             addHRegUse(u, HRmWrite, hregARM_R3());
          return;
       case ARMin_StrEX:
-         addHRegUse(u, HRmRead, hregARM_R0());
-         addHRegUse(u, HRmWrite, hregARM_R1());
+         addHRegUse(u, HRmRead, hregARM_R4());
+         addHRegUse(u, HRmWrite, hregARM_R0());
          addHRegUse(u, HRmRead, hregARM_R2());
          if (i->ARMin.StrEX.szB == 8)
             addHRegUse(u, HRmRead, hregARM_R3());
@@ -2093,6 +2105,8 @@
          return;
       case ARMin_MFence:
          return;
+      case ARMin_CLREX:
+         return;
       case ARMin_NLdStQ:
          if (i->ARMin.NLdStQ.isLoad)
             addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
@@ -2271,6 +2285,8 @@
          return;
       case ARMin_MFence:
          return;
+      case ARMin_CLREX:
+         return;
       case ARMin_NLdStQ:
          i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
          mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
@@ -2349,11 +2365,17 @@
             return True;
          }
          break;
+      case ARMin_NUnary:
+         if (i->ARMin.NUnary.op == ARMneon_COPY) {
+            *src = i->ARMin.NUnary.src;
+            *dst = i->ARMin.NUnary.dst;
+            return True;
+         }
+         break;
       default:
          break;
    }
 
-   // todo: float, vector moves
    return False;
 }
 
@@ -2684,14 +2706,13 @@
 
 
 Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i,
-                    Bool mode64, void* dispatch ) 
+                    Bool mode64,
+                    void* dispatch_unassisted, void* dispatch_assisted ) 
 {
    UInt* p = (UInt*)buf;
    vassert(nbuf >= 32);
    vassert(mode64 == False);
    vassert(0 == (((HWord)buf) & 3));
-   /* since we branch to lr(r13) to get back to dispatch: */
-   vassert(dispatch == NULL);
 
    switch (i->tag) {
       case ARMin_Alu: {
@@ -2879,6 +2900,9 @@
          ARMCondCode cond  = i->ARMin.Goto.cond;
          UInt        rnext = iregNo(i->ARMin.Goto.gnext);
          Int         trc   = -1;
+         /* since we branch to lr (r14) to get back to dispatch: */
+         vassert(dispatch_unassisted == NULL);
+         vassert(dispatch_assisted == NULL);
          switch (jk) {
             case Ijk_Ret: case Ijk_Call: case Ijk_Boring:
                break; /* no need to set GST in these common cases */
@@ -2971,31 +2995,31 @@
          goto bad;
       }
       case ARMin_LdrEX: {
-         /* E1B01F9F   ldrexd   r2, r3, [r0]
-            E1901F9F   ldrex    r2, [r0]
-            E1F01F9F   ldrexh   r2, [r0]
-            E1D01F9F   ldrexb   r2, [r0]
+         /* E1D42F9F   ldrexb r2, [r4]
+            E1F42F9F   ldrexh r2, [r4]
+            E1942F9F   ldrex  r2, [r4]
+            E1B42F9F   ldrexd r2, r3, [r4]
          */
          switch (i->ARMin.LdrEX.szB) {
-            case 8: *p++ = 0xE1B02F9F; goto done;
-            case 4: *p++ = 0xE1902F9F; goto done;
-            //case 2: *p++ = 0xE1F02F9F; goto done;
-            case 1: *p++ = 0xE1D02F9F; goto done;
+            case 1: *p++ = 0xE1D42F9F; goto done;
+            case 2: *p++ = 0xE1F42F9F; goto done;
+            case 4: *p++ = 0xE1942F9F; goto done;
+            case 8: *p++ = 0xE1B42F9F; goto done;
             default: break;
          }
          goto bad;
       }
       case ARMin_StrEX: {
-         /* E1A01F92   strexd  r1, r2, r3, [r0]
-            E1801F92   strex   r1, r2, [r0]
-            E1E01F92   strexh  r1, r2, [r0]
-            E1C01F92   strexb  r1, r2, [r0]
+         /* E1C40F92   strexb r0, r2, [r4]
+            E1E40F92   strexh r0, r2, [r4]
+            E1840F92   strex  r0, r2, [r4]
+            E1A40F92   strexd r0, r2, r3, [r4]
          */
          switch (i->ARMin.StrEX.szB) {
-            case 8: *p++ = 0xE1A01F92; goto done;
-            case 4: *p++ = 0xE1801F92; goto done;
-            //case 2: *p++ = 0xE1E01F92; goto done;
-            case 1: *p++ = 0xE1C01F92; goto done;
+            case 1: *p++ = 0xE1C40F92; goto done;
+            case 2: *p++ = 0xE1E40F92; goto done;
+            case 4: *p++ = 0xE1840F92; goto done;
+            case 8: *p++ = 0xE1A40F92; goto done;
             default: break;
          }
          goto bad;
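
These are fixed-register encodings, so each opcode is a single constant with Rn=r4 and Rt=r2 baked in. A hedged decode of the ldrex word, assuming the usual A1 field layout (base | Rn<<16 | Rt<<12); the strex constants follow the same pattern, with the status register in bits 12-15:

#include <stdio.h>

int main(void) {
   unsigned base = 0xE1900F9F;   /* ldrex with Rn = Rt = 0      */
   unsigned rN = 4, rT = 2;      /* the hardwired r4 / r2       */
   unsigned insn = base | (rN << 16) | (rT << 12);
   printf("%#010x\n", insn);     /* 0xe1942f9f, as listed above */
   return 0;
}
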
@@ -3276,6 +3300,11 @@
          *p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4  (ISB) */
          goto done;
       }
+      case ARMin_CLREX: {
+         *p++ = 0xF57FF01F; /* clrex */
+         goto done;
+      }
+
       case ARMin_NLdStQ: {
          UInt regD = qregNo(i->ARMin.NLdStQ.dQ) << 1;
          UInt regN, regM;
diff --git a/main/VEX/priv/host_arm_defs.h b/main/VEX/priv/host_arm_defs.h
index 1901e80..0dea3f5 100644
--- a/main/VEX/priv/host_arm_defs.h
+++ b/main/VEX/priv/host_arm_defs.h
@@ -1,4 +1,3 @@
-
 /*---------------------------------------------------------------*/
 /*--- begin                                   host_arm_defs.h ---*/
 /*---------------------------------------------------------------*/
@@ -7,7 +6,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -587,6 +586,7 @@
       ARMin_VCvtID,
       ARMin_FPSCR,
       ARMin_MFence,
+      ARMin_CLREX,
       /* Neon */
       ARMin_NLdStQ,
       ARMin_NLdStD,
@@ -709,18 +709,21 @@
          struct {
             ARMMulOp op;
          } Mul;
-         /* LDREX{,H,B} r0, [r1]
+         /* LDREX{,H,B} r2, [r4]  and
+            LDREXD r2, r3, [r4]   (on LE hosts, transferred value is r3:r2)
             Again, hardwired registers since this is not performance
             critical, and there are possibly constraints on the
             registers that we can't express in the register allocator.*/
          struct {
-            Int  szB; /* currently only 4 is allowed */
+            Int  szB; /* 1, 2, 4 or 8 */
          } LdrEX;
-         /* STREX{,H,B} r0, r1, [r2]
-            r0 = SC( [r2] = r1 )
+         /* STREX{,H,B} r0, r2, [r4]  and  
+            STREXD r0, r2, r3, [r4]   (on LE hosts, transferred value is r3:r2)
+            r0 = SC( [r4] = r2 )      (8, 16, 32 bit transfers)
+            r0 = SC( [r4] = r3:r2)    (64 bit transfers)
             Ditto comment re fixed registers. */
          struct {
-            Int  szB; /* currently only 4 is allowed */
+            Int  szB; /* 1, 2, 4 or 8 */
          } StrEX;
          /* VFP INSTRUCTIONS */
          /* 64-bit Fp load/store */
@@ -824,6 +827,9 @@
          */
          struct {
          } MFence;
+         /* A CLREX instruction. */
+         struct {
+         } CLREX;
          /* Neon data processing instruction: 3 registers of the same
             length */
          struct {
@@ -937,10 +943,11 @@
                                      HReg dst, HReg src );
 extern ARMInstr* ARMInstr_FPSCR    ( Bool toFPSCR, HReg iReg );
 extern ARMInstr* ARMInstr_MFence   ( void );
+extern ARMInstr* ARMInstr_CLREX    ( void );
 extern ARMInstr* ARMInstr_NLdStQ   ( Bool isLoad, HReg, ARMAModeN* );
 extern ARMInstr* ARMInstr_NLdStD   ( Bool isLoad, HReg, ARMAModeN* );
 extern ARMInstr* ARMInstr_NUnary   ( ARMNeonUnOp, HReg, HReg, UInt, Bool );
-extern ARMInstr* ARMInstr_NUnaryS  ( ARMNeonUnOp, ARMNRS*, ARMNRS*,
+extern ARMInstr* ARMInstr_NUnaryS  ( ARMNeonUnOpS, ARMNRS*, ARMNRS*,
                                      UInt, Bool );
 extern ARMInstr* ARMInstr_NDual    ( ARMNeonDualOp, HReg, HReg, UInt, Bool );
 extern ARMInstr* ARMInstr_NBinary  ( ARMNeonBinOp, HReg, HReg, HReg,
@@ -960,7 +967,9 @@
 extern void mapRegs_ARMInstr     ( HRegRemap*, ARMInstr*, Bool );
 extern Bool isMove_ARMInstr      ( ARMInstr*, HReg*, HReg* );
 extern Int  emit_ARMInstr        ( UChar* buf, Int nbuf, ARMInstr*, 
-                                   Bool, void* dispatch );
+                                   Bool,
+                                   void* dispatch_unassisted,
+                                   void* dispatch_assisted );
 
 extern void genSpill_ARM  ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                             HReg rreg, Int offset, Bool );
diff --git a/main/VEX/priv/host_arm_isel.c b/main/VEX/priv/host_arm_isel.c
index 45c6f37..e695567 100644
--- a/main/VEX/priv/host_arm_isel.c
+++ b/main/VEX/priv/host_arm_isel.c
@@ -7,11 +7,11 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    NEON support is
-   Copyright (C) 2010-2010 Samsung Electronics
+   Copyright (C) 2010-2011 Samsung Electronics
    contributed by Dmitry Zhurikhin <zhur@ispras.ru>
               and Kirill Batuzov <batuzovk@ispras.ru>
 
@@ -211,8 +211,8 @@
 static ARMAModeV*  iselIntExpr_AModeV_wrk ( ISelEnv* env, IRExpr* e );
 static ARMAModeV*  iselIntExpr_AModeV     ( ISelEnv* env, IRExpr* e );
 
-static ARMAModeN* iselIntExpr_AModeN_wrk  ( ISelEnv* env, IRExpr* e );
-static ARMAModeN* iselIntExpr_AModeN      ( ISelEnv* env, IRExpr* e );
+static ARMAModeN*  iselIntExpr_AModeN_wrk ( ISelEnv* env, IRExpr* e );
+static ARMAModeN*  iselIntExpr_AModeN     ( ISelEnv* env, IRExpr* e );
 
 static ARMRI84*    iselIntExpr_RI84_wrk
         ( /*OUT*/Bool* didInv, Bool mayInv, ISelEnv* env, IRExpr* e );
@@ -3351,64 +3351,64 @@
             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, False));
             return res;
          }
-         case Iop_Shorten16x8:
-         case Iop_Shorten32x4:
-         case Iop_Shorten64x2: {
+         case Iop_NarrowUn16to8x8:
+         case Iop_NarrowUn32to16x4:
+         case Iop_NarrowUn64to32x2: {
             HReg res = newVRegD(env);
             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
             UInt size = 0;
             switch(e->Iex.Binop.op) {
-               case Iop_Shorten16x8: size = 0; break;
-               case Iop_Shorten32x4: size = 1; break;
-               case Iop_Shorten64x2: size = 2; break;
+               case Iop_NarrowUn16to8x8:  size = 0; break;
+               case Iop_NarrowUn32to16x4: size = 1; break;
+               case Iop_NarrowUn64to32x2: size = 2; break;
                default: vassert(0);
             }
             addInstr(env, ARMInstr_NUnary(ARMneon_COPYN,
                                           res, arg, size, False));
             return res;
          }
-         case Iop_QShortenS16Sx8:
-         case Iop_QShortenS32Sx4:
-         case Iop_QShortenS64Sx2: {
+         case Iop_QNarrowUn16Sto8Sx8:
+         case Iop_QNarrowUn32Sto16Sx4:
+         case Iop_QNarrowUn64Sto32Sx2: {
             HReg res = newVRegD(env);
             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
             UInt size = 0;
             switch(e->Iex.Binop.op) {
-               case Iop_QShortenS16Sx8: size = 0; break;
-               case Iop_QShortenS32Sx4: size = 1; break;
-               case Iop_QShortenS64Sx2: size = 2; break;
+               case Iop_QNarrowUn16Sto8Sx8:  size = 0; break;
+               case Iop_QNarrowUn32Sto16Sx4: size = 1; break;
+               case Iop_QNarrowUn64Sto32Sx2: size = 2; break;
                default: vassert(0);
             }
             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNSS,
                                           res, arg, size, False));
             return res;
          }
-         case Iop_QShortenU16Sx8:
-         case Iop_QShortenU32Sx4:
-         case Iop_QShortenU64Sx2: {
+         case Iop_QNarrowUn16Sto8Ux8:
+         case Iop_QNarrowUn32Sto16Ux4:
+         case Iop_QNarrowUn64Sto32Ux2: {
             HReg res = newVRegD(env);
             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
             UInt size = 0;
             switch(e->Iex.Binop.op) {
-               case Iop_QShortenU16Sx8: size = 0; break;
-               case Iop_QShortenU32Sx4: size = 1; break;
-               case Iop_QShortenU64Sx2: size = 2; break;
+               case Iop_QNarrowUn16Sto8Ux8:  size = 0; break;
+               case Iop_QNarrowUn32Sto16Ux4: size = 1; break;
+               case Iop_QNarrowUn64Sto32Ux2: size = 2; break;
                default: vassert(0);
             }
             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUS,
                                           res, arg, size, False));
             return res;
          }
-         case Iop_QShortenU16Ux8:
-         case Iop_QShortenU32Ux4:
-         case Iop_QShortenU64Ux2: {
+         case Iop_QNarrowUn16Uto8Ux8:
+         case Iop_QNarrowUn32Uto16Ux4:
+         case Iop_QNarrowUn64Uto32Ux2: {
             HReg res = newVRegD(env);
             HReg arg = iselNeonExpr(env, e->Iex.Unop.arg);
             UInt size = 0;
             switch(e->Iex.Binop.op) {
-               case Iop_QShortenU16Ux8: size = 0; break;
-               case Iop_QShortenU32Ux4: size = 1; break;
-               case Iop_QShortenU64Ux2: size = 2; break;
+               case Iop_QNarrowUn16Uto8Ux8:  size = 0; break;
+               case Iop_QNarrowUn32Uto16Ux4: size = 1; break;
+               case Iop_QNarrowUn64Uto32Ux2: size = 2; break;
                default: vassert(0);
             }
             addInstr(env, ARMInstr_NUnary(ARMneon_COPYQNUU,
@@ -3974,32 +3974,32 @@
             addInstr(env, ARMInstr_NUnary(ARMneon_NOT, res, tmp, 4, True));
             return res;
          }
-         case Iop_Longen8Ux8:
-         case Iop_Longen16Ux4:
-         case Iop_Longen32Ux2: {
+         case Iop_Widen8Uto16x8:
+         case Iop_Widen16Uto32x4:
+         case Iop_Widen32Uto64x2: {
             HReg res = newVRegV(env);
             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
             UInt size;
             switch (e->Iex.Unop.op) {
-               case Iop_Longen8Ux8: size = 0; break;
-               case Iop_Longen16Ux4: size = 1; break;
-               case Iop_Longen32Ux2: size = 2; break;
+               case Iop_Widen8Uto16x8:  size = 0; break;
+               case Iop_Widen16Uto32x4: size = 1; break;
+               case Iop_Widen32Uto64x2: size = 2; break;
                default: vassert(0);
             }
             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLU,
                                           res, arg, size, True));
             return res;
          }
-         case Iop_Longen8Sx8:
-         case Iop_Longen16Sx4:
-         case Iop_Longen32Sx2: {
+         case Iop_Widen8Sto16x8:
+         case Iop_Widen16Sto32x4:
+         case Iop_Widen32Sto64x2: {
             HReg res = newVRegV(env);
             HReg arg = iselNeon64Expr(env, e->Iex.Unop.arg);
             UInt size;
             switch (e->Iex.Unop.op) {
-               case Iop_Longen8Sx8: size = 0; break;
-               case Iop_Longen16Sx4: size = 1; break;
-               case Iop_Longen32Sx2: size = 2; break;
+               case Iop_Widen8Sto16x8:  size = 0; break;
+               case Iop_Widen16Sto32x4: size = 1; break;
+               case Iop_Widen32Sto64x2: size = 2; break;
                default: vassert(0);
             }
             addInstr(env, ARMInstr_NUnary(ARMneon_COPYLS,
@@ -4217,7 +4217,42 @@
                   }
                }
             }
-            /* Does not match "VMOV Reg, Imm" form */
+            /* Does not match "VMOV Reg, Imm" form.  We'll have to do
+               it the slow way. */
+            { 
+               /* local scope */
+               /* Done via the stack for ease of use. */
+               /* FIXME: assumes little endian host */
+               HReg       w3, w2, w1, w0;
+               HReg       res  = newVRegV(env);
+               ARMAMode1* sp_0  = ARMAMode1_RI(hregARM_R13(), 0);
+               ARMAMode1* sp_4  = ARMAMode1_RI(hregARM_R13(), 4);
+               ARMAMode1* sp_8  = ARMAMode1_RI(hregARM_R13(), 8);
+               ARMAMode1* sp_12 = ARMAMode1_RI(hregARM_R13(), 12);
+               ARMRI84*   c_16  = ARMRI84_I84(16,0);
+               /* Make space on the stack */
+               addInstr(env, ARMInstr_Alu(ARMalu_SUB, hregARM_R13(),
+                                                      hregARM_R13(), c_16));
+
+               /* Store the less significant 64 bits */
+               iselInt64Expr(&w1, &w0, env, e->Iex.Binop.arg2);
+               addInstr(env, ARMInstr_LdSt32(False/*store*/, w0, sp_0));
+               addInstr(env, ARMInstr_LdSt32(False/*store*/, w1, sp_4));
+
+               /* Store the more significant 64 bits */
+               iselInt64Expr(&w3, &w2, env, e->Iex.Binop.arg1);
+               addInstr(env, ARMInstr_LdSt32(False/*store*/, w2, sp_8));
+               addInstr(env, ARMInstr_LdSt32(False/*store*/, w3, sp_12));
+
+               /* Load the result back from the stack */
+               addInstr(env, ARMInstr_NLdStQ(True/*load*/, res,
+                                             mkARMAModeN_R(hregARM_R13())));
+
+               /* Restore SP */
+               addInstr(env, ARMInstr_Alu(ARMalu_ADD, hregARM_R13(),
+                                                      hregARM_R13(), c_16));
+               return res;
+            } /* local scope */
             goto neon_expr_bad;
          case Iop_AndV128: {
             HReg res = newVRegV(env);
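
The fallback above builds a 128-bit literal through memory: drop SP by 16, store the two 64-bit halves word by word, then reload the whole quadword into a NEON register. The same round-trip in portable C (little-endian assumed, as the FIXME notes; buffer and names are purely illustrative):

#include <stdio.h>
#include <string.h>
#include <inttypes.h>

int main(void) {
   uint64_t lo = 0x1122334455667788ULL, hi = 0x99AABBCCDDEEFF00ULL;
   unsigned char stack[16];          /* stands in for [sp+0 .. sp+15] */
   memcpy(stack + 0, &lo, 8);        /* the w0/w1 stores              */
   memcpy(stack + 8, &hi, 8);        /* the w2/w3 stores              */
   uint64_t q[2];
   memcpy(q, stack, 16);             /* the vld1-style 128-bit reload */
   printf("%016" PRIx64 " %016" PRIx64 "\n", q[1], q[0]);
   return 0;
}
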
@@ -5820,84 +5855,86 @@
          /* LL */
          IRTemp res = stmt->Ist.LLSC.result;
          IRType ty  = typeOfIRTemp(env->type_env, res);
-         if (ty == Ity_I32 || ty == Ity_I8) {
+         if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
             Int  szB   = 0;
             HReg r_dst = lookupIRTemp(env, res);
             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
             switch (ty) {
                case Ity_I8:  szB = 1; break;
+               case Ity_I16: szB = 2; break;
                case Ity_I32: szB = 4; break;
                default:      vassert(0);
             }
-            addInstr(env, mk_iMOVds_RR(hregARM_R0(), raddr));
+            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
             addInstr(env, ARMInstr_LdrEX(szB));
             addInstr(env, mk_iMOVds_RR(r_dst, hregARM_R2()));
             return;
-         } else if (ty == Ity_I64) {
+         }
+         if (ty == Ity_I64) {
             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-            addInstr(env, mk_iMOVds_RR(hregARM_R0(), raddr));
-            addInstr(env, ARMInstr_LdrEX(8 /* 64-bit */));
+            addInstr(env, mk_iMOVds_RR(hregARM_R4(), raddr));
+            addInstr(env, ARMInstr_LdrEX(8));
+            /* Result is in r3:r2.  On a non-NEON capable CPU, we must
+               move it into a result register pair.  On a NEON capable
+               CPU, the result register will be a 64 bit NEON
+               register, so we must move it there instead. */
             if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
-                HReg tmp = lookupIRTemp(env, res);
-                addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R2(),
-                        hregARM_R3()));
+               HReg dst = lookupIRTemp(env, res);
+               addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
+                                                        hregARM_R2()));
             } else {
-                HReg dstHi, dstLo;
-                /* The returned value is in r1:r0.  Park it in the
-                   register-pair associated with tmp. */
-                lookupIRTemp64( &dstHi, &dstLo, env, res);
-                addInstr(env, mk_iMOVds_RR(dstHi, hregARM_R2()) );
-                addInstr(env, mk_iMOVds_RR(dstLo, hregARM_R3()) );
+               HReg r_dst_hi, r_dst_lo;
+               lookupIRTemp64(&r_dst_hi, &r_dst_lo, env, res);
+               addInstr(env, mk_iMOVds_RR(r_dst_lo, hregARM_R2()));
+               addInstr(env, mk_iMOVds_RR(r_dst_hi, hregARM_R3()));
             }
             return;
          }
-         /* else fall thru; is unhandled */
+         /*NOTREACHED*/
+         vassert(0); 
       } else {
          /* SC */
-         IRTemp res = stmt->Ist.LLSC.result;
-         IRType ty  = typeOfIRTemp(env->type_env, res);
          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
-         vassert(ty == Ity_I1);
-         if (tyd == Ity_I32 || tyd == Ity_I8) {
-            Int  szB     = 0;
-            HReg r_res   = lookupIRTemp(env, res);
-            HReg rD      = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
-            HReg rA      = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-            ARMRI84* one = ARMRI84_I84(1,0);
+         if (tyd == Ity_I32 || tyd == Ity_I16 || tyd == Ity_I8) {
+            Int  szB = 0;
+            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
+            HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
             switch (tyd) {
                case Ity_I8:  szB = 1; break;
+               case Ity_I16: szB = 2; break;
                case Ity_I32: szB = 4; break;
                default:      vassert(0);
             }
-            addInstr(env, mk_iMOVds_RR(hregARM_R0(), rA));
             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rD));
+            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
             addInstr(env, ARMInstr_StrEX(szB));
-            /* now r1 is 1 if failed, 0 if success.  Change to IR
-               conventions (0 is fail, 1 is success).  Also transfer
-               result to r_res. */
-            addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R1(), one));
-            /* And be conservative -- mask off all but the lowest bit */
-            addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
-            return;
-         } else if (tyd == Ity_I64) {
-             HReg r_res   = lookupIRTemp(env, res);
-             HReg rA      = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-             ARMRI84* one = ARMRI84_I84(1,0);
-             HReg rDHi, rDLo;
-             iselInt64Expr(&rDHi, &rDLo, env, stmt->Ist.LLSC.storedata);
-             addInstr(env, mk_iMOVds_RR(hregARM_R0(), rA));
-             addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDHi));
-             addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDLo));
-             addInstr(env, ARMInstr_StrEX(8 /* 64-bit */));
-             /* now r1 is 1 if failed, 0 if success.  Change to IR
-                conventions (0 is fail, 1 is success).  Also transfer
-                result to r_res. */
-             addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R1(), one));
-             /* And be conservative -- mask off all but the lowest bit */
-             addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
-             return;
+         } else {
+            vassert(tyd == Ity_I64);
+            /* This is really ugly.  There is no is/is-not NEON
+               decision akin to the case for LL, because iselInt64Expr
+               fudges this for us, and always gets the result into two
+               GPRs even if this means moving it from a NEON
+               register. */
+            HReg rDhi, rDlo;
+            iselInt64Expr(&rDhi, &rDlo, env, stmt->Ist.LLSC.storedata);
+            HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
+            addInstr(env, mk_iMOVds_RR(hregARM_R2(), rDlo));
+            addInstr(env, mk_iMOVds_RR(hregARM_R3(), rDhi));
+            addInstr(env, mk_iMOVds_RR(hregARM_R4(), rA));
+            addInstr(env, ARMInstr_StrEX(8));
          }
-         /* else fall thru; is unhandled */
+         /* now r0 is 1 if failed, 0 if success.  Change to IR
+            conventions (0 is fail, 1 is success).  Also transfer
+            result to r_res. */
+         IRTemp   res   = stmt->Ist.LLSC.result;
+         IRType   ty    = typeOfIRTemp(env->type_env, res);
+         HReg     r_res = lookupIRTemp(env, res);
+         ARMRI84* one   = ARMRI84_I84(1,0);
+         vassert(ty == Ity_I1);
+         addInstr(env, ARMInstr_Alu(ARMalu_XOR, r_res, hregARM_R0(), one));
+         /* And be conservative -- mask off all but the lowest bit */
+         addInstr(env, ARMInstr_Alu(ARMalu_AND, r_res, r_res, one));
+         return;
       }
       break;
    }
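
The tail of the SC path converts the hardware status into the IR convention: strex writes 0 to r0 on success and 1 on failure, while the IR result wants 1 for success. The fixup is just (r0 ^ 1) & 1, shown here in isolation:

#include <stdio.h>

static int sc_to_ir(int r0) { return (r0 ^ 1) & 1; }

int main(void) {
   printf("%d\n", sc_to_ir(0));  /* hardware success -> IR 1 */
   printf("%d\n", sc_to_ir(1));  /* hardware failure -> IR 0 */
   return 0;
}
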
@@ -5906,7 +5943,10 @@
    case Ist_MBE:
       switch (stmt->Ist.MBE.event) {
          case Imbe_Fence:
-            addInstr(env,ARMInstr_MFence());
+            addInstr(env, ARMInstr_MFence());
+            return;
+         case Imbe_CancelReservation:
+            addInstr(env, ARMInstr_CLREX());
             return;
          default:
             break;
@@ -5977,7 +6017,6 @@
    HReg     hreg, hregHI;
    ISelEnv* env;
    UInt     hwcaps_host = archinfo_host->hwcaps;
-   Bool     neon = False;
    static UInt counter = 0;
 
    /* sanity ... */
@@ -6015,7 +6054,6 @@
          case Ity_I64:
             if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
                hreg = mkHReg(j++, HRcFlt64, True);
-               neon = True;
             } else {
                hregHI = mkHReg(j++, HRcInt32, True);
                hreg   = mkHReg(j++, HRcInt32, True);
@@ -6023,8 +6061,7 @@
             break;
          case Ity_F32:  hreg   = mkHReg(j++, HRcFlt32, True); break;
          case Ity_F64:  hreg   = mkHReg(j++, HRcFlt64, True); break;
-         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True);
-                        neon   = True; break;
+         case Ity_V128: hreg   = mkHReg(j++, HRcVec128, True); break;
          default: ppIRType(bb->tyenv->types[i]);
                   vpanic("iselBB: IRTemp type");
       }
diff --git a/main/VEX/priv/host_generic_reg_alloc2.c b/main/VEX/priv/host_generic_reg_alloc2.c
index 48303ff..5052d9d 100644
--- a/main/VEX/priv/host_generic_reg_alloc2.c
+++ b/main/VEX/priv/host_generic_reg_alloc2.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
diff --git a/main/VEX/priv/host_generic_regs.c b/main/VEX/priv/host_generic_regs.c
index e36b4dc..713add9 100644
--- a/main/VEX/priv/host_generic_regs.c
+++ b/main/VEX/priv/host_generic_regs.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -166,7 +166,7 @@
    if (!hregIsVirtual(orig))
       vpanic("addToHRegMap: orig is not a vreg");
    if (hregIsVirtual(replacement))
-      vpanic("addToHRegMap: replacement is not a vreg");
+      vpanic("addToHRegMap: replacement is a vreg");
 
    vassert(map->n_used+1 < N_HREG_REMAP);
    map->orig[map->n_used]        = orig;
diff --git a/main/VEX/priv/host_generic_regs.h b/main/VEX/priv/host_generic_regs.h
index 1c6826c..0fde5ae 100644
--- a/main/VEX/priv/host_generic_regs.h
+++ b/main/VEX/priv/host_generic_regs.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
diff --git a/main/VEX/priv/host_generic_simd128.c b/main/VEX/priv/host_generic_simd128.c
index 2430e67..6e1100c 100644
--- a/main/VEX/priv/host_generic_simd128.c
+++ b/main/VEX/priv/host_generic_simd128.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2010-2010 OpenWorks GbR
+   Copyright (C) 2010-2011 OpenWorks GbR
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -88,6 +88,12 @@
    return toUChar((xx < yy) ? xx : yy);
 }
 
+static inline ULong cmpEQ64 ( Long xx, Long yy )
+{
+   return (((Long)xx) == ((Long)yy))
+             ? 0xFFFFFFFFFFFFFFFFULL : 0ULL;
+}
+
 static inline ULong cmpGT64S ( Long xx, Long yy )
 {
    return (((Long)xx) > ((Long)yy))
@@ -104,7 +110,27 @@
    return toUChar(((Char)v) >> n);
 }
 
-void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
+static inline UShort qnarrow32Sto16U ( UInt xx0 )
+{
+   Int xx = (Int)xx0;
+   if (xx < 0)     xx = 0;
+   if (xx > 65535) xx = 65535;
+   return (UShort)xx;
+}
+
+static inline UShort narrow32to16 ( UInt xx )
+{
+   return (UShort)xx;
+}
+
+static inline UChar narrow16to8 ( UShort xx )
+{
+   return (UChar)xx;
+}
+
+
+void VEX_REGPARM(3)
+     h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
 {
    res->w32[0] = mul32(argL->w32[0], argR->w32[0]);
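
The helpers added in the hunk above make the saturating/truncating distinction concrete: qnarrow32Sto16U clamps a signed 32-bit value into the unsigned 16-bit range, while narrow32to16 simply drops the high half (and, as the later definitions show, argR supplies the low output lanes). A standalone check of the two behaviours:

#include <stdio.h>

static unsigned short qnarrow32Sto16U(unsigned xx0) {
   int xx = (int)xx0;
   if (xx < 0)     xx = 0;
   if (xx > 65535) xx = 65535;
   return (unsigned short)xx;
}

static unsigned short narrow32to16(unsigned xx) {
   return (unsigned short)xx;
}

int main(void) {
   printf("%u %u\n", (unsigned)qnarrow32Sto16U(0xFFFFFFFBu),
                     (unsigned)narrow32to16(0xFFFFFFFBu));
   /* 0 65531: -5 saturates to 0, truncation keeps the low 16 bits */
   printf("%u %u\n", (unsigned)qnarrow32Sto16U(70000u),
                     (unsigned)narrow32to16(70000u));
   /* 65535 4464: clamp at 2^16-1 versus 70000 & 0xFFFF */
   return 0;
}
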
@@ -113,7 +139,8 @@
    res->w32[3] = mul32(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = max32S(argL->w32[0], argR->w32[0]);
@@ -122,7 +149,8 @@
    res->w32[3] = max32S(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = min32S(argL->w32[0], argR->w32[0]);
@@ -131,7 +159,8 @@
    res->w32[3] = min32S(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = max32U(argL->w32[0], argR->w32[0]);
@@ -140,7 +169,8 @@
    res->w32[3] = max32U(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = min32U(argL->w32[0], argR->w32[0]);
@@ -149,7 +179,8 @@
    res->w32[3] = min32U(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w16[0] = max16U(argL->w16[0], argR->w16[0]);
@@ -162,7 +193,8 @@
    res->w16[7] = max16U(argL->w16[7], argR->w16[7]);
 }
 
-void h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w16[0] = min16U(argL->w16[0], argR->w16[0]);
@@ -175,7 +207,8 @@
    res->w16[7] = min16U(argL->w16[7], argR->w16[7]);
 }
 
-void h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w8[ 0] = max8S(argL->w8[ 0], argR->w8[ 0]);
@@ -196,7 +229,8 @@
    res->w8[15] = max8S(argL->w8[15], argR->w8[15]);
 }
 
-void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w8[ 0] = min8S(argL->w8[ 0], argR->w8[ 0]);
@@ -217,7 +251,16 @@
    res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
 }
 
-void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
+                                V128* argL, V128* argR )
+{
+   res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]);
+   res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]);
+}
+
+void VEX_REGPARM(3)
+     h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
                                  V128* argL, V128* argR )
 {
    res->w64[0] = cmpGT64S(argL->w64[0], argR->w64[0]);
@@ -231,7 +274,8 @@
    semantics of these primops (Sar64x2, etc) it is an error if in
    fact we are ever given an out-of-range shift amount. 
 */
-void h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
+void /*not-regparm*/
+     h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
                                V128* argL, UInt nn)
 {
    /* vassert(nn < 64); */
@@ -240,7 +284,8 @@
    res->w64[1] = sar64(argL->w64[1], nn);
 }
 
-void h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
+void /*not-regparm*/
+     h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
                               V128* argL, UInt nn)
 {
    /* vassert(nn < 8); */
@@ -263,6 +308,57 @@
    res->w8[15] = sar8(argL->w8[15], nn);
 }
 
+void VEX_REGPARM(3)
+     h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
+                                           V128* argL, V128* argR )
+{
+   res->w16[0] = qnarrow32Sto16U(argR->w32[0]);
+   res->w16[1] = qnarrow32Sto16U(argR->w32[1]);
+   res->w16[2] = qnarrow32Sto16U(argR->w32[2]);
+   res->w16[3] = qnarrow32Sto16U(argR->w32[3]);
+   res->w16[4] = qnarrow32Sto16U(argL->w32[0]);
+   res->w16[5] = qnarrow32Sto16U(argL->w32[1]);
+   res->w16[6] = qnarrow32Sto16U(argL->w32[2]);
+   res->w16[7] = qnarrow32Sto16U(argL->w32[3]);
+}
+
+void VEX_REGPARM(3)
+     h_generic_calc_NarrowBin16to8x16 ( /*OUT*/V128* res,
+                                        V128* argL, V128* argR )
+{
+   res->w8[ 0] = narrow16to8(argR->w16[0]);
+   res->w8[ 1] = narrow16to8(argR->w16[1]);
+   res->w8[ 2] = narrow16to8(argR->w16[2]);
+   res->w8[ 3] = narrow16to8(argR->w16[3]);
+   res->w8[ 4] = narrow16to8(argR->w16[4]);
+   res->w8[ 5] = narrow16to8(argR->w16[5]);
+   res->w8[ 6] = narrow16to8(argR->w16[6]);
+   res->w8[ 7] = narrow16to8(argR->w16[7]);
+   res->w8[ 8] = narrow16to8(argL->w16[0]);
+   res->w8[ 9] = narrow16to8(argL->w16[1]);
+   res->w8[10] = narrow16to8(argL->w16[2]);
+   res->w8[11] = narrow16to8(argL->w16[3]);
+   res->w8[12] = narrow16to8(argL->w16[4]);
+   res->w8[13] = narrow16to8(argL->w16[5]);
+   res->w8[14] = narrow16to8(argL->w16[6]);
+   res->w8[15] = narrow16to8(argL->w16[7]);
+}
+
+void VEX_REGPARM(3)
+     h_generic_calc_NarrowBin32to16x8 ( /*OUT*/V128* res,
+                                        V128* argL, V128* argR )
+{
+   res->w16[0] = narrow32to16(argR->w32[0]);
+   res->w16[1] = narrow32to16(argR->w32[1]);
+   res->w16[2] = narrow32to16(argR->w32[2]);
+   res->w16[3] = narrow32to16(argR->w32[3]);
+   res->w16[4] = narrow32to16(argL->w32[0]);
+   res->w16[5] = narrow32to16(argL->w32[1]);
+   res->w16[6] = narrow32to16(argL->w32[2]);
+   res->w16[7] = narrow32to16(argL->w32[3]);
+}
+
+
 /*---------------------------------------------------------------*/
 /*--- end                              host_generic_simd128.c ---*/
 /*---------------------------------------------------------------*/
diff --git a/main/VEX/priv/host_generic_simd128.h b/main/VEX/priv/host_generic_simd128.h
index d764439..6f9cc97 100644
--- a/main/VEX/priv/host_generic_simd128.h
+++ b/main/VEX/priv/host_generic_simd128.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2010-2010 OpenWorks GbR
+   Copyright (C) 2010-2011 OpenWorks GbR
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -45,22 +45,43 @@
 
 #include "libvex_basictypes.h"
 
-/* DO NOT MAKE THESE INTO REGPARM FNS!  THIS WILL BREAK CALLING
-   SEQUENCES GENERATED BY host-x86/isel.c. */
+extern VEX_REGPARM(3)
+       void h_generic_calc_Mul32x4    ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max32Sx4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min32Sx4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max32Ux4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min32Ux4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max16Ux8   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min16Ux8   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max8Sx16   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min8Sx16   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_CmpEQ64x2  ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
 
-extern void h_generic_calc_Mul32x4    ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max32Sx4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min32Sx4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max32Ux4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min32Ux4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max16Ux8   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min16Ux8   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max8Sx16   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min8Sx16   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_SarN64x2   ( /*OUT*/V128*, V128*, UInt );
-extern void h_generic_calc_SarN8x16   ( /*OUT*/V128*, V128*, UInt );
+extern /*not-regparm*/
+       void h_generic_calc_SarN64x2   ( /*OUT*/V128*, V128*, UInt );
+extern /*not-regparm*/
+       void h_generic_calc_SarN8x16   ( /*OUT*/V128*, V128*, UInt );
 
+extern VEX_REGPARM(3)
+       void h_generic_calc_QNarrowBin32Sto16Ux8
+                                      ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_NarrowBin16to8x16
+                                      ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_NarrowBin32to16x8
+                                      ( /*OUT*/V128*, V128*, V128* );
 
 #endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
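
The old header warned against making these regparm because host-x86/isel.c generated matching call sequences; this update flips that, so the x86 selector presumably now marshals the first three pointer arguments in registers. A hedged illustration of what a regparm(3) annotation means on an x86-32 host; MY_REGPARM is a stand-in, not the real VEX_REGPARM macro:

#include <stdio.h>

#if defined(__i386__)
#define MY_REGPARM(n) __attribute__((regparm(n)))
#else
#define MY_REGPARM(n) /* non-x86-32 host: ordinary calling convention */
#endif

static MY_REGPARM(3) int add3(int a, int b, int c) { return a + b + c; }

int main(void) {
   printf("%d\n", add3(1, 2, 3));  /* 6, passed via EAX/EDX/ECX on x86-32 */
   return 0;
}
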
 
diff --git a/main/VEX/priv/host_generic_simd64.c b/main/VEX/priv/host_generic_simd64.c
index 03d6d2f..52af103 100644
--- a/main/VEX/priv/host_generic_simd64.c
+++ b/main/VEX/priv/host_generic_simd64.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -272,7 +272,7 @@
    return toUChar(xx==0 ? 0 : 0xFF);
 }
 
-static inline Short qnarrow32Sto16 ( UInt xx0 )
+static inline Short qnarrow32Sto16S ( UInt xx0 )
 {
    Int xx = (Int)xx0;
    if (xx < -32768) xx = -32768;
@@ -280,7 +280,7 @@
    return (Short)xx;
 }
 
-static inline Char qnarrow16Sto8 ( UShort xx0 )
+static inline Char qnarrow16Sto8S ( UShort xx0 )
 {
    Short xx = (Short)xx0;
    if (xx < -128) xx = -128;
@@ -288,7 +288,7 @@
    return (Char)xx;
 }
 
-static inline UChar qnarrow16Uto8 ( UShort xx0 )
+static inline UChar qnarrow16Sto8U ( UShort xx0 )
 {
    Short xx = (Short)xx0;
    if (xx < 0)   xx = 0;
@@ -296,6 +296,16 @@
    return (UChar)xx;
 }
 
+static inline UShort narrow32to16 ( UInt xx )
+{
+   return (UShort)xx;
+}
+
+static inline UChar narrow16to8 ( UShort xx )
+{
+   return (UChar)xx;
+}
+
 /* shifts: we don't care about out-of-range ones, since
    that is dealt with at a higher level. */
 
@@ -759,21 +769,21 @@
 
 /* ------------ Saturating narrowing ------------ */
 
-ULong h_generic_calc_QNarrow32Sx2 ( ULong aa, ULong bb )
+ULong h_generic_calc_QNarrowBin32Sto16Sx4 ( ULong aa, ULong bb )
 {
    UInt d = sel32x2_1(aa);
    UInt c = sel32x2_0(aa);
    UInt b = sel32x2_1(bb);
    UInt a = sel32x2_0(bb);
    return mk16x4( 
-             qnarrow32Sto16(d),
-             qnarrow32Sto16(c),
-             qnarrow32Sto16(b),
-             qnarrow32Sto16(a)
+             qnarrow32Sto16S(d),
+             qnarrow32Sto16S(c),
+             qnarrow32Sto16S(b),
+             qnarrow32Sto16S(a)
           );
 }
 
-ULong h_generic_calc_QNarrow16Sx4 ( ULong aa, ULong bb )
+ULong h_generic_calc_QNarrowBin16Sto8Sx8 ( ULong aa, ULong bb )
 {
    UShort h = sel16x4_3(aa);
    UShort g = sel16x4_2(aa);
@@ -784,18 +794,18 @@
    UShort b = sel16x4_1(bb);
    UShort a = sel16x4_0(bb);
    return mk8x8( 
-             qnarrow16Sto8(h),
-             qnarrow16Sto8(g),
-             qnarrow16Sto8(f),
-             qnarrow16Sto8(e),
-             qnarrow16Sto8(d),
-             qnarrow16Sto8(c),
-             qnarrow16Sto8(b),
-             qnarrow16Sto8(a)
+             qnarrow16Sto8S(h),
+             qnarrow16Sto8S(g),
+             qnarrow16Sto8S(f),
+             qnarrow16Sto8S(e),
+             qnarrow16Sto8S(d),
+             qnarrow16Sto8S(c),
+             qnarrow16Sto8S(b),
+             qnarrow16Sto8S(a)
           );
 }
 
-ULong h_generic_calc_QNarrow16Ux4 ( ULong aa, ULong bb )
+ULong h_generic_calc_QNarrowBin16Sto8Ux8 ( ULong aa, ULong bb )
 {
    UShort h = sel16x4_3(aa);
    UShort g = sel16x4_2(aa);
@@ -806,14 +816,52 @@
    UShort b = sel16x4_1(bb);
    UShort a = sel16x4_0(bb);
    return mk8x8( 
-             qnarrow16Uto8(h),
-             qnarrow16Uto8(g),
-             qnarrow16Uto8(f),
-             qnarrow16Uto8(e),
-             qnarrow16Uto8(d),
-             qnarrow16Uto8(c),
-             qnarrow16Uto8(b),
-             qnarrow16Uto8(a)
+             qnarrow16Sto8U(h),
+             qnarrow16Sto8U(g),
+             qnarrow16Sto8U(f),
+             qnarrow16Sto8U(e),
+             qnarrow16Sto8U(d),
+             qnarrow16Sto8U(c),
+             qnarrow16Sto8U(b),
+             qnarrow16Sto8U(a)
+          );
+}
+
+/* ------------ Truncating narrowing ------------ */
+
+ULong h_generic_calc_NarrowBin32to16x4 ( ULong aa, ULong bb )
+{
+   UInt d = sel32x2_1(aa);
+   UInt c = sel32x2_0(aa);
+   UInt b = sel32x2_1(bb);
+   UInt a = sel32x2_0(bb);
+   return mk16x4( 
+             narrow32to16(d),
+             narrow32to16(c),
+             narrow32to16(b),
+             narrow32to16(a)
+          );
+}
+
+ULong h_generic_calc_NarrowBin16to8x8 ( ULong aa, ULong bb )
+{
+   UShort h = sel16x4_3(aa);
+   UShort g = sel16x4_2(aa);
+   UShort f = sel16x4_1(aa);
+   UShort e = sel16x4_0(aa);
+   UShort d = sel16x4_3(bb);
+   UShort c = sel16x4_2(bb);
+   UShort b = sel16x4_1(bb);
+   UShort a = sel16x4_0(bb);
+   return mk8x8( 
+             narrow16to8(h),
+             narrow16to8(g),
+             narrow16to8(f),
+             narrow16to8(e),
+             narrow16to8(d),
+             narrow16to8(c),
+             narrow16to8(b),
+             narrow16to8(a)
           );
 }
 
diff --git a/main/VEX/priv/host_generic_simd64.h b/main/VEX/priv/host_generic_simd64.h
index e854fc7..5b6640c 100644
--- a/main/VEX/priv/host_generic_simd64.h
+++ b/main/VEX/priv/host_generic_simd64.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -87,9 +87,11 @@
 extern ULong h_generic_calc_CmpNEZ16x4 ( ULong );
 extern ULong h_generic_calc_CmpNEZ8x8  ( ULong );
 
-extern ULong h_generic_calc_QNarrow32Sx2 ( ULong, ULong );
-extern ULong h_generic_calc_QNarrow16Sx4 ( ULong, ULong );
-extern ULong h_generic_calc_QNarrow16Ux4 ( ULong, ULong );
+extern ULong h_generic_calc_QNarrowBin32Sto16Sx4 ( ULong, ULong );
+extern ULong h_generic_calc_QNarrowBin16Sto8Sx8  ( ULong, ULong );
+extern ULong h_generic_calc_QNarrowBin16Sto8Ux8  ( ULong, ULong );
+extern ULong h_generic_calc_NarrowBin32to16x4    ( ULong, ULong );
+extern ULong h_generic_calc_NarrowBin16to8x8     ( ULong, ULong );
 
 extern ULong h_generic_calc_InterleaveHI8x8 ( ULong, ULong );
 extern ULong h_generic_calc_InterleaveLO8x8 ( ULong, ULong );
diff --git a/main/VEX/priv/host_ppc_defs.c b/main/VEX/priv/host_ppc_defs.c
index 973e151..ea06495 100644
--- a/main/VEX/priv/host_ppc_defs.c
+++ b/main/VEX/priv/host_ppc_defs.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -286,6 +286,8 @@
       return (cond.test == Pct_TRUE) ? "cr7.gt=1" : "cr7.gt=0";
    case Pcf_7LT:
       return (cond.test == Pct_TRUE) ? "cr7.lt=1" : "cr7.lt=0";
+   case Pcf_NONE:
+      return "no-flag";
    default: vpanic("ppPPCCondCode");
    }
 }
@@ -296,6 +298,11 @@
    PPCCondCode cc;
    cc.flag = flag;
    cc.test = test;
+   if (test == Pct_ALWAYS) { 
+      vassert(flag == Pcf_NONE);
+   } else {
+      vassert(flag != Pcf_NONE);
+   }
    return cc;
 }
 
@@ -798,10 +805,11 @@
    if (!hi) vassert(!syned);
    return i;
 }
-PPCInstr* PPCInstr_Div ( Bool syned, Bool sz32,
+PPCInstr* PPCInstr_Div ( Bool extended, Bool syned, Bool sz32,
                          HReg dst, HReg srcL, HReg srcR ) {
    PPCInstr* i      = LibVEX_Alloc(sizeof(PPCInstr));
    i->tag           = Pin_Div;
+   i->Pin.Div.extended = extended;
    i->Pin.Div.syned = syned;
    i->Pin.Div.sz32  = sz32;
    i->Pin.Div.dst   = dst;
@@ -962,15 +970,65 @@
    i->Pin.FpRSP.src = src;
    return i;
 }
-PPCInstr* PPCInstr_FpCftI ( Bool fromI, Bool int32, 
-                            HReg dst, HReg src ) {
+
+/*
+Valid combo | fromI | int32 | syned | flt64 |
+--------------------------------------------
+            |  n       n       n       n    |
+--------------------------------------------
+ F64->I64U  |  n       n       n       y    |
+--------------------------------------------
+            |  n       n       y       n    |
+--------------------------------------------
+ F64->I64S  |  n       n       y       y    |
+--------------------------------------------
+            |  n       y       n       n    |
+--------------------------------------------
+ F64->I32U  |  n       y       n       y    |
+--------------------------------------------
+            |  n       y       y       n    |
+--------------------------------------------
+ F64->I32S  |  n       y       y       y    |
+--------------------------------------------
+ I64U->F32  |  y       n       n       n    |
+--------------------------------------------
+ I64U->F64  |  y       n       n       y    |
+--------------------------------------------
+            |  y       n       y       n    |
+--------------------------------------------
+ I64S->F64  |  y       n       y       y    |
+--------------------------------------------
+            |  y       y       n       n    |
+--------------------------------------------
+            |  y       y       n       y    |
+--------------------------------------------
+            |  y       y       y       n    |
+--------------------------------------------
+            |  y       y       y       y    |
+--------------------------------------------
+*/
+PPCInstr* PPCInstr_FpCftI ( Bool fromI, Bool int32, Bool syned,
+                            Bool flt64, HReg dst, HReg src ) {
+   Bool tmp = fromI | int32 | syned | flt64;
+   vassert(tmp == True || tmp == False); // iow, no high bits set
+   UShort conversion = (fromI << 3) | (int32 << 2)
+                       | (syned << 1) | flt64;
+   switch (conversion) {
+      // Supported conversion operations
+      case 1: case 3: case 5: case 7:
+      case 8: case 9: case 11:
+         break;
+      default:
+         vpanic("PPCInstr_FpCftI(ppc_host)");
+   }
    PPCInstr* i         = LibVEX_Alloc(sizeof(PPCInstr));
    i->tag              = Pin_FpCftI;
    i->Pin.FpCftI.fromI = fromI;
    i->Pin.FpCftI.int32 = int32;
+   i->Pin.FpCftI.syned = syned;
+   i->Pin.FpCftI.flt64 = flt64;
    i->Pin.FpCftI.dst   = dst;
    i->Pin.FpCftI.src   = src;
-   vassert(!(int32 && fromI)); /* no such insn ("fcfiw"). */
    return i;
 }
 PPCInstr* PPCInstr_FpCMov ( PPCCondCode cond, HReg dst, HReg src ) {
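
The constructor validates the flag combination by packing the four Bools into a 4-bit key and admitting only the rows marked valid in the table above. Worked for one row, F64->I32S:

#include <stdio.h>

int main(void) {
   /* F64->I32S: fromI=n, int32=y, syned=y, flt64=y in the table */
   int fromI = 0, int32 = 1, syned = 1, flt64 = 1;
   int key = (fromI << 3) | (int32 << 2) | (syned << 1) | flt64;
   printf("%d\n", key);  /* 7, one of the keys the switch accepts */
   return 0;
}
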
@@ -1065,7 +1123,7 @@
    i->Pin.AvBin32x4.srcR = srcR;
    return i;
 }
-PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvOp op, HReg dst,
+PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvFpOp op, HReg dst,
                                 HReg srcL, HReg srcR ) {
    PPCInstr* i            = LibVEX_Alloc(sizeof(PPCInstr));
    i->tag                 = Pin_AvBin32Fx4;
@@ -1075,7 +1133,7 @@
    i->Pin.AvBin32Fx4.srcR = srcR;
    return i;
 }
-PPCInstr* PPCInstr_AvUn32Fx4 ( PPCAvOp op, HReg dst, HReg src ) {
+PPCInstr* PPCInstr_AvUn32Fx4 ( PPCAvFpOp op, HReg dst, HReg src ) {
    PPCInstr* i          = LibVEX_Alloc(sizeof(PPCInstr));
    i->tag               = Pin_AvUn32Fx4;
    i->Pin.AvUn32Fx4.op  = op;
@@ -1246,8 +1304,9 @@
       ppHRegPPC(i->Pin.MulL.srcR);
       return;
    case Pin_Div:
-      vex_printf("div%c%s ",
+      vex_printf("div%c%s%s ",
                  i->Pin.Div.sz32 ? 'w' : 'd',
+                 i->Pin.Div.extended ? "e" : "",
                  i->Pin.Div.syned ? "" : "u");
       ppHRegPPC(i->Pin.Div.dst);
       vex_printf(",");
@@ -1433,15 +1492,34 @@
       ppHRegPPC(i->Pin.FpRSP.src);
       return;
    case Pin_FpCftI: {
-      HChar* str = "fc???";
+      HChar* str = "fc?????";
+      /* Note that "fcfids" is missing from below. That instruction would
+       * satisfy the predicate:
+       *    (i->Pin.FpCftI.fromI == True && i->Pin.FpCftI.int32 == False)
+       * which would go into a final "else" clause to make this if-else
+       * block balanced.  But we're able to implement fcfids by leveraging
+       * the fcfid implementation, so it wasn't necessary to include it here.
+       */
       if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == False)
-         str = "fctid";
-      else
-      if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == True)
-         str = "fctiw";
-      else
-      if (i->Pin.FpCftI.fromI == True && i->Pin.FpCftI.int32 == False)
-         str = "fcfid";
+         if (i->Pin.FpCftI.syned == True)
+            str = "fctid";
+         else
+            str = "fctidu";
+      else if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == True)
+         if (i->Pin.FpCftI.syned == True)
+            str = "fctiw";
+         else
+            str = "fctiwu";
+      else if (i->Pin.FpCftI.fromI == True && i->Pin.FpCftI.int32 == False) {
+         if (i->Pin.FpCftI.syned == True) {
+            str = "fcfid";
+         } else {
+            if (i->Pin.FpCftI.flt64 == True)
+               str = "fcfidu";
+            else
+               str = "fcfidus";
+         }
+      }
       vex_printf("%s ", str);
       ppHRegPPC(i->Pin.FpCftI.dst);
       vex_printf(",");
@@ -2590,7 +2668,8 @@
    code and back.
 */
 Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i, 
-                    Bool mode64, void* dispatch )
+                    Bool mode64,
+                    void* dispatch_unassisted, void* dispatch_assisted )
 {
    UChar* p = &buf[0];
    UChar* ptmp = p;
@@ -2923,7 +3002,23 @@
       if (!mode64)
          vassert(sz32);
 
-      if (sz32) {
+      if (i->Pin.Div.extended) {
+         if (sz32) {
+            if (syned)
+               // divwe r_dst,r_srcL,r_srcR
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 427, 0);
+            else
+               // divweu r_dst,r_srcL,r_srcR
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 395, 0);
+         } else {
+            if (syned)
+               // divde r_dst,r_srcL,r_srcR
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 425, 0);
+            else
+               // divdeu r_dst,r_srcL,r_srcR
+               p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 393, 0);
+         }
+      } else if (sz32) {
          if (syned)  // divw r_dst,r_srcL,r_srcR
             p = mkFormXO(p, 31, r_dst, r_srcL, r_srcR, 0, 491, 0);
          else        // divwu r_dst,r_srcL,r_srcR
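The extended divides differ from the plain ones only in the XO-form minor opcode: 427/395 for divwe/divweu and 425/393 for divde/divdeu, against 491 for divw (and 459 for divwu). For orientation, a minimal sketch of the word such an encoder assembles, assuming the standard Power ISA XO-form layout and a big-endian host as VEX's ppc back end targeted at the time; mk_xo_word is a hypothetical stand-in for mkFormXO:

   static UChar* mk_xo_word ( UChar* p, UInt opc1, UInt rt, UInt ra,
                              UInt rb, UInt oe, UInt xo, UInt rc )
   {
      /* XO-form: opc1[0:5] RT[6:10] RA[11:15] RB[16:20] OE[21] xo[22:30] Rc[31] */
      UInt w = (opc1 << 26) | (rt << 21) | (ra << 16) | (rb << 11)
               | (oe << 10) | (xo << 1) | rc;
      *p++ = (UChar)(w >> 24);   /* emit in big-endian byte order */
      *p++ = (UChar)(w >> 16);
      *p++ = (UChar)(w >>  8);
      *p++ = (UChar)(w >>  0);
      return p;
   }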
@@ -2979,7 +3074,8 @@
       UInt r_dst;
       ULong imm_dst;
 
-      vassert(dispatch == NULL);
+      vassert(dispatch_unassisted == NULL);
+      vassert(dispatch_assisted == NULL);
       
       /* First off, if this is conditional, create a conditional
          jump over the rest of it. */
@@ -3145,6 +3241,7 @@
          // Just load 1 to dst => li dst,1
          p = mkFormD(p, 14, r_dst, 0, 1);
       } else {
+         vassert(cond.flag != Pcf_NONE);
          rot_imm = 1 + cond.flag;
          r_tmp = 0;  // Not set in getAllocable, so no need to declare.
 
@@ -3376,19 +3473,41 @@
       UInt fr_dst = fregNo(i->Pin.FpCftI.dst);
       UInt fr_src = fregNo(i->Pin.FpCftI.src);
       if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == True) {
-         // fctiw (conv f64 to i32), PPC32 p404
-         p = mkFormX(p, 63, fr_dst, 0, fr_src, 14, 0);
-         goto done;
+         if (i->Pin.FpCftI.syned == True) {
+            // fctiw (conv f64 to i32), PPC32 p404
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 14, 0);
+            goto done;
+         } else {
+            // fctiwu (conv f64 to u32)
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 142, 0);
+            goto done;
+         }
       }
       if (i->Pin.FpCftI.fromI == False && i->Pin.FpCftI.int32 == False) {
-         // fctid (conv f64 to i64), PPC64 p437
-         p = mkFormX(p, 63, fr_dst, 0, fr_src, 814, 0);
-         goto done;
+         if (i->Pin.FpCftI.syned == True) {
+            // fctid (conv f64 to i64), PPC64 p437
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 814, 0);
+            goto done;
+         } else {
+            // fctidu (conv f64 to u64)
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 942, 0);
+            goto done;
+         }
       }
       if (i->Pin.FpCftI.fromI == True && i->Pin.FpCftI.int32 == False) {
-         // fcfid (conv i64 to f64), PPC64 p434
-         p = mkFormX(p, 63, fr_dst, 0, fr_src, 846, 0);
-         goto done;
+         if (i->Pin.FpCftI.syned == True) {
+            // fcfid (conv i64 to f64), PPC64 p434
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 846, 0);
+            goto done;
+         } else if (i->Pin.FpCftI.flt64 == True) {
+            // fcfidu (conv u64 to f64)
+            p = mkFormX(p, 63, fr_dst, 0, fr_src, 974, 0);
+            goto done;
+         } else {
+            // fcfidus (conv u64 to f32)
+            p = mkFormX(p, 59, fr_dst, 0, fr_src, 974, 0);
+            goto done;
+         }
       }
       goto bad;
    }
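For reference, the (fromI, int32, syned, flt64) combinations handled in this case decode as follows; this is a summary of the emitter branches above, not an independent source:

   fromI  int32  syned  flt64    mnemonic   opc/xo
   False  True   True     -      fctiw      63/14
   False  True   False    -      fctiwu     63/142
   False  False  True     -      fctid      63/814
   False  False  False    -      fctidu     63/942
   True   False  True     -      fcfid      63/846
   True   False  False   True    fcfidu     63/974
   True   False  False   False   fcfidus    59/974

Anything else falls through to the "bad" label.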
diff --git a/main/VEX/priv/host_ppc_defs.h b/main/VEX/priv/host_ppc_defs.h
index accfd58..58ddb43 100644
--- a/main/VEX/priv/host_ppc_defs.h
+++ b/main/VEX/priv/host_ppc_defs.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -162,15 +162,16 @@
       Pcf_7LT  = 28,  /* neg  | lt          */
       Pcf_7GT  = 29,  /* pos  | gt          */
       Pcf_7EQ  = 30,  /* zero | equal       */
-      Pcf_7SO  = 31   /* summary overflow   */
+      Pcf_7SO  = 31,  /* summary overflow   */
+      Pcf_NONE = 32   /* no condition; used with Pct_ALWAYS */
    }
    PPCCondFlag;
 
 typedef
    enum {   /* Maps bc bitfield BO */
-      Pct_FALSE  = 0x4,
-      Pct_TRUE   = 0xC,
-      Pct_ALWAYS = 0x14
+      Pct_FALSE  = 0x4, /* associated PPCCondFlag must not be Pcf_NONE */
+      Pct_TRUE   = 0xC, /* associated PPCCondFlag must not be Pcf_NONE */
+      Pct_ALWAYS = 0x14 /* associated PPCCondFlag must be Pcf_NONE */
    }
    PPCCondTest;
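With Pcf_NONE in place, the pairing rules in these comments become checkable at construction time. A minimal sketch of a constructor that enforces them; the real constructor is mk_PPCCondCode, and the exact assertion placement here is an assumption:

   static PPCCondCode mk_cond_checked ( PPCCondTest test, PPCCondFlag flag )
   {
      PPCCondCode cc;
      if (test == Pct_ALWAYS)
         vassert(flag == Pcf_NONE);  /* unconditional: no CR bit consulted */
      else
         vassert(flag != Pcf_NONE);  /* conditional: must name a CR bit */
      cc.test = test;
      cc.flag = flag;
      return cc;
   }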
 
@@ -461,7 +462,7 @@
       Pin_FpLdSt,     /* FP load/store */
       Pin_FpSTFIW,    /* stfiwx */
       Pin_FpRSP,      /* FP round IEEE754 double to IEEE754 single */
-      Pin_FpCftI,     /* fcfid/fctid/fctiw */
+      Pin_FpCftI,     /* fcfid[u,s,us]/fctid[u]/fctiw[u] */
       Pin_FpCMov,     /* FP floating point conditional move */
       Pin_FpLdFPSCR,  /* mtfsf */
       Pin_FpCmp,      /* FP compare, generating value into int reg */
@@ -559,6 +560,7 @@
          } MulL;
          /* ppc32 div/divu instruction. */
          struct {
+            Bool extended;
             Bool syned;
             Bool sz32;   /* mode64 has both 32 & 64bit div */
             HReg dst;
@@ -662,11 +664,15 @@
             HReg src;
             HReg dst;
          } FpRSP;
-         /* fcfid/fctid/fctiw.  Note there's no fcfiw so fromI==True
-            && int32==True is not allowed. */
+         /* fcfid[u,s,us]/fctid[u]/fctiw[u].  Only some combinations
+            of the various fields are allowed; the constructor,
+            PPCInstr_FpCftI in host_ppc_defs.c, asserts and documents
+            which ones.  */
          struct {
-            Bool fromI; /* False==F->I, True==I->F */
-            Bool int32; /* True== I is 32, False==I is 64 */
+            Bool fromI; /* True== I->F,    False== F->I */
+            Bool int32; /* True== I is 32, False== I is 64 */
+            Bool syned;
+            Bool flt64; /* True== F is 64, False== F is 32 */
             HReg src;
             HReg dst;
          } FpCftI;
@@ -788,7 +794,7 @@
 extern PPCInstr* PPCInstr_Cmp        ( Bool, Bool, UInt, HReg, PPCRH* );
 extern PPCInstr* PPCInstr_Unary      ( PPCUnaryOp op, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_MulL       ( Bool syned, Bool hi32, Bool sz32, HReg, HReg, HReg );
-extern PPCInstr* PPCInstr_Div        ( Bool syned, Bool sz32, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_Div        ( Bool extended, Bool syned, Bool sz32, HReg dst, HReg srcL, HReg srcR );
 extern PPCInstr* PPCInstr_Call       ( PPCCondCode, Addr64, UInt );
 extern PPCInstr* PPCInstr_Goto       ( IRJumpKind, PPCCondCode cond, PPCRI* dst );
 extern PPCInstr* PPCInstr_CMov       ( PPCCondCode, HReg dst, PPCRI* src );
@@ -811,8 +817,8 @@
 extern PPCInstr* PPCInstr_FpLdSt     ( Bool isLoad, UChar sz, HReg, PPCAMode* );
 extern PPCInstr* PPCInstr_FpSTFIW    ( HReg addr, HReg data );
 extern PPCInstr* PPCInstr_FpRSP      ( HReg dst, HReg src );
-extern PPCInstr* PPCInstr_FpCftI     ( Bool fromI, Bool int32, 
-                                       HReg dst, HReg src );
+extern PPCInstr* PPCInstr_FpCftI     ( Bool fromI, Bool int32, Bool syned,
+                                       Bool flt64, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_FpCMov     ( PPCCondCode, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_FpLdFPSCR  ( HReg src );
 extern PPCInstr* PPCInstr_FpCmp      ( HReg dst, HReg srcL, HReg srcR );
@@ -825,8 +831,8 @@
 extern PPCInstr* PPCInstr_AvBin8x16  ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
 extern PPCInstr* PPCInstr_AvBin16x8  ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
 extern PPCInstr* PPCInstr_AvBin32x4  ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
-extern PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvOp op, HReg dst, HReg srcL, HReg srcR );
-extern PPCInstr* PPCInstr_AvUn32Fx4  ( PPCAvOp op, HReg dst, HReg src );
+extern PPCInstr* PPCInstr_AvBin32Fx4 ( PPCAvFpOp op, HReg dst, HReg srcL, HReg srcR );
+extern PPCInstr* PPCInstr_AvUn32Fx4  ( PPCAvFpOp op, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_AvPerm     ( HReg dst, HReg srcL, HReg srcR, HReg ctl );
 extern PPCInstr* PPCInstr_AvSel      ( HReg ctl, HReg dst, HReg srcL, HReg srcR );
 extern PPCInstr* PPCInstr_AvShlDbl   ( UChar shift, HReg dst, HReg srcL, HReg srcR );
@@ -842,7 +848,9 @@
 extern void         mapRegs_PPCInstr     ( HRegRemap*, PPCInstr* , Bool mode64);
 extern Bool         isMove_PPCInstr      ( PPCInstr*, HReg*, HReg* );
 extern Int          emit_PPCInstr        ( UChar* buf, Int nbuf, PPCInstr*, 
-                                           Bool mode64, void* dispatch );
+                                           Bool mode64,
+                                           void* dispatch_unassisted,
+                                           void* dispatch_assisted );
 
 extern void genSpill_PPC  ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                             HReg rreg, Int offsetB, Bool mode64 );
diff --git a/main/VEX/priv/host_ppc_isel.c b/main/VEX/priv/host_ppc_isel.c
index 1609c74..642fc81 100644
--- a/main/VEX/priv/host_ppc_isel.c
+++ b/main/VEX/priv/host_ppc_isel.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -126,7 +126,10 @@
 
 fadd[.]                  if .             y             y
 fadds[.]                 if .             y             y
-fcfid[.] (i64->dbl)      if .             y             y
+fcfid[.] (Si64->dbl)     if .             y             y
+fcfidu[.] (Ui64->dbl)    if .             y             y
+fcfids[.] (Si64->sngl)   if .             y             y
+fcfidus[.] (Ui64->sngl)  if .             y             y
 fcmpo (cmp, result       n                n             n
 fcmpu  to crfD)          n                n             n
 fctid[.]  (dbl->i64)     if .       ->undef             y
@@ -559,7 +562,7 @@
 
    if (bias < -100 || bias > 100) /* somewhat arbitrarily */
       vpanic("genGuestArrayOffset(ppc host)(3)");
-   if (descr->base < 0 || descr->base > 4000) /* somewhat arbitrarily */
+   if (descr->base < 0 || descr->base > 5000) /* somewhat arbitrarily */
       vpanic("genGuestArrayOffset(ppc host)(4)");
 
    /* Compute off into a reg, %off.  Then return:
@@ -782,7 +785,7 @@
       }
 
       /* Fast scheme only applies for unconditional calls.  Hence: */
-      cc.test = Pct_ALWAYS;
+      cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
 
    } else {
 
@@ -825,7 +828,7 @@
          because the argument computations could trash the condition
          codes.  Be a bit clever to handle the common case where the
          guard is 1:Bit. */
-      cc.test = Pct_ALWAYS;
+      cc = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
       if (guard) {
          if (guard->tag == Iex_Const 
              && guard->Iex.Const.con->tag == Ico_U1
@@ -1281,24 +1284,42 @@
 
       /* How about a div? */
       if (e->Iex.Binop.op == Iop_DivS32 || 
-          e->Iex.Binop.op == Iop_DivU32) {
-         Bool syned  = toBool(e->Iex.Binop.op == Iop_DivS32);
+          e->Iex.Binop.op == Iop_DivU32 ||
+          e->Iex.Binop.op == Iop_DivS32E ||
+          e->Iex.Binop.op == Iop_DivU32E) {
+         Bool syned  = toBool(e->Iex.Binop.op == Iop_DivS32
+                              || e->Iex.Binop.op == Iop_DivS32E);
          HReg r_dst  = newVRegI(env);
          HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
          HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
-         addInstr(env, PPCInstr_Div(syned, True/*32bit div*/,
-                                    r_dst, r_srcL, r_srcR));
+         addInstr(env,
+                  PPCInstr_Div(toBool(e->Iex.Binop.op == Iop_DivU32E
+                                      || e->Iex.Binop.op == Iop_DivS32E),
+                               syned, True/*32bit div*/,
+                               r_dst, r_srcL, r_srcR));
          return r_dst;
       }
       if (e->Iex.Binop.op == Iop_DivS64 || 
-          e->Iex.Binop.op == Iop_DivU64) {
-         Bool syned  = toBool(e->Iex.Binop.op == Iop_DivS64);
+          e->Iex.Binop.op == Iop_DivU64 ||
+          e->Iex.Binop.op == Iop_DivS64E ||
+          e->Iex.Binop.op == Iop_DivU64E) {
+         Bool syned  = toBool(e->Iex.Binop.op == Iop_DivS64
+                              || e->Iex.Binop.op == Iop_DivS64E);
          HReg r_dst  = newVRegI(env);
          HReg r_srcL = iselWordExpr_R(env, e->Iex.Binop.arg1);
          HReg r_srcR = iselWordExpr_R(env, e->Iex.Binop.arg2);
          vassert(mode64);
-         addInstr(env, PPCInstr_Div(syned, False/*64bit div*/,
-                                    r_dst, r_srcL, r_srcR));
+         addInstr(env,
+                  PPCInstr_Div(toBool(e->Iex.Binop.op == Iop_DivS64E
+                                      || e->Iex.Binop.op == Iop_DivU64E),
+                               syned, False/*64bit div*/,
+                               r_dst, r_srcL, r_srcR));
          return r_dst;
       }
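The E-suffixed IROps selected above carry PowerPC's divide-extended semantics: the first operand supplies the high half of a double-width dividend whose low half is zero. A reference model for the unsigned 32-bit case, as a sketch; per the ISA the result is undefined on divide-by-zero or quotient overflow:

   static UInt divweu_ref ( UInt ra, UInt rb )
   {
      ULong dividend = ((ULong)ra) << 32;   /* RA || 32 zero bits */
      return (UInt)(dividend / rb);         /* caller ensures rb != 0 */
   }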
 
@@ -1456,7 +1477,8 @@
          return r_ccIR;
       }
 
-      if (e->Iex.Binop.op == Iop_F64toI32S) {
+      if (e->Iex.Binop.op == Iop_F64toI32S ||
+          e->Iex.Binop.op == Iop_F64toI32U) {
          /* This works in both mode64 and mode32. */
          HReg      r1      = StackFramePtr(env->mode64);
          PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
@@ -1468,7 +1490,10 @@
          set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
 
          sub_from_sp( env, 16 );
-         addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/, 
+         addInstr(env, PPCInstr_FpCftI(False/*F->I*/, True/*int32*/,
+                                       toBool(e->Iex.Binop.op == Iop_F64toI32S)/*syned*/,
+                                       True/*flt64*/,
                                        ftmp, fsrc));
          addInstr(env, PPCInstr_FpSTFIW(r1, ftmp));
          addInstr(env, PPCInstr_Load(4, idst, zero_r1, mode64));
@@ -1484,7 +1509,7 @@
          return idst;
       }
 
-      if (e->Iex.Binop.op == Iop_F64toI64S) {
+      if (e->Iex.Binop.op == Iop_F64toI64S ||
+          e->Iex.Binop.op == Iop_F64toI64U) {
          if (mode64) {
             HReg      r1      = StackFramePtr(env->mode64);
             PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
@@ -1497,7 +1522,9 @@
 
             sub_from_sp( env, 16 );
             addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
-                                          ftmp, fsrc));
+                                          toBool(e->Iex.Binop.op == Iop_F64toI64S)/*syned*/,
+                                          True/*flt64*/, ftmp, fsrc));
             addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
             addInstr(env, PPCInstr_Load(8, idst, zero_r1, True/*mode64*/));
             add_to_sp( env, 16 );
@@ -1586,8 +1613,7 @@
       case Iop_16Sto64: {
          HReg   r_dst = newVRegI(env);
          HReg   r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
-         UShort amt   = toUShort(op_unop==Iop_8Sto64  ? 56 :
-                                 op_unop==Iop_16Sto64 ? 48 : 32);
+         UShort amt   = toUShort(op_unop==Iop_8Sto64  ? 56 : 48);
          vassert(mode64);
          addInstr(env,
                   PPCInstr_Shft(Pshft_SHL, False/*64bit shift*/,
@@ -1612,6 +1638,7 @@
       case Iop_Not16:
       case Iop_Not32:
       case Iop_Not64: {
          HReg r_dst = newVRegI(env);
          HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg);
+         if (op_unop == Iop_Not64) vassert(mode64);
          addInstr(env, PPCInstr_Unary(Pun_NOT,r_dst,r_src));
@@ -2400,8 +2427,10 @@
       switch (e->Iex.Binop.op) {
       case Iop_CmpEQ32:  return mk_PPCCondCode( Pct_TRUE,  Pcf_7EQ );
       case Iop_CmpNE32:  return mk_PPCCondCode( Pct_FALSE, Pcf_7EQ );
-      case Iop_CmpLT32U: return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
-      case Iop_CmpLE32U: return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
+      case Iop_CmpLT32U: case Iop_CmpLT32S:
+         return mk_PPCCondCode( Pct_TRUE,  Pcf_7LT );
+      case Iop_CmpLE32U: case Iop_CmpLE32S:
+         return mk_PPCCondCode( Pct_FALSE, Pcf_7GT );
       default: vpanic("iselCondCode(ppc): CmpXX32");
       }
    }
@@ -2731,8 +2760,8 @@
             *rLo = iselWordExpr_R(env, e->Iex.Binop.arg2);
             return;
 
-         /* F64toI64S */
-         case Iop_F64toI64S: {
+         /* F64toI64[S|U] */
+         case Iop_F64toI64S: case Iop_F64toI64U: {
             HReg      tLo     = newVRegI(env);
             HReg      tHi     = newVRegI(env);
             HReg      r1      = StackFramePtr(env->mode64);
@@ -2747,7 +2776,8 @@
 
             sub_from_sp( env, 16 );
             addInstr(env, PPCInstr_FpCftI(False/*F->I*/, False/*int64*/,
-                                          ftmp, fsrc));
+                                          toBool(op_binop == Iop_F64toI64S)/*syned*/,
+                                          True/*flt64*/, ftmp, fsrc));
             addInstr(env, PPCInstr_FpLdSt(False/*store*/, 8, ftmp, zero_r1));
             addInstr(env, PPCInstr_Load(4, tHi, zero_r1, False/*mode32*/));
             addInstr(env, PPCInstr_Load(4, tLo, four_r1, False/*mode32*/));
@@ -2880,6 +2910,18 @@
          return;
       }
 
+      case Iop_Not64: {
+         HReg xLo, xHi;
+         HReg tmpLo = newVRegI(env);
+         HReg tmpHi = newVRegI(env);
+         iselInt64Expr(&xHi, &xLo, env, e->Iex.Unop.arg);
+         addInstr(env, PPCInstr_Unary(Pun_NOT,tmpLo,xLo));
+         addInstr(env, PPCInstr_Unary(Pun_NOT,tmpHi,xHi));
+         *rHi = tmpHi;
+         *rLo = tmpLo;
+         return;
+      }
+
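The per-half treatment is valid because bitwise NOT never propagates across the word boundary; each 32-bit half can be complemented independently:

   /* sketch: ~x computed via 32-bit halves */
   static ULong not64_via_halves ( UInt hi, UInt lo )
   {
      return (((ULong)(UInt)~hi) << 32) | (UInt)~lo;
      /* equals ~((((ULong)hi) << 32) | lo) */
   }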
       /* ReinterpF64asI64(e) */
       /* Given an IEEE754 double, produce an I64 with the same bit
          pattern. */
@@ -2941,6 +2983,8 @@
 /* DO NOT CALL THIS DIRECTLY */
 static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
 {
+   Bool        mode64 = env->mode64;
+
    IRType ty = typeOfIRExpr(env->type_env,e);
    vassert(ty == Ity_F32);
 
@@ -3009,6 +3053,60 @@
       return fdst;
    }
 
+   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64UtoF32) {
+      if (mode64) {
+         HReg fdst = newVRegF(env);
+         HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2);
+         HReg r1   = StackFramePtr(env->mode64);
+         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
+
+         /* Set host rounding mode */
+         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+         sub_from_sp( env, 16 );
+
+         addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
+         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
+         addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/, 
+                                       False, False,
+                                       fdst, fdst));
+
+         add_to_sp( env, 16 );
+
+         ///* Restore default FPU rounding. */
+         //set_FPU_rounding_default( env );
+         return fdst;
+      } else {
+         /* 32-bit mode */
+         HReg fdst = newVRegF(env);
+         HReg isrcHi, isrcLo;
+         HReg r1   = StackFramePtr(env->mode64);
+         PPCAMode* zero_r1 = PPCAMode_IR( 0, r1 );
+         PPCAMode* four_r1 = PPCAMode_IR( 4, r1 );
+
+         iselInt64Expr(&isrcHi, &isrcLo, env, e->Iex.Binop.arg2);
+
+         /* Set host rounding mode */
+         set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+         sub_from_sp( env, 16 );
+
+         addInstr(env, PPCInstr_Store(4, zero_r1, isrcHi, False/*mode32*/));
+         addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
+         addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
+         addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/, 
+                                       False, False,
+                                       fdst, fdst));
+
+         add_to_sp( env, 16 );
+
+         ///* Restore default FPU rounding. */
+         //set_FPU_rounding_default( env );
+         return fdst;
+      }
+
+   }
+
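Both paths bounce the integer through the stack because, before the POWER8-era direct-move instructions, PowerPC had no GPR-to-FPR move: the bits are stored, reloaded into an FPR as raw data, and only then converted. The generated shape, roughly:

   /* sketch of the emitted sequence, not the literal output:
         std     rSrc, 0(r1)    -- spill the u64 bit pattern
                                   (a pair of stw in 32-bit mode)
         lfd     fDst, 0(r1)    -- reload the same 64 bits into an FPR
         fcfidus fDst, fDst     -- treat them as a u64, convert to f32 */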
    vex_printf("iselFltExpr(ppc): No such tag(%u)\n", e->tag);
    ppIRExpr(e);
    vpanic("iselFltExpr_wrk(ppc)");
@@ -3185,7 +3283,7 @@
          return r_dst;
       }
 
-      if (e->Iex.Binop.op == Iop_I64StoF64) {
+      if (e->Iex.Binop.op == Iop_I64StoF64 || e->Iex.Binop.op == Iop_I64UtoF64) {
          if (mode64) {
             HReg fdst = newVRegF(env);
             HReg isrc = iselWordExpr_R(env, e->Iex.Binop.arg2);
@@ -3200,6 +3298,8 @@
             addInstr(env, PPCInstr_Store(8, zero_r1, isrc, True/*mode64*/));
             addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
             addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/, 
+                                          e->Iex.Binop.op == Iop_I64StoF64,
+                                          True/*fdst is 64 bit*/,
                                           fdst, fdst));
 
             add_to_sp( env, 16 );
@@ -3226,6 +3326,8 @@
             addInstr(env, PPCInstr_Store(4, four_r1, isrcLo, False/*mode32*/));
             addInstr(env, PPCInstr_FpLdSt(True/*load*/, 8, fdst, zero_r1));
             addInstr(env, PPCInstr_FpCftI(True/*I->F*/, False/*int64*/, 
+                                          e->Iex.Binop.op == Iop_I64StoF64,
+                                          True/*fdst is 64 bit*/,
                                           fdst, fdst));
 
             add_to_sp( env, 16 );
@@ -3272,7 +3374,31 @@
                return mk_LoadR64toFPR( env, r_src );
             }
          }
+
          case Iop_F32toF64: {
+            if (e->Iex.Unop.arg->tag == Iex_Unop &&
+                e->Iex.Unop.arg->Iex.Unop.op == Iop_ReinterpI32asF32) {
+               HReg src, fr_dst;
+               PPCAMode *am_addr;
+
+               e = e->Iex.Unop.arg;
+               src = iselWordExpr_R(env, e->Iex.Unop.arg);
+               fr_dst = newVRegF(env);
+
+               sub_from_sp( env, 16 );        // Move SP down 16 bytes
+               am_addr = PPCAMode_IR( 0, StackFramePtr(env->mode64) );
+
+               // store src as an Ity_I32
+               addInstr(env, PPCInstr_Store( 4, am_addr, src, env->mode64 ));
+
+               // load as a single-precision float; lfs widens on load, so
+               // the end result lands in a 64-bit FP register as an F64
+               addInstr(env, PPCInstr_FpLdSt(True/*load*/, 4, fr_dst, am_addr));
+
+               add_to_sp( env, 16 );          // Reset SP
+               return fr_dst;
+            }
+
             /* this is a no-op */
             HReg res = iselFltExpr(env, e->Iex.Unop.arg);
             return res;
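The new special case recognises F32toF64(ReinterpI32asF32(x)) and avoids materialising an F32 value at all: the I32 bits are stored to the stack and lfs performs the widening, since a single-precision load already delivers the number in double precision. A C analogue, as a sketch:

   /* what the store + lfs pair computes */
   static double widen_f32_bits ( UInt x )
   {
      float f;
      __builtin_memcpy(&f, &x, sizeof f);  /* reinterpret the bits as F32 */
      return (double)f;                    /* lfs widens on load */
   }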
@@ -3328,6 +3454,7 @@
 {
    Bool mode64 = env->mode64;
    PPCAvOp op = Pav_INVALID;
+   PPCAvFpOp fpop = Pavfp_INVALID;
    IRType  ty = typeOfIRExpr(env->type_env,e);
    vassert(e);
    vassert(ty == Ity_V128);
@@ -3396,21 +3523,21 @@
          return dst;
       }
 
-      case Iop_Recip32Fx4:    op = Pavfp_RCPF;    goto do_32Fx4_unary;
-      case Iop_RSqrt32Fx4:    op = Pavfp_RSQRTF;  goto do_32Fx4_unary;
-      case Iop_I32UtoFx4:     op = Pavfp_CVTU2F;  goto do_32Fx4_unary;
-      case Iop_I32StoFx4:     op = Pavfp_CVTS2F;  goto do_32Fx4_unary;
-      case Iop_QFtoI32Ux4_RZ: op = Pavfp_QCVTF2U; goto do_32Fx4_unary;
-      case Iop_QFtoI32Sx4_RZ: op = Pavfp_QCVTF2S; goto do_32Fx4_unary;
-      case Iop_RoundF32x4_RM: op = Pavfp_ROUNDM;  goto do_32Fx4_unary;
-      case Iop_RoundF32x4_RP: op = Pavfp_ROUNDP;  goto do_32Fx4_unary;
-      case Iop_RoundF32x4_RN: op = Pavfp_ROUNDN;  goto do_32Fx4_unary;
-      case Iop_RoundF32x4_RZ: op = Pavfp_ROUNDZ;  goto do_32Fx4_unary;
+      case Iop_Recip32Fx4:    fpop = Pavfp_RCPF;    goto do_32Fx4_unary;
+      case Iop_RSqrt32Fx4:    fpop = Pavfp_RSQRTF;  goto do_32Fx4_unary;
+      case Iop_I32UtoFx4:     fpop = Pavfp_CVTU2F;  goto do_32Fx4_unary;
+      case Iop_I32StoFx4:     fpop = Pavfp_CVTS2F;  goto do_32Fx4_unary;
+      case Iop_QFtoI32Ux4_RZ: fpop = Pavfp_QCVTF2U; goto do_32Fx4_unary;
+      case Iop_QFtoI32Sx4_RZ: fpop = Pavfp_QCVTF2S; goto do_32Fx4_unary;
+      case Iop_RoundF32x4_RM: fpop = Pavfp_ROUNDM;  goto do_32Fx4_unary;
+      case Iop_RoundF32x4_RP: fpop = Pavfp_ROUNDP;  goto do_32Fx4_unary;
+      case Iop_RoundF32x4_RN: fpop = Pavfp_ROUNDN;  goto do_32Fx4_unary;
+      case Iop_RoundF32x4_RZ: fpop = Pavfp_ROUNDZ;  goto do_32Fx4_unary;
       do_32Fx4_unary:
       {
          HReg arg = iselVecExpr(env, e->Iex.Unop.arg);
          HReg dst = newVRegV(env);
-         addInstr(env, PPCInstr_AvUn32Fx4(op, dst, arg));
+         addInstr(env, PPCInstr_AvUn32Fx4(fpop, dst, arg));
          return dst;
       }
 
@@ -3513,20 +3640,20 @@
          }
       }
 
-      case Iop_Add32Fx4:   op = Pavfp_ADDF;   goto do_32Fx4;
-      case Iop_Sub32Fx4:   op = Pavfp_SUBF;   goto do_32Fx4;
-      case Iop_Max32Fx4:   op = Pavfp_MAXF;   goto do_32Fx4;
-      case Iop_Min32Fx4:   op = Pavfp_MINF;   goto do_32Fx4;
-      case Iop_Mul32Fx4:   op = Pavfp_MULF;   goto do_32Fx4;
-      case Iop_CmpEQ32Fx4: op = Pavfp_CMPEQF; goto do_32Fx4;
-      case Iop_CmpGT32Fx4: op = Pavfp_CMPGTF; goto do_32Fx4;
-      case Iop_CmpGE32Fx4: op = Pavfp_CMPGEF; goto do_32Fx4;
+      case Iop_Add32Fx4:   fpop = Pavfp_ADDF;   goto do_32Fx4;
+      case Iop_Sub32Fx4:   fpop = Pavfp_SUBF;   goto do_32Fx4;
+      case Iop_Max32Fx4:   fpop = Pavfp_MAXF;   goto do_32Fx4;
+      case Iop_Min32Fx4:   fpop = Pavfp_MINF;   goto do_32Fx4;
+      case Iop_Mul32Fx4:   fpop = Pavfp_MULF;   goto do_32Fx4;
+      case Iop_CmpEQ32Fx4: fpop = Pavfp_CMPEQF; goto do_32Fx4;
+      case Iop_CmpGT32Fx4: fpop = Pavfp_CMPGTF; goto do_32Fx4;
+      case Iop_CmpGE32Fx4: fpop = Pavfp_CMPGEF; goto do_32Fx4;
       do_32Fx4:
       {
          HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
          HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
          HReg dst = newVRegV(env);
-         addInstr(env, PPCInstr_AvBin32Fx4(op, dst, argL, argR));
+         addInstr(env, PPCInstr_AvBin32Fx4(fpop, dst, argL, argR));
          return dst;
       }
 
@@ -3598,11 +3725,11 @@
       case Iop_Shr16x8:    op = Pav_SHR;    goto do_AvBin16x8;
       case Iop_Sar16x8:    op = Pav_SAR;    goto do_AvBin16x8;
       case Iop_Rol16x8:    op = Pav_ROTL;   goto do_AvBin16x8;
-      case Iop_Narrow16x8:       op = Pav_PACKUU;  goto do_AvBin16x8;
-      case Iop_QNarrow16Ux8:     op = Pav_QPACKUU; goto do_AvBin16x8;
-      case Iop_QNarrow16Sx8:     op = Pav_QPACKSS; goto do_AvBin16x8;
-      case Iop_InterleaveHI16x8: op = Pav_MRGHI;  goto do_AvBin16x8;
-      case Iop_InterleaveLO16x8: op = Pav_MRGLO;  goto do_AvBin16x8;
+      case Iop_NarrowBin16to8x16:    op = Pav_PACKUU;  goto do_AvBin16x8;
+      case Iop_QNarrowBin16Uto8Ux16: op = Pav_QPACKUU; goto do_AvBin16x8;
+      case Iop_QNarrowBin16Sto8Sx16: op = Pav_QPACKSS; goto do_AvBin16x8;
+      case Iop_InterleaveHI16x8:  op = Pav_MRGHI;  goto do_AvBin16x8;
+      case Iop_InterleaveLO16x8:  op = Pav_MRGLO;  goto do_AvBin16x8;
       case Iop_Add16x8:    op = Pav_ADDU;   goto do_AvBin16x8;
       case Iop_QAdd16Ux8:  op = Pav_QADDU;  goto do_AvBin16x8;
       case Iop_QAdd16Sx8:  op = Pav_QADDS;  goto do_AvBin16x8;
@@ -3632,11 +3759,11 @@
       case Iop_Shr32x4:    op = Pav_SHR;    goto do_AvBin32x4;
       case Iop_Sar32x4:    op = Pav_SAR;    goto do_AvBin32x4;
       case Iop_Rol32x4:    op = Pav_ROTL;   goto do_AvBin32x4;
-      case Iop_Narrow32x4:       op = Pav_PACKUU;  goto do_AvBin32x4;
-      case Iop_QNarrow32Ux4:     op = Pav_QPACKUU; goto do_AvBin32x4;
-      case Iop_QNarrow32Sx4:     op = Pav_QPACKSS; goto do_AvBin32x4;
-      case Iop_InterleaveHI32x4: op = Pav_MRGHI;  goto do_AvBin32x4;
-      case Iop_InterleaveLO32x4: op = Pav_MRGLO;  goto do_AvBin32x4;
+      case Iop_NarrowBin32to16x8:    op = Pav_PACKUU;  goto do_AvBin32x4;
+      case Iop_QNarrowBin32Uto16Ux8: op = Pav_QPACKUU; goto do_AvBin32x4;
+      case Iop_QNarrowBin32Sto16Sx8: op = Pav_QPACKSS; goto do_AvBin32x4;
+      case Iop_InterleaveHI32x4:  op = Pav_MRGHI;  goto do_AvBin32x4;
+      case Iop_InterleaveLO32x4:  op = Pav_MRGLO;  goto do_AvBin32x4;
       case Iop_Add32x4:    op = Pav_ADDU;   goto do_AvBin32x4;
       case Iop_QAdd32Ux4:  op = Pav_QADDU;  goto do_AvBin32x4;
       case Iop_QAdd32Sx4:  op = Pav_QADDS;  goto do_AvBin32x4;
@@ -4100,7 +4227,7 @@
       ppIRExpr(next);
       vex_printf("\n");
    }
-   cond = mk_PPCCondCode( Pct_ALWAYS, Pcf_7EQ );
+   cond = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
    ri = iselWordExpr_RI(env, next);
    addInstr(env, PPCInstr_RdWrLR(True, env->savedLR));
    addInstr(env, PPCInstr_Goto(jk, cond, ri));
@@ -4129,10 +4256,10 @@
 
    /* do some sanity checks */
    mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
-            | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX;
+            | VEX_HWCAPS_PPC32_FX | VEX_HWCAPS_PPC32_GX | VEX_HWCAPS_PPC32_VX;
 
-   mask64 = VEX_HWCAPS_PPC64_V
-            | VEX_HWCAPS_PPC64_FX | VEX_HWCAPS_PPC64_GX;
+   mask64 = VEX_HWCAPS_PPC64_V | VEX_HWCAPS_PPC64_FX
+            | VEX_HWCAPS_PPC64_GX | VEX_HWCAPS_PPC64_VX;
 
    if (mode64) {
       vassert((hwcaps_host & mask32) == 0);
diff --git a/main/VEX/priv/host_s390_defs.c b/main/VEX/priv/host_s390_defs.c
new file mode 100644
index 0000000..a8052b5
--- /dev/null
+++ b/main/VEX/priv/host_s390_defs.c
@@ -0,0 +1,7157 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
+
+/*---------------------------------------------------------------*/
+/*--- begin                                  host_s390_defs.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Florian Krohm */
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+#include "libvex_trc_values.h"
+#include "libvex_guest_offsets.h"
+#include "libvex_s390x_common.h"
+
+#include "main_util.h"
+#include "main_globals.h"
+#include "host_generic_regs.h"
+#include "host_s390_defs.h"
+#include "host_s390_disasm.h"
+#include <stdarg.h>
+
+/* KLUDGE: We need to know the hwcaps of the host when generating
+   code. But that info is not passed to emit_S390Instr. Only mode64 is
+   being passed. So, ideally, we want this passed as an argument, too.
+   Until then, we use a global variable. This variable is set as a side
+   effect of iselSB_S390. This is safe because instructions are selected
+   before they are emitted. */
+const VexArchInfo *s390_archinfo_host;
+
+
+/*------------------------------------------------------------*/
+/*--- Forward declarations                                 ---*/
+/*------------------------------------------------------------*/
+
+static Bool s390_insn_is_reg_reg_move(const s390_insn *, HReg *src, HReg *dst);
+static void s390_insn_map_regs(HRegRemap *, s390_insn *);
+static void s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *);
+
+
+/*------------------------------------------------------------*/
+/*--- Registers                                            ---*/
+/*------------------------------------------------------------*/
+
+/* Decompile the given register into a static buffer and return it */
+const HChar *
+s390_hreg_as_string(HReg reg)
+{
+   static HChar buf[10];
+
+   static const HChar ireg_names[16][5] = {
+      "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
+      "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
+   };
+
+   static const HChar freg_names[16][5] = {
+      "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
+      "%f8",  "%f9",  "%f10", "%f11", "%f12", "%f13", "%f14", "%f15"
+   };
+
+   UInt r;  /* hregNumber() returns a UInt */
+
+   r = hregNumber(reg);
+
+   /* Be generic for all virtual regs. */
+   if (hregIsVirtual(reg)) {
+      buf[0] = '\0';
+      switch (hregClass(reg)) {
+      case HRcInt64: vex_sprintf(buf, "%%vR%d", r); break;
+      case HRcFlt64: vex_sprintf(buf, "%%vF%d", r); break;
+      default:       goto fail;
+      }
+      return buf;
+   }
+
+   /* But specific for real regs. */
+   vassert(r < 16);
+
+   switch (hregClass(reg)) {
+   case HRcInt64: return ireg_names[r];
+   case HRcFlt64: return freg_names[r];
+   default:       goto fail;
+   }
+
+ fail: vpanic("s390_hreg_as_string");
+}
+
+
+/* Tell the register allocator which registers can be allocated. */
+static void
+s390_hreg_get_allocable(Int *nregs, HReg **arr)
+{
+   UInt i;
+
+   /* Total number of allocable registers (all classes) */
+   *nregs =  16 /* GPRs */
+      -  1 /* r0 */
+      -  1 /* r12 register holding VG_(dispatch_ctr) */
+      -  1 /* r13 guest state pointer */
+      -  1 /* r14 link register */
+      -  1 /* r15 stack pointer */
+      + 16 /* FPRs */
+      ;
+
+   *arr = LibVEX_Alloc(*nregs * sizeof(HReg));
+
+   i = 0;
+
+   /* GPR0 is not available because it is interpreted as 0, when used
+      as a base or index register. */
+   (*arr)[i++] = mkHReg(1,  HRcInt64, False);
+   (*arr)[i++] = mkHReg(2,  HRcInt64, False);
+   (*arr)[i++] = mkHReg(3,  HRcInt64, False);
+   (*arr)[i++] = mkHReg(4,  HRcInt64, False);
+   (*arr)[i++] = mkHReg(5,  HRcInt64, False);
+   (*arr)[i++] = mkHReg(6,  HRcInt64, False);
+   (*arr)[i++] = mkHReg(7,  HRcInt64, False);
+   (*arr)[i++] = mkHReg(8,  HRcInt64, False);
+   (*arr)[i++] = mkHReg(9,  HRcInt64, False);
+   /* GPR10 and GPR11 are used for instructions that use register pairs.
+      Otherwise, they are available to the allocator */
+   (*arr)[i++] = mkHReg(10, HRcInt64, False);
+   (*arr)[i++] = mkHReg(11, HRcInt64, False);
+   /* GPR12 is not available because it caches VG_(dispatch_ctr) */
+   /* GPR13 is not available because it is used as guest state pointer */
+   /* GPR14 is not available because it is used as link register */
+   /* GPR15 is not available because it is used as stack pointer */
+
+   /* Add the available real (non-virtual) FPRs */
+   (*arr)[i++] = mkHReg(0,  HRcFlt64, False);
+   (*arr)[i++] = mkHReg(1,  HRcFlt64, False);
+   (*arr)[i++] = mkHReg(2,  HRcFlt64, False);
+   (*arr)[i++] = mkHReg(3,  HRcFlt64, False);
+   (*arr)[i++] = mkHReg(4,  HRcFlt64, False);
+   (*arr)[i++] = mkHReg(5,  HRcFlt64, False);
+   (*arr)[i++] = mkHReg(6,  HRcFlt64, False);
+   (*arr)[i++] = mkHReg(7,  HRcFlt64, False);
+   (*arr)[i++] = mkHReg(8,  HRcFlt64, False);
+   (*arr)[i++] = mkHReg(9,  HRcFlt64, False);
+   (*arr)[i++] = mkHReg(10, HRcFlt64, False);
+   (*arr)[i++] = mkHReg(11, HRcFlt64, False);
+   (*arr)[i++] = mkHReg(12, HRcFlt64, False);
+   (*arr)[i++] = mkHReg(13, HRcFlt64, False);
+   (*arr)[i++] = mkHReg(14, HRcFlt64, False);
+   (*arr)[i++] = mkHReg(15, HRcFlt64, False);
+   /* FPR12 - FPR15 are also used as register pairs for 128-bit
+      floating point operations */
+}
+
+
+/* Return the real register that holds the guest state pointer */
+HReg
+s390_hreg_guest_state_pointer(void)
+{
+   return mkHReg(S390_REGNO_GUEST_STATE_POINTER, HRcInt64, False);
+}
+
+/* Is VALUE within the domain of a 20-bit signed integer. */
+static __inline__ Bool
+fits_signed_20bit(Int value)
+{
+   return ((value << 12) >> 12) == value;
+}
+
+
+/* Is VALUE within the domain of a 12-bit unsigned integer. */
+static __inline__ Bool
+fits_unsigned_12bit(Int value)
+{
+   return (value & 0xFFF) == value;
+}
+
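Both predicates are round-trip tests: the shift pair is lossless exactly when bits 20..31 are a sign-extension of bit 19, and the mask is lossless exactly when no bits above bit 11 are set. Written as explicit range checks, an equivalent sketch (the shift idiom above additionally relies on GCC's arithmetic right shift of signed ints):

   static Bool fits_signed_20bit_alt(Int v)
   {
      return v >= -(1 << 19) && v < (1 << 19);  /* [-524288 .. 524287] */
   }

   static Bool fits_unsigned_12bit_alt(Int v)
   {
      return v >= 0 && v < (1 << 12);           /* [0 .. 4095] */
   }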
+/*------------------------------------------------------------*/
+/*--- Addressing modes (amodes)                            ---*/
+/*------------------------------------------------------------*/
+
+/* Construct a b12 amode. */
+s390_amode *
+s390_amode_b12(Int d, HReg b)
+{
+   s390_amode *am = LibVEX_Alloc(sizeof(s390_amode));
+
+   vassert(fits_unsigned_12bit(d));
+
+   am->tag = S390_AMODE_B12;
+   am->d = d;
+   am->b = b;
+   am->x = 0;  /* hregNumber(0) == 0 */
+
+   return am;
+}
+
+
+/* Construct a b20 amode. */
+s390_amode *
+s390_amode_b20(Int d, HReg b)
+{
+   s390_amode *am = LibVEX_Alloc(sizeof(s390_amode));
+
+   vassert(fits_signed_20bit(d));
+
+   am->tag = S390_AMODE_B20;
+   am->d = d;
+   am->b = b;
+   am->x = 0;  /* hregNumber(0) == 0 */
+
+   return am;
+}
+
+
+/* Construct a bx12 amode. */
+s390_amode *
+s390_amode_bx12(Int d, HReg b, HReg x)
+{
+   s390_amode *am = LibVEX_Alloc(sizeof(s390_amode));
+
+   vassert(fits_unsigned_12bit(d));
+   vassert(b != 0);
+   vassert(x != 0);
+
+   am->tag = S390_AMODE_BX12;
+   am->d = d;
+   am->b = b;
+   am->x = x;
+
+   return am;
+}
+
+
+/* Construct a bx20 amode. */
+s390_amode *
+s390_amode_bx20(Int d, HReg b, HReg x)
+{
+   s390_amode *am = LibVEX_Alloc(sizeof(s390_amode));
+
+   vassert(fits_signed_20bit(d));
+   vassert(b != 0);
+   vassert(x != 0);
+
+   am->tag = S390_AMODE_BX20;
+   am->d = d;
+   am->b = b;
+   am->x = x;
+
+   return am;
+}
+
+
+/* Construct an AMODE for accessing the guest state at OFFSET */
+s390_amode *
+s390_amode_for_guest_state(Int offset)
+{
+   if (fits_unsigned_12bit(offset))
+      return s390_amode_b12(offset, s390_hreg_guest_state_pointer());
+
+   vpanic("invalid guest state offset");
+}
+
+
+/* Decompile the given amode into a static buffer and return it. */
+const HChar *
+s390_amode_as_string(const s390_amode *am)
+{
+   static HChar buf[30];
+   HChar *p;
+
+   buf[0] = '\0';
+   p = buf;
+
+   switch (am->tag) {
+   case S390_AMODE_B12:
+   case S390_AMODE_B20:
+      vex_sprintf(p, "%d(%s)", am->d, s390_hreg_as_string(am->b));
+      break;
+
+   case S390_AMODE_BX12:
+   case S390_AMODE_BX20:
+      /* s390_hreg_as_string returns a pointer to a static buffer, so
+         the two register names must be formatted in separate calls */
+      p += vex_sprintf(p, "%d(%s,", am->d, s390_hreg_as_string(am->x));
+      vex_sprintf(p, "%s)", s390_hreg_as_string(am->b));
+      break;
+
+   default:
+      vpanic("s390_amode_as_string");
+   }
+
+   return buf;
+}
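The split matters because s390_hreg_as_string reuses a single static buffer: evaluating it twice within one call would make both %s arguments alias the same storage. A sketch of the pitfall being avoided, collapsed into one hypothetical call:

   /* WRONG: the second call overwrites the buffer the first returned */
   vex_sprintf(p, "%d(%s,%s)", am->d,
               s390_hreg_as_string(am->x), s390_hreg_as_string(am->b));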
+
+
+/* Helper function for s390_amode_is_sane */
+static __inline__ Bool
+is_virtual_gpr(HReg reg)
+{
+   return hregIsVirtual(reg) && hregClass(reg) == HRcInt64;
+}
+
+
+/* Sanity check for an amode */
+Bool
+s390_amode_is_sane(const s390_amode *am)
+{
+   switch (am->tag) {
+   case S390_AMODE_B12:
+      return is_virtual_gpr(am->b) && fits_unsigned_12bit(am->d);
+
+   case S390_AMODE_B20:
+      return is_virtual_gpr(am->b) && fits_signed_20bit(am->d);
+
+   case S390_AMODE_BX12:
+      return is_virtual_gpr(am->b) && is_virtual_gpr(am->x) &&
+             fits_unsigned_12bit(am->d);
+
+   case S390_AMODE_BX20:
+      return is_virtual_gpr(am->b) && is_virtual_gpr(am->x) &&
+             fits_signed_20bit(am->d);
+
+   default:
+      vpanic("s390_amode_is_sane");
+   }
+}
+
+
+/* Record the register use of an amode */
+static void
+s390_amode_get_reg_usage(HRegUsage *u, const s390_amode *am)
+{
+   switch (am->tag) {
+   case S390_AMODE_B12:
+   case S390_AMODE_B20:
+      addHRegUse(u, HRmRead, am->b);
+      return;
+
+   case S390_AMODE_BX12:
+   case S390_AMODE_BX20:
+      addHRegUse(u, HRmRead, am->b);
+      addHRegUse(u, HRmRead, am->x);
+      return;
+
+   default:
+      vpanic("s390_amode_get_reg_usage");
+   }
+}
+
+
+static void
+s390_amode_map_regs(HRegRemap *m, s390_amode *am)
+{
+   switch (am->tag) {
+   case S390_AMODE_B12:
+   case S390_AMODE_B20:
+      am->b = lookupHRegRemap(m, am->b);
+      return;
+
+   case S390_AMODE_BX12:
+   case S390_AMODE_BX20:
+      am->b = lookupHRegRemap(m, am->b);
+      am->x = lookupHRegRemap(m, am->x);
+      return;
+
+   default:
+      vpanic("s390_amode_map_regs");
+   }
+}
+
+
+void
+ppS390AMode(s390_amode *am)
+{
+   vex_printf("%s", s390_amode_as_string(am));
+}
+
+void
+ppS390Instr(s390_insn *insn, Bool mode64)
+{
+   vex_printf("%s", s390_insn_as_string(insn));
+}
+
+void
+ppHRegS390(HReg reg)
+{
+   vex_printf("%s", s390_hreg_as_string(reg));
+}
+
+/*------------------------------------------------------------*/
+/*--- Helpers for register allocation                      ---*/
+/*------------------------------------------------------------*/
+
+/* Called once per translation. */
+void
+getAllocableRegs_S390(Int *nregs, HReg **arr, Bool mode64)
+{
+   s390_hreg_get_allocable(nregs, arr);
+}
+
+
+/* Tell the register allocator how the given instruction uses the registers
+   it refers to. */
+void
+getRegUsage_S390Instr(HRegUsage *u, s390_insn *insn, Bool mode64)
+{
+   s390_insn_get_reg_usage(u, insn);
+}
+
+
+/* Map the registers of the given instruction */
+void
+mapRegs_S390Instr(HRegRemap *m, s390_insn *insn, Bool mode64)
+{
+   s390_insn_map_regs(m, insn);
+}
+
+
+/* Figure out if the given insn represents a reg-reg move, and if so
+   assign the source and destination to *src and *dst.  If in doubt say No.
+   Used by the register allocator to do move coalescing. */
+Bool
+isMove_S390Instr(s390_insn *insn, HReg *src, HReg *dst)
+{
+   return s390_insn_is_reg_reg_move(insn, src, dst);
+}
+
+
+/* Generate s390 spill/reload instructions under the direction of the
+   register allocator.  Note it's critical these don't write the
+   condition codes. This is like an Ist_Put */
+void
+genSpill_S390(HInstr **i1, HInstr **i2, HReg rreg, Int offsetB, Bool mode64)
+{
+   s390_amode *am;
+
+   vassert(offsetB >= 0);
+   vassert(offsetB < (1 << 12));   /* because we use a b12 amode */
+   vassert(!hregIsVirtual(rreg));
+
+   *i1 = *i2 = NULL;
+
+   am = s390_amode_for_guest_state(offsetB);
+
+   switch (hregClass(rreg)) {
+   case HRcInt64:
+   case HRcFlt64:
+      *i1 = s390_insn_store(8, am, rreg);
+      return;
+
+   default:
+      ppHRegClass(hregClass(rreg));
+      vpanic("genSpill_S390: unimplemented regclass");
+   }
+}
+
+
+/* This is like an Iex_Get */
+void
+genReload_S390(HInstr **i1, HInstr **i2, HReg rreg, Int offsetB, Bool mode64)
+{
+   s390_amode *am;
+
+   vassert(offsetB >= 0);
+   vassert(offsetB < (1 << 12));   /* because we use a b12 amode */
+   vassert(!hregIsVirtual(rreg));
+
+   *i1 = *i2 = NULL;
+
+   am = s390_amode_for_guest_state(offsetB);
+
+   switch (hregClass(rreg)) {
+   case HRcInt64:
+   case HRcFlt64:
+      *i1 = s390_insn_load(8, rreg, am);
+      return;
+
+   default:
+      ppHRegClass(hregClass(rreg));
+      vpanic("genReload_S390: unimplemented regclass");
+   }
+}
+
+/* Helper function for s390_insn_get_reg_usage */
+static void
+s390_opnd_RMI_get_reg_usage(HRegUsage *u, s390_opnd_RMI op)
+{
+   switch (op.tag) {
+   case S390_OPND_REG:
+      addHRegUse(u, HRmRead, op.variant.reg);
+      break;
+
+   case S390_OPND_AMODE:
+      s390_amode_get_reg_usage(u, op.variant.am);
+      break;
+
+   case S390_OPND_IMMEDIATE:
+      break;
+
+   default:
+      vpanic("s390_opnd_RMI_get_reg_usage");
+   }
+}
+
+
+/* Tell the register allocator how the given insn uses the registers */
+static void
+s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn)
+{
+   initHRegUsage(u);
+
+   switch (insn->tag) {
+   case S390_INSN_LOAD:
+      addHRegUse(u, HRmWrite, insn->variant.load.dst);
+      s390_amode_get_reg_usage(u, insn->variant.load.src);
+      break;
+
+   case S390_INSN_LOAD_IMMEDIATE:
+      addHRegUse(u, HRmWrite, insn->variant.load_immediate.dst);
+      break;
+
+   case S390_INSN_STORE:
+      addHRegUse(u, HRmRead, insn->variant.store.src);
+      s390_amode_get_reg_usage(u, insn->variant.store.dst);
+      break;
+
+   case S390_INSN_MOVE:
+      addHRegUse(u, HRmRead,  insn->variant.move.src);
+      addHRegUse(u, HRmWrite, insn->variant.move.dst);
+      break;
+
+   case S390_INSN_COND_MOVE:
+      s390_opnd_RMI_get_reg_usage(u, insn->variant.cond_move.src);
+      addHRegUse(u, HRmWrite, insn->variant.cond_move.dst);
+      break;
+
+   case S390_INSN_ALU:
+      addHRegUse(u, HRmWrite, insn->variant.alu.dst);
+      addHRegUse(u, HRmRead,  insn->variant.alu.dst);  /* op1 */
+      s390_opnd_RMI_get_reg_usage(u, insn->variant.alu.op2);
+      break;
+
+   case S390_INSN_MUL:
+      addHRegUse(u, HRmRead,  insn->variant.mul.dst_lo);  /* op1 */
+      addHRegUse(u, HRmWrite, insn->variant.mul.dst_lo);
+      addHRegUse(u, HRmWrite, insn->variant.mul.dst_hi);
+      s390_opnd_RMI_get_reg_usage(u, insn->variant.mul.op2);
+      break;
+
+   case S390_INSN_DIV:
+      addHRegUse(u, HRmRead,  insn->variant.div.op1_lo);
+      addHRegUse(u, HRmRead,  insn->variant.div.op1_hi);
+      addHRegUse(u, HRmWrite, insn->variant.div.op1_lo);
+      addHRegUse(u, HRmWrite, insn->variant.div.op1_hi);
+      s390_opnd_RMI_get_reg_usage(u, insn->variant.div.op2);
+      break;
+
+   case S390_INSN_DIVS:
+      addHRegUse(u, HRmRead,  insn->variant.divs.op1);
+      addHRegUse(u, HRmWrite, insn->variant.divs.op1); /* quotient */
+      addHRegUse(u, HRmWrite, insn->variant.divs.rem); /* remainder */
+      s390_opnd_RMI_get_reg_usage(u, insn->variant.divs.op2);
+      break;
+
+   case S390_INSN_CLZ:
+      addHRegUse(u, HRmWrite, insn->variant.clz.num_bits);
+      addHRegUse(u, HRmWrite, insn->variant.clz.clobber);
+      s390_opnd_RMI_get_reg_usage(u, insn->variant.clz.src);
+      break;
+
+   case S390_INSN_UNOP:
+      addHRegUse(u, HRmWrite, insn->variant.unop.dst);
+      s390_opnd_RMI_get_reg_usage(u, insn->variant.unop.src);
+      break;
+
+   case S390_INSN_TEST:
+      s390_opnd_RMI_get_reg_usage(u, insn->variant.test.src);
+      break;
+
+   case S390_INSN_CC2BOOL:
+      addHRegUse(u, HRmWrite, insn->variant.cc2bool.dst);
+      break;
+
+   case S390_INSN_CAS:
+      addHRegUse(u, HRmRead,  insn->variant.cas.op1);
+      s390_amode_get_reg_usage(u, insn->variant.cas.op2);
+      addHRegUse(u, HRmRead,  insn->variant.cas.op3);
+      addHRegUse(u, HRmWrite,  insn->variant.cas.old_mem);
+      break;
+
+   case S390_INSN_COMPARE:
+      addHRegUse(u, HRmRead, insn->variant.compare.src1);
+      s390_opnd_RMI_get_reg_usage(u, insn->variant.compare.src2);
+      break;
+
+   case S390_INSN_BRANCH:
+      s390_opnd_RMI_get_reg_usage(u, insn->variant.branch.dst);
+      /* The destination address is loaded into S390_REGNO_RETURN_VALUE.
+         See s390_insn_branch_emit. */
+      addHRegUse(u, HRmWrite,
+                 mkHReg(S390_REGNO_RETURN_VALUE, HRcInt64, False));
+      break;
+
+   case S390_INSN_HELPER_CALL: {
+      UInt i;
+
+      /* Assume that all volatile registers are clobbered. The s390 ABI
+         says the volatile GPRs are r0 - r5. Valgrind's register allocator
+         does not know about r0, so we can leave that out */
+      for (i = 1; i <= 5; ++i) {
+         addHRegUse(u, HRmWrite, mkHReg(i, HRcInt64, False));
+      }
+
+      /* Ditto for floating point registers. f0 - f7 are volatile */
+      for (i = 0; i <= 7; ++i) {
+         addHRegUse(u, HRmWrite, mkHReg(i, HRcFlt64, False));
+      }
+
+      /* The registers used for passing arguments will be read. Not all
+         of them necessarily will be, but we have to assume the worst. */
+      for (i = 0; i < insn->variant.helper_call.num_args; ++i) {
+         addHRegUse(u, HRmRead, mkHReg(s390_gprno_from_arg_index(i),
+                                       HRcInt64, False));
+      }
+
+      /* s390_insn_helper_call_emit also reads / writes the link register
+         and stack pointer. But those registers are not visible to the
+         register allocator. So we don't need to do anything for them. */
+      break;
+   }
+
+   case S390_INSN_BFP_TRIOP:
+      addHRegUse(u, HRmWrite, insn->variant.bfp_triop.dst);
+      addHRegUse(u, HRmRead,  insn->variant.bfp_triop.dst);  /* first */
+      addHRegUse(u, HRmRead,  insn->variant.bfp_triop.op2);  /* second */
+      addHRegUse(u, HRmRead,  insn->variant.bfp_triop.op3);  /* third */
+      break;
+
+   case S390_INSN_BFP_BINOP:
+      addHRegUse(u, HRmWrite, insn->variant.bfp_binop.dst);
+      addHRegUse(u, HRmRead,  insn->variant.bfp_binop.dst);  /* left */
+      addHRegUse(u, HRmRead,  insn->variant.bfp_binop.op2);  /* right */
+      break;
+
+   case S390_INSN_BFP_UNOP:
+      addHRegUse(u, HRmWrite, insn->variant.bfp_unop.dst);
+      addHRegUse(u, HRmRead,  insn->variant.bfp_unop.op);  /* operand */
+      break;
+
+   case S390_INSN_BFP_COMPARE:
+      addHRegUse(u, HRmWrite, insn->variant.bfp_compare.dst);
+      addHRegUse(u, HRmRead,  insn->variant.bfp_compare.op1);  /* left */
+      addHRegUse(u, HRmRead,  insn->variant.bfp_compare.op2);  /* right */
+      break;
+
+   case S390_INSN_BFP128_BINOP:
+      addHRegUse(u, HRmWrite, insn->variant.bfp128_binop.dst_hi);
+      addHRegUse(u, HRmWrite, insn->variant.bfp128_binop.dst_lo);
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_binop.dst_hi);  /* left */
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_binop.dst_lo);  /* left */
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_binop.op2_hi);  /* right */
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_binop.op2_lo);  /* right */
+      break;
+
+   case S390_INSN_BFP128_COMPARE:
+      addHRegUse(u, HRmWrite, insn->variant.bfp128_compare.dst);
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_compare.op1_hi);  /* left */
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_compare.op1_lo);  /* left */
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_compare.op2_hi);  /* right */
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_compare.op2_lo);  /* right */
+      break;
+
+   case S390_INSN_BFP128_UNOP:
+      addHRegUse(u, HRmWrite, insn->variant.bfp128_unop.dst_hi);
+      addHRegUse(u, HRmWrite, insn->variant.bfp128_unop.dst_lo);
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_unop.op_hi);
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_unop.op_lo);
+      break;
+
+   case S390_INSN_BFP128_CONVERT_TO:
+      addHRegUse(u, HRmWrite, insn->variant.bfp128_unop.dst_hi);
+      addHRegUse(u, HRmWrite, insn->variant.bfp128_unop.dst_lo);
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_unop.op_hi);
+      break;
+
+   case S390_INSN_BFP128_CONVERT_FROM:
+      addHRegUse(u, HRmWrite, insn->variant.bfp128_unop.dst_hi);
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_unop.op_hi);
+      addHRegUse(u, HRmRead,  insn->variant.bfp128_unop.op_lo);
+      break;
+
+   case S390_INSN_MFENCE:
+      break;
+
+   default:
+      vpanic("s390_insn_get_reg_usage");
+   }
+}
+
+
+/* Helper function for s390_insn_map_regs */
+static void
+s390_opnd_RMI_map_regs(HRegRemap *m, s390_opnd_RMI *op)
+{
+   switch (op->tag) {
+   case S390_OPND_REG:
+      op->variant.reg = lookupHRegRemap(m, op->variant.reg);
+      break;
+
+   case S390_OPND_IMMEDIATE:
+      break;
+
+   case S390_OPND_AMODE:
+      s390_amode_map_regs(m, op->variant.am);
+      break;
+
+   default:
+      vpanic("s390_opnd_RMI_map_regs");
+   }
+}
+
+
+static void
+s390_insn_map_regs(HRegRemap *m, s390_insn *insn)
+{
+   switch (insn->tag) {
+   case S390_INSN_LOAD:
+      insn->variant.load.dst = lookupHRegRemap(m, insn->variant.load.dst);
+      s390_amode_map_regs(m, insn->variant.load.src);
+      break;
+
+   case S390_INSN_STORE:
+      s390_amode_map_regs(m, insn->variant.store.dst);
+      insn->variant.store.src = lookupHRegRemap(m, insn->variant.store.src);
+      break;
+
+   case S390_INSN_MOVE:
+      insn->variant.move.dst = lookupHRegRemap(m, insn->variant.move.dst);
+      insn->variant.move.src = lookupHRegRemap(m, insn->variant.move.src);
+      break;
+
+   case S390_INSN_COND_MOVE:
+      insn->variant.cond_move.dst = lookupHRegRemap(m, insn->variant.cond_move.dst);
+      s390_opnd_RMI_map_regs(m, &insn->variant.cond_move.src);
+      break;
+
+   case S390_INSN_LOAD_IMMEDIATE:
+      insn->variant.load_immediate.dst =
+         lookupHRegRemap(m, insn->variant.load_immediate.dst);
+      break;
+
+   case S390_INSN_ALU:
+      insn->variant.alu.dst = lookupHRegRemap(m, insn->variant.alu.dst);
+      s390_opnd_RMI_map_regs(m, &insn->variant.alu.op2);
+      break;
+
+   case S390_INSN_MUL:
+      insn->variant.mul.dst_hi = lookupHRegRemap(m, insn->variant.mul.dst_hi);
+      insn->variant.mul.dst_lo = lookupHRegRemap(m, insn->variant.mul.dst_lo);
+      s390_opnd_RMI_map_regs(m, &insn->variant.mul.op2);
+      break;
+
+   case S390_INSN_DIV:
+      insn->variant.div.op1_hi = lookupHRegRemap(m, insn->variant.div.op1_hi);
+      insn->variant.div.op1_lo = lookupHRegRemap(m, insn->variant.div.op1_lo);
+      s390_opnd_RMI_map_regs(m, &insn->variant.div.op2);
+      break;
+
+   case S390_INSN_DIVS:
+      insn->variant.divs.op1 = lookupHRegRemap(m, insn->variant.divs.op1);
+      insn->variant.divs.rem = lookupHRegRemap(m, insn->variant.divs.rem);
+      s390_opnd_RMI_map_regs(m, &insn->variant.divs.op2);
+      break;
+
+   case S390_INSN_CLZ:
+      insn->variant.clz.num_bits = lookupHRegRemap(m, insn->variant.clz.num_bits);
+      insn->variant.clz.clobber  = lookupHRegRemap(m, insn->variant.clz.clobber);
+      s390_opnd_RMI_map_regs(m, &insn->variant.clz.src);
+      break;
+
+   case S390_INSN_UNOP:
+      insn->variant.unop.dst = lookupHRegRemap(m, insn->variant.unop.dst);
+      s390_opnd_RMI_map_regs(m, &insn->variant.unop.src);
+      break;
+
+   case S390_INSN_TEST:
+      s390_opnd_RMI_map_regs(m, &insn->variant.test.src);
+      break;
+
+   case S390_INSN_CC2BOOL:
+      insn->variant.cc2bool.dst = lookupHRegRemap(m, insn->variant.cc2bool.dst);
+      break;
+
+   case S390_INSN_CAS:
+      insn->variant.cas.op1 = lookupHRegRemap(m, insn->variant.cas.op1);
+      s390_amode_map_regs(m, insn->variant.cas.op2);
+      insn->variant.cas.op3 = lookupHRegRemap(m, insn->variant.cas.op3);
+      insn->variant.cas.old_mem = lookupHRegRemap(m, insn->variant.cas.old_mem);
+      break;
+
+   case S390_INSN_COMPARE:
+      insn->variant.compare.src1 = lookupHRegRemap(m, insn->variant.compare.src1);
+      s390_opnd_RMI_map_regs(m, &insn->variant.compare.src2);
+      break;
+
+   case S390_INSN_BRANCH:
+      s390_opnd_RMI_map_regs(m, &insn->variant.branch.dst);
+      /* No need to map S390_REGNO_RETURN_VALUE. It's not virtual */
+      break;
+
+   case S390_INSN_HELPER_CALL:
+      /* s390_insn_helper_call_emit also reads / writes the link register
+         and stack pointer. But those registers are not visible to the
+         register allocator. So we don't need to do anything for them.
+         As for the arguments of the helper call -- they will be loaded into
+         non-virtual registers. Again, we don't need to do anything for those
+         here. */
+      break;
+
+   case S390_INSN_BFP_TRIOP:
+      insn->variant.bfp_triop.dst = lookupHRegRemap(m, insn->variant.bfp_triop.dst);
+      insn->variant.bfp_triop.op2 = lookupHRegRemap(m, insn->variant.bfp_triop.op2);
+      insn->variant.bfp_triop.op3 = lookupHRegRemap(m, insn->variant.bfp_triop.op3);
+      break;
+
+   case S390_INSN_BFP_BINOP:
+      insn->variant.bfp_binop.dst = lookupHRegRemap(m, insn->variant.bfp_binop.dst);
+      insn->variant.bfp_binop.op2 = lookupHRegRemap(m, insn->variant.bfp_binop.op2);
+      break;
+
+   case S390_INSN_BFP_UNOP:
+      insn->variant.bfp_unop.dst = lookupHRegRemap(m, insn->variant.bfp_unop.dst);
+      insn->variant.bfp_unop.op  = lookupHRegRemap(m, insn->variant.bfp_unop.op);
+      break;
+
+   case S390_INSN_BFP_COMPARE:
+      insn->variant.bfp_compare.dst = lookupHRegRemap(m, insn->variant.bfp_compare.dst);
+      insn->variant.bfp_compare.op1 = lookupHRegRemap(m, insn->variant.bfp_compare.op1);
+      insn->variant.bfp_compare.op2 = lookupHRegRemap(m, insn->variant.bfp_compare.op2);
+      break;
+
+   case S390_INSN_BFP128_BINOP:
+      insn->variant.bfp128_binop.dst_hi =
+         lookupHRegRemap(m, insn->variant.bfp128_binop.dst_hi);
+      insn->variant.bfp128_binop.dst_lo =
+         lookupHRegRemap(m, insn->variant.bfp128_binop.dst_lo);
+      insn->variant.bfp128_binop.op2_hi =
+         lookupHRegRemap(m, insn->variant.bfp128_binop.op2_hi);
+      insn->variant.bfp128_binop.op2_lo =
+         lookupHRegRemap(m, insn->variant.bfp128_binop.op2_lo);
+      break;
+
+   case S390_INSN_BFP128_COMPARE:
+      insn->variant.bfp128_compare.dst =
+         lookupHRegRemap(m, insn->variant.bfp128_compare.dst);
+      insn->variant.bfp128_compare.op1_hi =
+         lookupHRegRemap(m, insn->variant.bfp128_compare.op1_hi);
+      insn->variant.bfp128_compare.op1_lo =
+         lookupHRegRemap(m, insn->variant.bfp128_compare.op1_lo);
+      insn->variant.bfp128_compare.op2_hi =
+         lookupHRegRemap(m, insn->variant.bfp128_compare.op2_hi);
+      insn->variant.bfp128_compare.op2_lo =
+         lookupHRegRemap(m, insn->variant.bfp128_compare.op2_lo);
+      break;
+
+   case S390_INSN_BFP128_UNOP:
+      insn->variant.bfp128_unop.dst_hi =
+         lookupHRegRemap(m, insn->variant.bfp128_unop.dst_hi);
+      insn->variant.bfp128_unop.dst_lo =
+         lookupHRegRemap(m, insn->variant.bfp128_unop.dst_lo);
+      insn->variant.bfp128_unop.op_hi =
+         lookupHRegRemap(m, insn->variant.bfp128_unop.op_hi);
+      insn->variant.bfp128_unop.op_lo =
+         lookupHRegRemap(m, insn->variant.bfp128_unop.op_lo);
+      break;
+
+   case S390_INSN_BFP128_CONVERT_TO:
+      insn->variant.bfp128_unop.dst_hi =
+         lookupHRegRemap(m, insn->variant.bfp128_unop.dst_hi);
+      insn->variant.bfp128_unop.dst_lo =
+         lookupHRegRemap(m, insn->variant.bfp128_unop.dst_lo);
+      insn->variant.bfp128_unop.op_hi =
+         lookupHRegRemap(m, insn->variant.bfp128_unop.op_hi);
+      break;
+
+   case S390_INSN_BFP128_CONVERT_FROM:
+      insn->variant.bfp128_unop.dst_hi =
+         lookupHRegRemap(m, insn->variant.bfp128_unop.dst_hi);
+      insn->variant.bfp128_unop.op_hi =
+         lookupHRegRemap(m, insn->variant.bfp128_unop.op_hi);
+      insn->variant.bfp128_unop.op_lo =
+         lookupHRegRemap(m, insn->variant.bfp128_unop.op_lo);
+      break;
+
+   case S390_INSN_MFENCE:
+      break;
+
+   default:
+      vpanic("s390_insn_map_regs");
+   }
+}
+
+
+/* Return True if INSN is a move between two registers of the same class.
+   In that case, assign the source and destination registers to SRC and
+   DST, respectively. */
+static Bool
+s390_insn_is_reg_reg_move(const s390_insn *insn, HReg *src, HReg *dst)
+{
+   if (insn->tag == S390_INSN_MOVE &&
+       hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) {
+      *src = insn->variant.move.src;
+      *dst = insn->variant.move.dst;
+      return True;
+   }
+
+   return False;
+}
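+
+/* The register allocator uses this predicate for move coalescing: when
+   it manages to assign SRC and DST to the same real register, the move
+   becomes a no-op and can be dropped from the generated code. */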
+
+
+/*------------------------------------------------------------*/
+/*--- Functions to emit a sequence of bytes                ---*/
+/*------------------------------------------------------------*/
+
+static __inline__ UChar *
+emit_2bytes(UChar *p, ULong val)
+{
+   return (UChar *)__builtin_memcpy(p, ((UChar *)&val) + 6, 2) + 2;
+}
+
+
+static __inline__ UChar *
+emit_4bytes(UChar *p, ULong val)
+{
+   return (UChar *)__builtin_memcpy(p, ((UChar *)&val) + 4, 4) + 4;
+}
+
+
+static __inline__ UChar *
+emit_6bytes(UChar *p, ULong val)
+{
+   return (UChar *)__builtin_memcpy(p, ((UChar *)&val) + 2, 6) + 6;
+}
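+
+/* The three helpers above assume a big-endian host, which s390 is: the
+   instruction bytes live in the least significant bytes of VAL, so
+   copying from the tail of its 8-byte representation emits them in
+   memory order.  For example, emitting the 6-byte value
+   0xe30000000004 copies bytes 2..7 of { 00 00 e3 00 00 00 00 04 },
+   i.e. e3 00 00 00 00 04. */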
+
+
+/*------------------------------------------------------------*/
+/*--- Functions to emit various instruction formats        ---*/
+/*------------------------------------------------------------*/
+
+static UChar *
+emit_RI(UChar *p, UInt op, UChar r1, UShort i2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)r1) << 20;
+   the_insn |= ((ULong)i2) << 0;
+
+   return emit_4bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_RIL(UChar *p, ULong op, UChar r1, UInt i2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)r1) << 36;
+   the_insn |= ((ULong)i2) << 0;
+
+   return emit_6bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_RR(UChar *p, UInt op, UChar r1, UChar r2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)r1) << 4;
+   the_insn |= ((ULong)r2) << 0;
+
+   return emit_2bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_RRE(UChar *p, UInt op, UChar r1, UChar r2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)r1) << 4;
+   the_insn |= ((ULong)r2) << 0;
+
+   return emit_4bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_RRF(UChar *p, UInt op, UChar r1, UChar r3, UChar r2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)r1) << 12;
+   the_insn |= ((ULong)r3) << 4;
+   the_insn |= ((ULong)r2) << 0;
+
+   return emit_4bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_RRF3(UChar *p, UInt op, UChar r3, UChar r1, UChar r2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)r3) << 12;
+   the_insn |= ((ULong)r1) << 4;
+   the_insn |= ((ULong)r2) << 0;
+
+   return emit_4bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_RS(UChar *p, UInt op, UChar r1, UChar r3, UChar b2, UShort d2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)r1) << 20;
+   the_insn |= ((ULong)r3) << 16;
+   the_insn |= ((ULong)b2) << 12;
+   the_insn |= ((ULong)d2) << 0;
+
+   return emit_4bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_RSY(UChar *p, ULong op, UChar r1, UChar r3, UChar b2, UShort dl2, UChar dh2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)r1) << 36;
+   the_insn |= ((ULong)r3) << 32;
+   the_insn |= ((ULong)b2) << 28;
+   the_insn |= ((ULong)dl2) << 16;
+   the_insn |= ((ULong)dh2) << 8;
+
+   return emit_6bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_RX(UChar *p, UInt op, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)r1) << 20;
+   the_insn |= ((ULong)x2) << 16;
+   the_insn |= ((ULong)b2) << 12;
+   the_insn |= ((ULong)d2) << 0;
+
+   return emit_4bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_RXY(UChar *p, ULong op, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)r1) << 36;
+   the_insn |= ((ULong)x2) << 32;
+   the_insn |= ((ULong)b2) << 28;
+   the_insn |= ((ULong)dl2) << 16;
+   the_insn |= ((ULong)dh2) << 8;
+
+   return emit_6bytes(p, the_insn);
+}
+
+
+static UChar *
+emit_S(UChar *p, UInt op, UChar b2, UShort d2)
+{
+   ULong the_insn = op;
+
+   the_insn |= ((ULong)b2) << 12;
+   the_insn |= ((ULong)d2) << 0;
+
+   return emit_4bytes(p, the_insn);
+}
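+
+/* A worked example of how the fields are packed: AHI (add halfword
+   immediate, defined below) is RI format with opcode 0xa70a0000, so
+   s390_emit_AHI(p, 1, 5) computes
+
+      0xa70a0000 | (1 << 20) | 5  ==  0xa71a0005
+
+   and emit_4bytes writes the bytes a7 1a 00 05. */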
+
+
+/*------------------------------------------------------------*/
+/*--- Functions to emit particular instructions            ---*/
+/*------------------------------------------------------------*/
+
+static UChar *
+s390_emit_AR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "ar", r1, r2);
+
+   return emit_RR(p, 0x1a00, r1, r2);
+}
+
+
+static UChar *
+s390_emit_AGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "agr", r1, r2);
+
+   return emit_RRE(p, 0xb9080000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_A(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "a", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x5a000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_AY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "ay", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe3000000005aULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_AG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "ag", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000008ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_AFI(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "afi", r1, i2);
+
+   return emit_RIL(p, 0xc20900000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_AGFI(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "agfi", r1, i2);
+
+   return emit_RIL(p, 0xc20800000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_AH(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "ah", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x4a000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_AHY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "ahy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe3000000007aULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_AHI(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "ahi", r1, (Int)(Short)i2);
+
+   return emit_RI(p, 0xa70a0000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_AGHI(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "aghi", r1, (Int)(Short)i2);
+
+   return emit_RI(p, 0xa70b0000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_NR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "nr", r1, r2);
+
+   return emit_RR(p, 0x1400, r1, r2);
+}
+
+
+static UChar *
+s390_emit_NGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "ngr", r1, r2);
+
+   return emit_RRE(p, 0xb9800000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_N(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "n", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x54000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_NY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "ny", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000054ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_NG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "ng", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000080ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_NIHF(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "nihf", r1, i2);
+
+   return emit_RIL(p, 0xc00a00000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_NILF(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "nilf", r1, i2);
+
+   return emit_RIL(p, 0xc00b00000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_NILL(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "nill", r1, i2);
+
+   return emit_RI(p, 0xa5070000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_BASR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "basr", r1, r2);
+
+   return emit_RR(p, 0x0d00, r1, r2);
+}
+
+
+static UChar *
+s390_emit_BCR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC2(XMNM, GPR), S390_XMNM_BCR, r1, r2);
+
+   return emit_RR(p, 0x0700, r1, r2);
+}
+
+
+static UChar *
+s390_emit_BRC(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC2(XMNM, PCREL), S390_XMNM_BRC, r1, (Int)(Short)i2);
+
+   return emit_RI(p, 0xa7040000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_CR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "cr", r1, r2);
+
+   return emit_RR(p, 0x1900, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "cgr", r1, r2);
+
+   return emit_RRE(p, 0xb9200000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_C(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "c", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x59000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_CY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "cy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000059ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_CG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "cg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000020ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_CFI(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "cfi", r1, i2);
+
+   return emit_RIL(p, 0xc20d00000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_CS(UChar *p, UChar r1, UChar r3, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, GPR, UDXB), "cs", r1, r3, d2, 0, b2);
+
+   return emit_RS(p, 0xba000000, r1, r3, b2, d2);
+}
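+
+/* CS compares r1 with the word at D2(B2); if they are equal, r3 is
+   stored at that address, otherwise the current memory word is loaded
+   into r1.  The condition code says which case occurred.  This (and
+   CSG below for doublewords) is what backs S390_INSN_CAS. */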
+
+
+static UChar *
+s390_emit_CSY(UChar *p, UChar r1, UChar r3, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, GPR, SDXB), "csy", r1, r3, dh2, dl2, 0, b2);
+
+   return emit_RSY(p, 0xeb0000000014ULL, r1, r3, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_CSG(UChar *p, UChar r1, UChar r3, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, GPR, SDXB), "csg", r1, r3, dh2, dl2, 0, b2);
+
+   return emit_RSY(p, 0xeb0000000030ULL, r1, r3, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_CLR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "clr", r1, r2);
+
+   return emit_RR(p, 0x1500, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CLGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "clgr", r1, r2);
+
+   return emit_RRE(p, 0xb9210000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CL(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "cl", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x55000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_CLY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "cly", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000055ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_CLG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "clg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000021ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_CLFI(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "clfi", r1, i2);
+
+   return emit_RIL(p, 0xc20f00000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_DR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "dr", r1, r2);
+
+   return emit_RR(p, 0x1d00, r1, r2);
+}
+
+
+static UChar *
+s390_emit_D(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "d", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x5d000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_DLR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "dlr", r1, r2);
+
+   return emit_RRE(p, 0xb9970000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_DLGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "dlgr", r1, r2);
+
+   return emit_RRE(p, 0xb9870000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_DL(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "dl", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000097ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_DLG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "dlg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000087ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_DSGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "dsgr", r1, r2);
+
+   return emit_RRE(p, 0xb90d0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_DSG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "dsg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe3000000000dULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_XR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "xr", r1, r2);
+
+   return emit_RR(p, 0x1700, r1, r2);
+}
+
+
+static UChar *
+s390_emit_XGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "xgr", r1, r2);
+
+   return emit_RRE(p, 0xb9820000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_X(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "x", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x57000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_XY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "xy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000057ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_XG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "xg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000082ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_XIHF(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "xihf", r1, i2);
+
+   return emit_RIL(p, 0xc00600000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_XILF(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "xilf", r1, i2);
+
+   return emit_RIL(p, 0xc00700000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_FLOGR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "flogr", r1, r2);
+
+   return emit_RRE(p, 0xb9830000, r1, r2);
+}
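+
+/* FLOGR requires an even-odd register pair: r1 receives the number of
+   leading zero bits in r2 (64 if r2 is zero) and r1+1 receives r2 with
+   the leftmost one bit cleared.  S390_INSN_CLZ is built on it; its
+   num_bits and clobber operands correspond to that pair. */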
+
+
+static UChar *
+s390_emit_IC(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "ic", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x43000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_ICY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "icy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000073ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_IIHF(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "iihf", r1, i2);
+
+   return emit_RIL(p, 0xc00800000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_IIHH(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "iihh", r1, i2);
+
+   return emit_RI(p, 0xa5000000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_IIHL(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "iihl", r1, i2);
+
+   return emit_RI(p, 0xa5010000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_IILF(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "iilf", r1, i2);
+
+   return emit_RIL(p, 0xc00900000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_IILH(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "iilh", r1, i2);
+
+   return emit_RI(p, 0xa5020000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_IILL(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "iill", r1, i2);
+
+   return emit_RI(p, 0xa5030000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_IPM(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC2(MNM, GPR), "ipm", r1);
+
+   return emit_RRE(p, 0xb2220000, r1, r2);
+}
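+
+/* IPM has no second operand; the r2 field is ignored by the hardware,
+   which is why the disassembly above prints only r1.  The condition
+   code is placed in bits 34-35 of r1 (bits 32-33 are zeroed), so the
+   usual idiom to extract it is "ipm r; srl r,28". */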
+
+
+static UChar *
+s390_emit_LR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "lr", r1, r2);
+
+   return emit_RR(p, 0x1800, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "lgr", r1, r2);
+
+   return emit_RRE(p, 0xb9040000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LGFR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "lgfr", r1, r2);
+
+   return emit_RRE(p, 0xb9140000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_L(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "l", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x58000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_LY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "ly", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000058ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "lg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000004ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LGF(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "lgf", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000014ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LGFI(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "lgfi", r1, i2);
+
+   return emit_RIL(p, 0xc00100000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_LTR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "ltr", r1, r2);
+
+   return emit_RR(p, 0x1200, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LTGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "ltgr", r1, r2);
+
+   return emit_RRE(p, 0xb9020000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LT(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "lt", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000012ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LTG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "ltg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000002ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LBR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "lbr", r1, r2);
+
+   return emit_RRE(p, 0xb9260000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LGBR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "lgbr", r1, r2);
+
+   return emit_RRE(p, 0xb9060000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LB(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "lb", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000076ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LGB(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "lgb", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000077ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LCR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "lcr", r1, r2);
+
+   return emit_RR(p, 0x1300, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LCGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "lcgr", r1, r2);
+
+   return emit_RRE(p, 0xb9030000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LHR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "lhr", r1, r2);
+
+   return emit_RRE(p, 0xb9270000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LGHR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "lghr", r1, r2);
+
+   return emit_RRE(p, 0xb9070000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LH(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "lh", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x48000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_LHY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "lhy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000078ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LGH(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "lgh", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000015ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LHI(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "lhi", r1, (Int)(Short)i2);
+
+   return emit_RI(p, 0xa7080000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_LGHI(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "lghi", r1, (Int)(Short)i2);
+
+   return emit_RI(p, 0xa7090000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_LLGFR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "llgfr", r1, r2);
+
+   return emit_RRE(p, 0xb9160000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LLGF(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "llgf", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000016ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LLCR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "llcr", r1, r2);
+
+   return emit_RRE(p, 0xb9940000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LLGCR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "llgcr", r1, r2);
+
+   return emit_RRE(p, 0xb9840000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LLC(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "llc", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000094ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LLGC(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "llgc", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000090ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LLHR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "llhr", r1, r2);
+
+   return emit_RRE(p, 0xb9950000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LLGHR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "llghr", r1, r2);
+
+   return emit_RRE(p, 0xb9850000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LLH(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "llh", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000095ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LLGH(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "llgh", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000091ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LLILF(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "llilf", r1, i2);
+
+   return emit_RIL(p, 0xc00f00000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_LLILH(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "llilh", r1, i2);
+
+   return emit_RI(p, 0xa50e0000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_LLILL(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "llill", r1, i2);
+
+   return emit_RI(p, 0xa50f0000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_MR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "mr", r1, r2);
+
+   return emit_RR(p, 0x1c00, r1, r2);
+}
+
+
+static UChar *
+s390_emit_M(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "m", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x5c000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_MFY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_gie);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "mfy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe3000000005cULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_MH(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "mh", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x4c000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_MHY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_gie);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "mhy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe3000000007cULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_MHI(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "mhi", r1, (Int)(Short)i2);
+
+   return emit_RI(p, 0xa70c0000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_MLR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "mlr", r1, r2);
+
+   return emit_RRE(p, 0xb9960000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_MLGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "mlgr", r1, r2);
+
+   return emit_RRE(p, 0xb9860000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_ML(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "ml", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000096ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_MLG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "mlg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000086ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_MSR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "msr", r1, r2);
+
+   return emit_RRE(p, 0xb2520000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_MSGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "msgr", r1, r2);
+
+   return emit_RRE(p, 0xb90c0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_MS(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "ms", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x71000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_MSY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "msy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000051ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_MSG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "msg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe3000000000cULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_MSFI(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_gie);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "msfi", r1, i2);
+
+   return emit_RIL(p, 0xc20100000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_MSGFI(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_gie);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, INT), "msgfi", r1, i2);
+
+   return emit_RIL(p, 0xc20000000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_OR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "or", r1, r2);
+
+   return emit_RR(p, 0x1600, r1, r2);
+}
+
+
+static UChar *
+s390_emit_OGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "ogr", r1, r2);
+
+   return emit_RRE(p, 0xb9810000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_O(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "o", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x56000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_OY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "oy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000056ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_OG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "og", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000081ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_OIHF(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "oihf", r1, i2);
+
+   return emit_RIL(p, 0xc00c00000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_OILF(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "oilf", r1, i2);
+
+   return emit_RIL(p, 0xc00d00000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_OILL(UChar *p, UChar r1, UShort i2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "oill", r1, i2);
+
+   return emit_RI(p, 0xa50b0000, r1, i2);
+}
+
+
+static UChar *
+s390_emit_SLL(UChar *p, UChar r1, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "sll", r1, d2, 0, b2);
+
+   return emit_RS(p, 0x89000000, r1, 0, b2, d2);
+}
+
+
+static UChar *
+s390_emit_SLLG(UChar *p, UChar r1, UChar r3, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, GPR, SDXB), "sllg", r1, r3, dh2, dl2, 0, b2);
+
+   return emit_RSY(p, 0xeb000000000dULL, r1, r3, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_SRA(UChar *p, UChar r1, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "sra", r1, d2, 0, b2);
+
+   return emit_RS(p, 0x8a000000, r1, 0, b2, d2);
+}
+
+
+static UChar *
+s390_emit_SRAG(UChar *p, UChar r1, UChar r3, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, GPR, SDXB), "srag", r1, r3, dh2, dl2, 0, b2);
+
+   return emit_RSY(p, 0xeb000000000aULL, r1, r3, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_SRL(UChar *p, UChar r1, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "srl", r1, d2, 0, b2);
+
+   return emit_RS(p, 0x88000000, r1, 0, b2, d2);
+}
+
+
+static UChar *
+s390_emit_SRLG(UChar *p, UChar r1, UChar r3, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, GPR, SDXB), "srlg", r1, r3, dh2, dl2, 0, b2);
+
+   return emit_RSY(p, 0xeb000000000cULL, r1, r3, b2, dl2, dh2);
+}
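+
+/* For the 32-bit RS-format shifts above (SLL, SRA, SRL) the r3 field
+   is unused, hence the 0 passed to emit_RS.  In all of these shifts
+   only the rightmost 6 bits of the second-operand address b2/d2 form
+   the shift amount; the address is not used to reference storage. */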
+
+
+static UChar *
+s390_emit_ST(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "st", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x50000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_STY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "sty", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000050ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_STG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "stg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000024ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_STC(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "stc", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x42000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_STCY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "stcy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000072ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_STH(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "sth", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x40000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_STHY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "sthy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000070ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_SR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "sr", r1, r2);
+
+   return emit_RR(p, 0x1b00, r1, r2);
+}
+
+
+static UChar *
+s390_emit_SGR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, GPR), "sgr", r1, r2);
+
+   return emit_RRE(p, 0xb9090000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_S(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "s", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x5b000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_SY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "sy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe3000000005bULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_SG(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "sg", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe30000000009ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_SH(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UDXB), "sh", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x4b000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_SHY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, SDXB), "shy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xe3000000007bULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_SLFI(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "slfi", r1, i2);
+
+   return emit_RIL(p, 0xc20500000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_SLGFI(UChar *p, UChar r1, UInt i2)
+{
+   vassert(s390_host_has_eimm);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, UINT), "slgfi", r1, i2);
+
+   return emit_RIL(p, 0xc20400000000ULL, r1, i2);
+}
+
+
+static UChar *
+s390_emit_LDR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "ldr", r1, r2);
+
+   return emit_RR(p, 0x2800, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LE(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, UDXB), "le", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x78000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_LD(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, UDXB), "ld", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x68000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_LEY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, SDXB), "ley", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xed0000000064ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LDY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, SDXB), "ldy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xed0000000065ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_LFPC(UChar *p, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC2(MNM, UDXB), "lfpc", d2, 0, b2);
+
+   return emit_S(p, 0xb29d0000, b2, d2);
+}
+
+
+static UChar *
+s390_emit_LDGR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_fgx);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, GPR), "ldgr", r1, r2);
+
+   return emit_RRE(p, 0xb3c10000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LGDR(UChar *p, UChar r1, UChar r2)
+{
+   vassert(s390_host_has_fgx);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, GPR, FPR), "lgdr", r1, r2);
+
+   return emit_RRE(p, 0xb3cd0000, r1, r2);
+}
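+
+/* LDGR and LGDR copy a raw 64-bit pattern between an FPR and a GPR
+   without any conversion.  They belong to the FPR-GR-transfer facility,
+   hence the s390_host_has_fgx guards. */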
+
+
+static UChar *
+s390_emit_LZER(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC2(MNM, FPR), "lzer", r1);
+
+   return emit_RRE(p, 0xb3740000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LZDR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC2(MNM, FPR), "lzdr", r1);
+
+   return emit_RRE(p, 0xb3750000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_SFPC(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC2(MNM, GPR), "sfpc", r1);
+
+   return emit_RRE(p, 0xb3840000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_STE(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, UDXB), "ste", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x70000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_STD(UChar *p, UChar r1, UChar x2, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, UDXB), "std", r1, d2, x2, b2);
+
+   return emit_RX(p, 0x60000000, r1, x2, b2, d2);
+}
+
+
+static UChar *
+s390_emit_STEY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, SDXB), "stey", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xed0000000066ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_STDY(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp);
+
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, SDXB), "stdy", r1, dh2, dl2, x2, b2);
+
+   return emit_RXY(p, 0xed0000000067ULL, r1, x2, b2, dl2, dh2);
+}
+
+
+static UChar *
+s390_emit_STFPC(UChar *p, UChar b2, UShort d2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC2(MNM, UDXB), "stfpc", d2, 0, b2);
+
+   return emit_S(p, 0xb29c0000, b2, d2);
+}
+
+
+static UChar *
+s390_emit_AEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "aebr", r1, r2);
+
+   return emit_RRE(p, 0xb30a0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_ADBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "adbr", r1, r2);
+
+   return emit_RRE(p, 0xb31a0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_AXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "axbr", r1, r2);
+
+   return emit_RRE(p, 0xb34a0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "cebr", r1, r2);
+
+   return emit_RRE(p, 0xb3090000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CDBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "cdbr", r1, r2);
+
+   return emit_RRE(p, 0xb3190000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "cxbr", r1, r2);
+
+   return emit_RRE(p, 0xb3490000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CEFBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, GPR), "cefbr", r1, r2);
+
+   return emit_RRE(p, 0xb3940000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CDFBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, GPR), "cdfbr", r1, r2);
+
+   return emit_RRE(p, 0xb3950000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CXFBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, GPR), "cxfbr", r1, r2);
+
+   return emit_RRE(p, 0xb3960000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CEGBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, GPR), "cegbr", r1, r2);
+
+   return emit_RRE(p, 0xb3a40000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CDGBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, GPR), "cdgbr", r1, r2);
+
+   return emit_RRE(p, 0xb3a50000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CXGBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, GPR), "cxgbr", r1, r2);
+
+   return emit_RRE(p, 0xb3a60000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CFEBR(UChar *p, UChar r3, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, UINT, FPR), "cfebr", r1, r3, r2);
+
+   return emit_RRF3(p, 0xb3980000, r3, r1, r2);
+}
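+
+/* In the convert-to-fixed instructions (CFEBR/CFDBR/CFXBR and the CG*
+   variants below) the r3 field holds the rounding-mode mask m3, not a
+   register number -- which is why the disassembly prints it as UINT. */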
+
+
+static UChar *
+s390_emit_CFDBR(UChar *p, UChar r3, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, UINT, FPR), "cfdbr", r1, r3, r2);
+
+   return emit_RRF3(p, 0xb3990000, r3, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CFXBR(UChar *p, UChar r3, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, UINT, FPR), "cfxbr", r1, r3, r2);
+
+   return emit_RRF3(p, 0xb39a0000, r3, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CGEBR(UChar *p, UChar r3, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, UINT, FPR), "cgebr", r1, r3, r2);
+
+   return emit_RRF3(p, 0xb3a80000, r3, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CGDBR(UChar *p, UChar r3, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, UINT, FPR), "cgdbr", r1, r3, r2);
+
+   return emit_RRF3(p, 0xb3a90000, r3, r1, r2);
+}
+
+
+static UChar *
+s390_emit_CGXBR(UChar *p, UChar r3, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, GPR, UINT, FPR), "cgxbr", r1, r3, r2);
+
+   return emit_RRF3(p, 0xb3aa0000, r3, r1, r2);
+}
+
+
+static UChar *
+s390_emit_DEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "debr", r1, r2);
+
+   return emit_RRE(p, 0xb30d0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_DDBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "ddbr", r1, r2);
+
+   return emit_RRE(p, 0xb31d0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_DXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "dxbr", r1, r2);
+
+   return emit_RRE(p, 0xb34d0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LCEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lcebr", r1, r2);
+
+   return emit_RRE(p, 0xb3030000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LCDBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lcdbr", r1, r2);
+
+   return emit_RRE(p, 0xb3130000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LCXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lcxbr", r1, r2);
+
+   return emit_RRE(p, 0xb3430000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LDEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "ldebr", r1, r2);
+
+   return emit_RRE(p, 0xb3040000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LXDBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lxdbr", r1, r2);
+
+   return emit_RRE(p, 0xb3050000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LXEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lxebr", r1, r2);
+
+   return emit_RRE(p, 0xb3060000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LNEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lnebr", r1, r2);
+
+   return emit_RRE(p, 0xb3010000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LNDBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lndbr", r1, r2);
+
+   return emit_RRE(p, 0xb3110000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LNXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lnxbr", r1, r2);
+
+   return emit_RRE(p, 0xb3410000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LPEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lpebr", r1, r2);
+
+   return emit_RRE(p, 0xb3000000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LPDBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lpdbr", r1, r2);
+
+   return emit_RRE(p, 0xb3100000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LPXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lpxbr", r1, r2);
+
+   return emit_RRE(p, 0xb3400000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LEDBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "ledbr", r1, r2);
+
+   return emit_RRE(p, 0xb3440000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LDXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "ldxbr", r1, r2);
+
+   return emit_RRE(p, 0xb3450000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_LEXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "lexbr", r1, r2);
+
+   return emit_RRE(p, 0xb3460000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_MEEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "meebr", r1, r2);
+
+   return emit_RRE(p, 0xb3170000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_MDBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "mdbr", r1, r2);
+
+   return emit_RRE(p, 0xb31c0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_MXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "mxbr", r1, r2);
+
+   return emit_RRE(p, 0xb34c0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_MAEBR(UChar *p, UChar r1, UChar r3, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, FPR, FPR, FPR), "maebr", r1, r3, r2);
+
+   return emit_RRF(p, 0xb30e0000, r1, r3, r2);
+}
+
+
+static UChar *
+s390_emit_MADBR(UChar *p, UChar r1, UChar r3, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, FPR, FPR, FPR), "madbr", r1, r3, r2);
+
+   return emit_RRF(p, 0xb31e0000, r1, r3, r2);
+}
+
+
+static UChar *
+s390_emit_MSEBR(UChar *p, UChar r1, UChar r3, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, FPR, FPR, FPR), "msebr", r1, r3, r2);
+
+   return emit_RRF(p, 0xb30f0000, r1, r3, r2);
+}
+
+
+static UChar *
+s390_emit_MSDBR(UChar *p, UChar r1, UChar r3, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC4(MNM, FPR, FPR, FPR), "msdbr", r1, r3, r2);
+
+   return emit_RRF(p, 0xb31f0000, r1, r3, r2);
+}
+
+
+static UChar *
+s390_emit_SQEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "sqebr", r1, r2);
+
+   return emit_RRE(p, 0xb3140000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_SQDBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "sqdbr", r1, r2);
+
+   return emit_RRE(p, 0xb3150000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_SQXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "sqxbr", r1, r2);
+
+   return emit_RRE(p, 0xb3160000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_SEBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "sebr", r1, r2);
+
+   return emit_RRE(p, 0xb30b0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_SDBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "sdbr", r1, r2);
+
+   return emit_RRE(p, 0xb31b0000, r1, r2);
+}
+
+
+static UChar *
+s390_emit_SXBR(UChar *p, UChar r1, UChar r2)
+{
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+      s390_disasm(ENC3(MNM, FPR, FPR), "sxbr", r1, r2);
+
+   return emit_RRE(p, 0xb34b0000, r1, r2);
+}
+
+
+/* Provide a symbolic name for register "R0" */
+#define R0 0
+
+/* Split up a 20-bit displacement into its high and low piece
+   suitable for passing as function arguments */
+#define DISP20(d) (((UInt)(d)) & 0xFFF), ((((UInt)(d)) >> 12) & 0xFF)
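+
+/* Worked example (illustration only): a displacement of 0x12345 splits
+   into the low 12 bits 0x345 and the high 8 bits 0x12, so DISP20(0x12345)
+   expands to the two arguments 0x345, 0x12 in the (dl, dh) order that the
+   emitters expect. */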
+
+/*---------------------------------------------------------------*/
+/*--- Helper functions                                        ---*/
+/*---------------------------------------------------------------*/
+
+static __inline__ Bool
+uint_fits_signed_16bit(UInt val)
+{
+   Int v = val & 0xFFFFu;
+
+   /* sign extend */
+   v = (v << 16) >> 16;
+
+   return val == (UInt)v;
+}
+
+
+static __inline__ Bool
+ulong_fits_signed_16bit(ULong val)
+{
+   Long v = val & 0xFFFFu;
+
+   /* sign extend */
+   v = (v << 48) >> 48;
+
+   return val == (ULong)v;
+}
+
+
+static __inline__ Bool
+ulong_fits_signed_32bit(ULong val)
+{
+   Long v = val & 0xFFFFFFFFu;
+
+   /* sign extend */
+   v = (v << 32) >> 32;
+
+   return val == (ULong)v;
+}
+
+
+static __inline__ Bool
+ulong_fits_unsigned_32bit(ULong val)
+{
+   return (val & 0xFFFFFFFFu) == val;
+}
+
+
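+/* Worked examples for the predicates above (illustration only):
+   uint_fits_signed_16bit(0xFFFF8000)  is True  (the value is -32768);
+   uint_fits_signed_16bit(0x00008000)  is False (sign-extending the low
+   halfword would yield 0xFFFF8000);
+   ulong_fits_signed_32bit(0xFFFFFFFF80000000UL) is True;
+   ulong_fits_unsigned_32bit(0x100000000UL) is False. */
+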
+/* Load a 64-bit immediate VAL into register REG. */
+static UChar *
+s390_emit_load_64imm(UChar *p, UChar reg, ULong val)
+{
+   if (ulong_fits_signed_16bit(val)) {
+      return s390_emit_LGHI(p, reg, val);
+   }
+
+   if (s390_host_has_eimm) {
+      if (ulong_fits_unsigned_32bit(val)) {
+         return s390_emit_LLILF(p, reg, val);
+      }
+      if (ulong_fits_signed_32bit(val)) {
+         /* LGFI's sign extension will recreate the correct 64-bit value */
+         return s390_emit_LGFI(p, reg, val);
+      }
+      /* Do it in two steps: upper half [0:31] and lower half [32:63] */
+      p = s390_emit_IIHF(p, reg, val >> 32);
+      return s390_emit_IILF(p, reg, val & 0xFFFFFFFF);
+   }
+
+   /* Fall back */
+   if (ulong_fits_unsigned_32bit(val)) {
+      p = s390_emit_LLILH(p, reg, (val >> 16) & 0xFFFF); /* sets val[32:47]
+                                                            val[0:31] = 0 */
+      p = s390_emit_IILL(p, reg, val & 0xFFFF);          /* sets val[48:63] */
+      return p;
+   }
+
+   p = s390_emit_IIHH(p, reg, (val >> 48) & 0xFFFF);
+   p = s390_emit_IIHL(p, reg, (val >> 32) & 0xFFFF);
+   p = s390_emit_IILH(p, reg, (val >> 16) & 0xFFFF);
+   p = s390_emit_IILL(p, reg, val & 0xFFFF);
+
+   return p;
+}
+
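+/* Decomposition sketch for the function above (illustration only; the
+   register number is hypothetical): loading 0x0000000100000002 into r4
+   with the extended-immediate facility emits IIHF 4,1 followed by
+   IILF 4,2. Without that facility the value fits neither signed nor
+   unsigned 32 bits, so four halfword inserts are emitted:
+   IIHH 4,0x0000; IIHL 4,0x0001; IILH 4,0x0000; IILL 4,0x0002. */
+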
+/* Load a 32-bit immediate VAL into register REG. */
+static UChar *
+s390_emit_load_32imm(UChar *p, UChar reg, UInt val)
+{
+   if (uint_fits_signed_16bit(val)) {
+      /* LHI's sign extension will recreate the correct 32-bit value */
+      return s390_emit_LHI(p, reg, val);
+   }
+   if (s390_host_has_eimm) {
+      return s390_emit_IILF(p, reg, val);
+   }
+   /* val[0:15]  --> (val >> 16) & 0xFFFF
+      val[16:31] --> val & 0xFFFF */
+   p = s390_emit_IILH(p, reg, (val >> 16) & 0xFFFF);
+   return s390_emit_IILL(p, reg, val & 0xFFFF);
+}
+
+/*------------------------------------------------------------*/
+/*--- Wrapper functions                                    ---*/
+/*------------------------------------------------------------*/
+
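+/* Each wrapper below emits the named instruction directly when the host
+   provides the required facility (general-instruction extension, extended
+   immediates, long displacement, or FPR-GPR transfer) and otherwise
+   synthesises the same effect from older instructions, typically using
+   R0 as a scratch register. */
+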
+/* r1[32:63],r1+1[32:63] = r1+1[32:63] * memory[op2addr][0:31] */
+static UChar *
+s390_emit_MFYw(UChar *p, UChar r1, UChar x, UChar b,  UShort dl, UChar dh)
+{
+   if (s390_host_has_gie) {
+      return s390_emit_MFY(p, r1, x, b, dl, dh);
+   }
+
+   /* Load from memory into R0, then MULTIPLY with R1 */
+   p = s390_emit_LY(p, R0, x, b, dl, dh);
+   return s390_emit_MR(p, r1, R0);
+}
+
+/* r1[32:63] = r1[32:63] * memory[op2addr][0:15] */
+static UChar *
+s390_emit_MHYw(UChar *p, UChar r1, UChar x, UChar b,  UShort dl, UChar dh)
+{
+   if (s390_host_has_gie) {
+      return s390_emit_MHY(p, r1, x, b, dl, dh);
+   }
+
+   /* Load from memory into R0, then MULTIPLY with R1 */
+   p = s390_emit_LHY(p, R0, x, b, dl, dh);
+   return s390_emit_MSR(p, r1, R0);
+}
+
+/* r1[32:63] = r1[32:63] * i2 */
+static UChar *
+s390_emit_MSFIw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_gie) {
+      return s390_emit_MSFI(p, r1, i2);
+   }
+
+   /* Load I2 into R0; then MULTIPLY R0 with R1 */
+   p = s390_emit_load_32imm(p, R0, i2);
+   return s390_emit_MSR(p, r1, R0);
+}
+
+
+/* r1[32:63] = r1[32:63] & i2 */
+static UChar *
+s390_emit_NILFw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_NILF(p, r1, i2);
+   }
+
+   /* Load I2 into R0; then AND R0 with R1 */
+   p = s390_emit_load_32imm(p, R0, i2);
+   return s390_emit_NR(p, r1, R0);
+}
+
+
+/* r1[32:63] = r1[32:63] | i2 */
+static UChar *
+s390_emit_OILFw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_OILF(p, r1, i2);
+   }
+
+   /* Load I2 into R0; then OR R0 with R1 */
+   p = s390_emit_load_32imm(p, R0, i2);
+   return s390_emit_OR(p, r1, R0);
+}
+
+
+/* r1[32:63] = r1[32:63] ^ i2 */
+static UChar *
+s390_emit_XILFw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_XILF(p, r1, i2);
+   }
+
+   /* Load I2 into R0; then XOR R0 with R1 */
+   p = s390_emit_load_32imm(p, R0, i2);
+   return s390_emit_XR(p, r1, R0);
+}
+
+
+/* r1[32:63] = sign_extend(mem[op2addr][0:7]) */
+static UChar *
+s390_emit_LBw(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (s390_host_has_ldisp) {
+      return s390_emit_LB(p, r1, x2, b2, dl2, dh2);
+   }
+
+   p = s390_emit_IC(p, r1, x2, b2, dl2);    /* r1[56:63] = mem[op2addr][0:7] */
+   p = s390_emit_SLL(p, r1, R0, 24);        /* r1 = r1 << 24  */
+   return s390_emit_SRA(p, r1, R0, 24);     /* r1 = r1 >>a 24 */
+}
+
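+/* Worked example for the shift pair above (illustration only): a byte
+   0x80 inserted by IC into the low byte of the 32-bit view is shifted to
+   the top by SLL 24, giving 0x80000000; the arithmetic SRA 24 then yields
+   0xFFFFFF80, i.e. the correctly sign-extended value -128. */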
+
+/*  r1[32:63] = sign_extend(r2[56:63]) */
+static UChar *
+s390_emit_LBRw(UChar *p, UChar r1, UChar r2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LBR(p, r1, r2);
+   }
+
+   p = s390_emit_LR(p, r1, r2);               /* r1 = r2 */
+   p = s390_emit_SLL(p, r1, R0, 24);          /* r1 = r1 << 24  */
+   return s390_emit_SRA(p, r1, R0, 24);       /* r1 = r1 >>a 24 */
+}
+
+
+/* r1[0:63] = sign_extend(mem[op2addr][0:7]) */
+static UChar *
+s390_emit_LGBw(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl2, UChar dh2)
+{
+   vassert(s390_host_has_ldisp || dh2 == 0);
+
+   if (s390_host_has_ldisp) {
+      return s390_emit_LGB(p, r1, x2, b2, dl2, dh2);
+   }
+
+   p = s390_emit_IC(p, r1, x2, b2, dl2);             /* r1[56:63] = mem[op2addr][0:7] */
+   p = s390_emit_SLLG(p, r1, r1, R0, DISP20(56));    /* r1 = r1 << 56  */
+   return s390_emit_SRAG(p, r1, r1, R0, DISP20(56)); /* r1 = r1 >>a 56 */
+}
+
+
+/*  r1[0:63] = sign_extend(r2[56:63]) */
+static UChar *
+s390_emit_LGBRw(UChar *p, UChar r1, UChar r2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LGBR(p, r1, r2);
+   }
+
+   p = s390_emit_LR(p, r1, r2);                       /* r1 = r2 */
+   p = s390_emit_SLLG(p, r1, r1, R0, DISP20(56));     /* r1 = r1 << 56  */
+   return s390_emit_SRAG(p, r1, r1, R0, DISP20(56));  /* r1 = r1 >>a 56 */
+}
+
+
+/* r1[32:63] = sign_extend(r2[48:63]) */
+static UChar *
+s390_emit_LHRw(UChar *p, UChar r1, UChar r2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LHR(p, r1, r2);
+   }
+
+   p = s390_emit_LR(p, r1, r2);               /* r1 = r2 */
+   p = s390_emit_SLL(p, r1, R0, 16);          /* r1 = r1 << 16  */
+   return s390_emit_SRA(p, r1, R0, 16);       /* r1 = r1 >>a 16 */
+}
+
+
+/* r1[0:63] = sign_extend(r2[48:63]) */
+static UChar *
+s390_emit_LGHRw(UChar *p, UChar r1, UChar r2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LGHR(p, r1, r2);
+   }
+
+   p = s390_emit_LR(p, r1, r2);               /* r1 = r2 */
+   p = s390_emit_SLLG(p, r1, r1, R0, DISP20(48));     /* r1 = r1 << 48  */
+   return s390_emit_SRAG(p, r1, r1, R0, DISP20(48));  /* r1 = r1 >>a 48 */
+}
+
+
+/* r1[0:63] = sign_extend(i2) */
+static UChar *
+s390_emit_LGFIw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LGFI(p, r1, i2);
+   }
+
+   p = s390_emit_load_32imm(p, R0, i2);
+   return s390_emit_LGFR(p, r1, R0);
+}
+
+
+/* r1[32:63] = zero_extend(r2[56:63]) */
+static UChar *
+s390_emit_LLCRw(UChar *p, UChar r1, UChar r2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LLCR(p, r1, r2);
+   }
+
+   p = s390_emit_LR(p, r1, r2);
+   p = s390_emit_LHI(p, R0, 0xFF);
+   return s390_emit_NR(p, r1, R0);
+}
+
+
+/* r1[0:63] = zero_extend(r2[56:63]) */
+static UChar *
+s390_emit_LLGCRw(UChar *p, UChar r1, UChar r2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LLGCR(p, r1, r2);
+   }
+
+   p = s390_emit_LR(p, r1, r2);
+   p = s390_emit_LLILL(p, R0, 0xFF);
+   return s390_emit_NGR(p, r1, R0);
+}
+
+
+/* r1[32:63] = zero_extend(r2[48:63]) */
+static UChar *
+s390_emit_LLHRw(UChar *p, UChar r1, UChar r2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LLHR(p, r1, r2);
+   }
+
+   p = s390_emit_LR(p, r1, r2);
+   p = s390_emit_LLILL(p, R0, 0xFFFF);
+   return s390_emit_NR(p, r1, R0);
+}
+
+
+/* r1[0:63] = zero_extend(r2[48:63]) */
+static UChar *
+s390_emit_LLGHRw(UChar *p, UChar r1, UChar r2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LLGHR(p, r1, r2);
+   }
+
+   p = s390_emit_LR(p, r1, r2);
+   p = s390_emit_LLILL(p, R0, 0xFFFF);
+   return s390_emit_NGR(p, r1, R0);
+}
+
+
+/* r1[32:63] = zero_extend(mem[op2addr][0:7]) */
+static UChar *
+s390_emit_LLCw(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl, UChar dh)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LLC(p, r1, x2, b2, dl, dh);
+   }
+
+   if (dh == 0) {
+      p = s390_emit_IC(p, r1, x2, b2, dl);
+   } else {
+      p = s390_emit_ICY(p, r1, x2, b2, dl, dh);
+   }
+   p = s390_emit_LLILL(p, R0, 0xFF);
+   return s390_emit_NR(p, r1, R0);
+}
+
+
+/* r1[32:63] = zero_extend(mem[op2addr][0:15]) */
+static UChar *
+s390_emit_LLHw(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl, UChar dh)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LLH(p, r1, x2, b2, dl, dh);
+   }
+
+   p = s390_emit_LLGH(p, r1, x2, b2, dl, dh);
+   p = s390_emit_LLILL(p, R0, 0xFFFF);
+   return s390_emit_NR(p, r1, R0);
+}
+
+
+/* r1[0:63] = zero_extend(i2) */
+static UChar *
+s390_emit_LLILFw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LLILF(p, r1, i2);
+   }
+
+   p = s390_emit_LLILH(p, r1, (i2 >> 16) & 0xFFFF);  /* i2[0:15] */
+   return s390_emit_OILL(p, r1, i2 & 0xFFFF);
+}
+
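+/* Worked example for the fallback above (illustration only): loading
+   0x12345678 zero-extended becomes LLILH r1,0x1234 (which also zeroes
+   the rest of the register) followed by OILL r1,0x5678. */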
+
+/* r1[32:63] = r1[32:63] + i2 */
+static UChar *
+s390_emit_AFIw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_AFI(p, r1, i2);
+   }
+   /* Load 32 bit immediate to R0 then add */
+   p = s390_emit_load_32imm(p, R0, i2);
+   return s390_emit_AR(p, r1, R0);
+}
+
+
+/* r1[32:63] = r1[32:63] - i2 */
+static UChar *
+s390_emit_SLFIw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_SLFI(p, r1, i2);
+   }
+
+   /* Load 32 bit immediate to R0 then subtract */
+   p = s390_emit_load_32imm(p, R0, i2);
+   return s390_emit_SR(p, r1, R0);
+}
+
+
+/* r1[0:63] = r1[0:63] - zero_extend(i2) */
+static UChar *
+s390_emit_SLGFIw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_SLGFI(p, r1, i2);
+   }
+
+   /* Load zero-extended 32 bit immediate to R0 then subtract */
+   p = s390_emit_load_64imm(p, R0, i2);
+   return s390_emit_SGR(p, r1, R0);
+}
+
+
+static UChar *
+s390_emit_LTw(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl, UChar dh)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LT(p, r1, x2, b2, dl, dh);
+   }
+   /* Load 32 bit from memory to R0 then compare */
+   if (dh == 0) {
+      p = s390_emit_L(p, R0, x2, b2, dl);
+   } else {
+      p = s390_emit_LY(p, R0, x2, b2, dl, dh);
+   }
+   return s390_emit_LTR(p, r1, R0);
+}
+
+
+static UChar *
+s390_emit_LTGw(UChar *p, UChar r1, UChar x2, UChar b2, UShort dl, UChar dh)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_LTG(p, r1, x2, b2, dl, dh);
+   }
+   /* Load 64 bit from memory to R0 then compare */
+   p = s390_emit_LG(p, R0, x2, b2, dl, dh);
+   return s390_emit_LTGR(p, r1, R0);
+}
+
+
+static UChar *
+s390_emit_CFIw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_CFI(p, r1, i2);
+   }
+   /* Load 32 bit immediate to R0 then compare */
+   p = s390_emit_load_32imm(p, R0, i2);
+   return s390_emit_CR(p, r1, R0);
+}
+
+
+static UChar *
+s390_emit_CLFIw(UChar *p, UChar r1, UInt i2)
+{
+   if (s390_host_has_eimm) {
+      return s390_emit_CLFI(p, r1, i2);
+   }
+   /* Load 32 bit immediate to R0 then compare */
+   p = s390_emit_load_32imm(p, R0, i2);
+   return s390_emit_CLR(p, r1, R0);
+}
+
+
+static UChar *
+s390_emit_LGDRw(UChar *p, UChar r1, UChar r2)
+{
+   if (s390_host_has_fgx) {
+      return s390_emit_LGDR(p, r1, r2);
+   }
+
+   /* Store the FPR at memory[sp - 8]. This is safe because SP grows towards
+      smaller addresses and is 8-byte aligned. Then load the GPR from that
+      memory location. */
+   if (s390_host_has_ldisp) {
+      p = s390_emit_STDY(p, r2, R0, S390_REGNO_STACK_POINTER, DISP20(-8));
+      return s390_emit_LG(p, r1, R0, S390_REGNO_STACK_POINTER, DISP20(-8));
+   }
+
+   /* No long displacement. Need to adjust SP explicitly so as to avoid
+      negative displacements. */
+   p = s390_emit_AGHI(p, S390_REGNO_STACK_POINTER, -8);
+   p = s390_emit_STD(p, r2, R0, S390_REGNO_STACK_POINTER, 0);
+   p = s390_emit_LG(p, r1, R0, S390_REGNO_STACK_POINTER, DISP20(0));
+   return s390_emit_AGHI(p, S390_REGNO_STACK_POINTER, 8);
+}
+
+
+static UChar *
+s390_emit_LDGRw(UChar *p, UChar r1, UChar r2)
+{
+   if (s390_host_has_fgx) {
+      return s390_emit_LDGR(p, r1, r2);
+   }
+
+   /* Store the GPR at memory[sp - 8]. This is safe because SP grows towards
+      smaller addresses and is 8-byte aligned. Then load the FPR from that
+      memory location. */
+   if (s390_host_has_ldisp) {
+      p = s390_emit_STG(p, r2, R0, S390_REGNO_STACK_POINTER, DISP20(-8));
+      return s390_emit_LDY(p, r1, R0, S390_REGNO_STACK_POINTER, DISP20(-8));
+   }
+
+   /* No long displacement. Need to adjust SP explicitly so as to avoid
+      negative displacements. */
+   p = s390_emit_AGHI(p, S390_REGNO_STACK_POINTER, -8);
+   p = s390_emit_STG(p, r2, R0, S390_REGNO_STACK_POINTER, DISP20(0));
+   p = s390_emit_LD(p, r1, R0, S390_REGNO_STACK_POINTER, 0);
+   return s390_emit_AGHI(p, S390_REGNO_STACK_POINTER, 8);
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Constructors for the various s390_insn kinds            ---*/
+/*---------------------------------------------------------------*/
+
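+/* Each constructor below allocates an s390_insn from the VEX arena and
+   fills in the tag, the operand size, and the variant fields; no code is
+   emitted here. A usage sketch (hypothetical operands):
+
+      s390_insn *add = s390_insn_alu(8, S390_ALU_ADD, dst, op2);
+
+   where DST is an HReg and OP2 an s390_opnd_RMI built elsewhere. */
+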
+s390_insn *
+s390_insn_load(UChar size, HReg dst, s390_amode *src)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_LOAD;
+   insn->size = size;
+   insn->variant.load.src  = src;
+   insn->variant.load.dst  = dst;
+
+   vassert(size == 1 || size == 2 || size == 4 || size == 8);
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_store(UChar size, s390_amode *dst, HReg src)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_STORE;
+   insn->size = size;
+   insn->variant.store.src  = src;
+   insn->variant.store.dst  = dst;
+
+   vassert(size == 1 || size == 2 || size == 4 || size == 8);
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_move(UChar size, HReg dst, HReg src)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_MOVE;
+   insn->size = size;
+   insn->variant.move.src  = src;
+   insn->variant.move.dst  = dst;
+
+   vassert(size == 1 || size == 2 || size == 4 || size == 8);
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_cond_move(UChar size, s390_cc_t cond, HReg dst, s390_opnd_RMI src)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_COND_MOVE;
+   insn->size = size;
+   insn->variant.cond_move.cond = cond;
+   insn->variant.cond_move.src  = src;
+   insn->variant.cond_move.dst  = dst;
+
+   vassert(size == 1 || size == 2 || size == 4 || size == 8);
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_load_immediate(UChar size, HReg dst, ULong value)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_LOAD_IMMEDIATE;
+   insn->size = size;
+   insn->variant.load_immediate.dst   = dst;
+   insn->variant.load_immediate.value = value;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_alu(UChar size, s390_alu_t tag, HReg dst, s390_opnd_RMI op2)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_ALU;
+   insn->size = size;
+   insn->variant.alu.tag = tag;
+   insn->variant.alu.dst = dst;
+   insn->variant.alu.op2 = op2;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_mul(UChar size, HReg dst_hi, HReg dst_lo, s390_opnd_RMI op2,
+              Bool signed_multiply)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   vassert(! hregIsVirtual(dst_hi));
+   vassert(! hregIsVirtual(dst_lo));
+
+   insn->tag  = S390_INSN_MUL;
+   insn->size = size;
+   insn->variant.mul.dst_hi = dst_hi;
+   insn->variant.mul.dst_lo = dst_lo;
+   insn->variant.mul.op2 = op2;
+   insn->variant.mul.signed_multiply = signed_multiply;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_div(UChar size, HReg op1_hi, HReg op1_lo, s390_opnd_RMI op2,
+              Bool signed_divide)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   vassert(size == 4 || size == 8);
+   vassert(! hregIsVirtual(op1_hi));
+   vassert(! hregIsVirtual(op1_lo));
+
+   insn->tag  = S390_INSN_DIV;
+   insn->size = size;
+   insn->variant.div.op1_hi = op1_hi;
+   insn->variant.div.op1_lo = op1_lo;
+   insn->variant.div.op2 = op2;
+   insn->variant.div.signed_divide = signed_divide;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_divs(UChar size, HReg rem, HReg op1, s390_opnd_RMI op2)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   vassert(size == 8);
+   vassert(! hregIsVirtual(op1));
+   vassert(! hregIsVirtual(rem));
+
+   insn->tag  = S390_INSN_DIVS;
+   insn->size = size;
+   insn->variant.divs.rem = rem;   /* remainder */
+   insn->variant.divs.op1 = op1;   /* also quotient */
+   insn->variant.divs.op2 = op2;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_clz(UChar size, HReg num_bits, HReg clobber, s390_opnd_RMI src)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   vassert(size == 8);
+   vassert(! hregIsVirtual(num_bits));
+   vassert(! hregIsVirtual(clobber));
+
+   insn->tag  = S390_INSN_CLZ;
+   insn->size = size;
+   insn->variant.clz.num_bits = num_bits;
+   insn->variant.clz.clobber  = clobber;
+   insn->variant.clz.src = src;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_unop(UChar size, s390_unop_t tag, HReg dst, s390_opnd_RMI opnd)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_UNOP;
+   insn->size = size;
+   insn->variant.unop.tag = tag;
+   insn->variant.unop.dst = dst;
+   insn->variant.unop.src = opnd;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_test(UChar size, s390_opnd_RMI src)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   vassert(size == 4 || size == 8);
+
+   insn->tag  = S390_INSN_TEST;
+   insn->size = size;
+   insn->variant.test.src = src;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_cc2bool(HReg dst, s390_cc_t cond)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_CC2BOOL;
+   insn->size = 0;   /* does not matter */
+   insn->variant.cc2bool.cond = cond;
+   insn->variant.cc2bool.dst  = dst;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_cas(UChar size, HReg op1, s390_amode *op2, HReg op3, HReg old_mem)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   vassert(size == 4 || size == 8);
+   vassert(op2->x == 0);
+
+   insn->tag  = S390_INSN_CAS;
+   insn->size = size;
+   insn->variant.cas.op1 = op1;
+   insn->variant.cas.op2 = op2;
+   insn->variant.cas.op3 = op3;
+   insn->variant.cas.old_mem = old_mem;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_compare(UChar size, HReg src1, s390_opnd_RMI src2,
+                  Bool signed_comparison)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   vassert(size == 4 || size == 8);
+
+   insn->tag  = S390_INSN_COMPARE;
+   insn->size = size;
+   insn->variant.compare.src1 = src1;
+   insn->variant.compare.src2 = src2;
+   insn->variant.compare.signed_comparison = signed_comparison;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_branch(IRJumpKind kind, s390_cc_t cond, s390_opnd_RMI dst)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_BRANCH;
+   insn->size = 0;  /* does not matter */
+   insn->variant.branch.kind = kind;
+   insn->variant.branch.dst  = dst;
+   insn->variant.branch.cond = cond;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_helper_call(s390_cc_t cond, Addr64 target, UInt num_args,
+                      HChar *name)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_HELPER_CALL;
+   insn->size = 0;  /* does not matter */
+   insn->variant.helper_call.cond = cond;
+   insn->variant.helper_call.target = target;
+   insn->variant.helper_call.num_args = num_args;
+   insn->variant.helper_call.name = name;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_bfp_triop(UChar size, s390_bfp_triop_t tag, HReg dst, HReg op2,
+                    HReg op3, s390_round_t rounding_mode)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_BFP_TRIOP;
+   insn->size = size;
+   insn->variant.bfp_triop.tag = tag;
+   insn->variant.bfp_triop.dst = dst;
+   insn->variant.bfp_triop.op2 = op2;
+   insn->variant.bfp_triop.op3 = op3;
+   insn->variant.bfp_triop.rounding_mode = rounding_mode;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_bfp_binop(UChar size, s390_bfp_binop_t tag, HReg dst, HReg op2,
+                    s390_round_t rounding_mode)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_BFP_BINOP;
+   insn->size = size;
+   insn->variant.bfp_binop.tag = tag;
+   insn->variant.bfp_binop.dst = dst;
+   insn->variant.bfp_binop.op2 = op2;
+   insn->variant.bfp_binop.rounding_mode = rounding_mode;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_bfp_unop(UChar size, s390_bfp_unop_t tag, HReg dst, HReg op,
+                   s390_round_t rounding_mode)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_BFP_UNOP;
+   insn->size = size;
+   insn->variant.bfp_unop.tag = tag;
+   insn->variant.bfp_unop.dst = dst;
+   insn->variant.bfp_unop.op  = op;
+   insn->variant.bfp_unop.rounding_mode = rounding_mode;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_bfp_compare(UChar size, HReg dst, HReg op1, HReg op2)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   vassert(size == 4 || size == 8);
+
+   insn->tag  = S390_INSN_BFP_COMPARE;
+   insn->size = size;
+   insn->variant.bfp_compare.dst = dst;
+   insn->variant.bfp_compare.op1 = op1;
+   insn->variant.bfp_compare.op2 = op2;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_bfp128_binop(UChar size, s390_bfp_binop_t tag, HReg dst_hi,
+                       HReg dst_lo, HReg op2_hi, HReg op2_lo,
+                       s390_round_t rounding_mode)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_BFP128_BINOP;
+   insn->size = size;
+   insn->variant.bfp128_binop.tag = tag;
+   insn->variant.bfp128_binop.dst_hi = dst_hi;
+   insn->variant.bfp128_binop.dst_lo = dst_lo;
+   insn->variant.bfp128_binop.op2_hi = op2_hi;
+   insn->variant.bfp128_binop.op2_lo = op2_lo;
+   insn->variant.bfp128_binop.rounding_mode = rounding_mode;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_bfp128_unop(UChar size, s390_bfp_unop_t tag, HReg dst_hi,
+                      HReg dst_lo, HReg op_hi, HReg op_lo,
+                      s390_round_t rounding_mode)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_BFP128_UNOP;
+   insn->size = size;
+   insn->variant.bfp128_unop.tag = tag;
+   insn->variant.bfp128_unop.dst_hi = dst_hi;
+   insn->variant.bfp128_unop.dst_lo = dst_lo;
+   insn->variant.bfp128_unop.op_hi = op_hi;
+   insn->variant.bfp128_unop.op_lo = op_lo;
+   insn->variant.bfp128_unop.rounding_mode = rounding_mode;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_bfp128_compare(UChar size, HReg dst, HReg op1_hi, HReg op1_lo,
+                         HReg op2_hi, HReg op2_lo)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_BFP128_COMPARE;
+   insn->size = size;
+   insn->variant.bfp128_compare.dst = dst;
+   insn->variant.bfp128_compare.op1_hi = op1_hi;
+   insn->variant.bfp128_compare.op1_lo = op1_lo;
+   insn->variant.bfp128_compare.op2_hi = op2_hi;
+   insn->variant.bfp128_compare.op2_lo = op2_lo;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_bfp128_convert_to(UChar size, s390_bfp_unop_t tag, HReg dst_hi,
+                            HReg dst_lo, HReg op)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_BFP128_CONVERT_TO;
+   insn->size = size;
+   insn->variant.bfp128_unop.tag = tag;
+   insn->variant.bfp128_unop.dst_hi = dst_hi;
+   insn->variant.bfp128_unop.dst_lo = dst_lo;
+   insn->variant.bfp128_unop.op_hi = op;
+   insn->variant.bfp128_unop.op_lo = INVALID_HREG;  /* unused */
+   insn->variant.bfp128_unop.rounding_mode = S390_ROUND_NEAREST_EVEN; /* unused */
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_bfp128_convert_from(UChar size, s390_bfp_unop_t tag, HReg dst,
+                              HReg op_hi, HReg op_lo,
+                              s390_round_t rounding_mode)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_BFP128_CONVERT_FROM;
+   insn->size = size;
+   insn->variant.bfp128_unop.tag = tag;
+   insn->variant.bfp128_unop.dst_hi = dst;
+   insn->variant.bfp128_unop.dst_lo = INVALID_HREG;  /* unused */
+   insn->variant.bfp128_unop.op_hi = op_hi;
+   insn->variant.bfp128_unop.op_lo = op_lo;
+   insn->variant.bfp128_unop.rounding_mode = rounding_mode;
+
+   return insn;
+}
+
+
+s390_insn *
+s390_insn_mfence(void)
+{
+   s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+   insn->tag  = S390_INSN_MFENCE;
+   insn->size = 0;   /* not needed */
+
+   return insn;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- Debug print                                             ---*/
+/*---------------------------------------------------------------*/
+
+static const HChar *
+s390_cc_as_string(s390_cc_t cc)
+{
+   switch (cc) {
+   case S390_CC_NEVER:  return "never";
+   case S390_CC_OVFL:   return "overflow";
+   case S390_CC_H:      return "greater than";     /* A > B ; high */
+   case S390_CC_NLE:    return "not low or equal";
+   case S390_CC_L:      return "less than";        /* A < B ; low */
+   case S390_CC_NHE:    return "not high or equal";
+   case S390_CC_LH:     return "low or high";
+   case S390_CC_NE:     return "not equal";        /* A != B ; not zero */
+   case S390_CC_E:      return "equal";            /* A == B ; zero */
+   case S390_CC_NLH:    return "not low or high";
+   case S390_CC_HE:     return "greater or equal"; /* A >= B ; high or equal*/
+   case S390_CC_NL:     return "not low";          /* not low */
+   case S390_CC_LE:     return "less or equal";    /* A <= B ; low or equal */
+   case S390_CC_NH:     return "not high";
+   case S390_CC_NO:     return "not overflow";
+   case S390_CC_ALWAYS: return "always";
+   default:
+      vpanic("s390_cc_as_string");
+   }
+}
+
+
+/* Helper function for writing out a V insn */
+static void
+s390_sprintf(HChar *buf, HChar *fmt, ...)
+{
+   HChar *p;
+   ULong value;
+   va_list args;
+   va_start(args, fmt);
+
+   p = buf;
+   for ( ; *fmt; ++fmt) {
+      Int c = *fmt;
+
+      if (c != '%') {
+         *p++ = c;
+         continue;
+      }
+
+      c = *++fmt;  /* next char */
+      switch (c) {
+      case '%':
+         *p++ = c;   /* %% */
+         continue;
+
+      case 's':     /* %s */
+         p += vex_sprintf(p, "%s", va_arg(args, HChar *));
+         continue;
+
+      case 'M':     /* %M = mnemonic */
+         p += vex_sprintf(p, "%-8s", va_arg(args, HChar *));
+         continue;
+
+      case 'R':     /* %R = register */
+         p += vex_sprintf(p, "%s", s390_hreg_as_string(va_arg(args, HReg)));
+         continue;
+
+      case 'A':     /* %A = amode */
+         p += vex_sprintf(p, "%s",
+                          s390_amode_as_string(va_arg(args, s390_amode *)));
+         continue;
+
+      case 'C':     /* %C = condition code */
+         p += vex_sprintf(p, "%s", s390_cc_as_string(va_arg(args, s390_cc_t)));
+         continue;
+
+      case 'L': {   /* %L = argument list in helper call */
+         UInt i, num_args;
+
+         num_args = va_arg(args, UInt);
+
+         for (i = 0; i < num_args; ++i) {
+            if (i != 0) p += vex_sprintf(p, ", ");
+            p += vex_sprintf(p, "r%d", s390_gprno_from_arg_index(i));
+         }
+         continue;
+      }
+
+      case 'O': {   /* %O = RMI operand */
+         s390_opnd_RMI *op = va_arg(args, s390_opnd_RMI *);
+
+         switch (op->tag) {
+         case S390_OPND_REG:
+            p += vex_sprintf(p, "%s", s390_hreg_as_string(op->variant.reg));
+            continue;
+
+         case S390_OPND_AMODE:
+            p += vex_sprintf(p, "%s", s390_amode_as_string(op->variant.am));
+            continue;
+
+         case S390_OPND_IMMEDIATE:
+            value = op->variant.imm;
+            goto print_value;
+
+         default:
+            goto fail;
+         }
+      }
+
+      case 'I':     /* %I = immediate value */
+         value = va_arg(args, ULong);
+         goto print_value;
+
+      print_value:
+         if ((Long)value < 0)
+            p += vex_sprintf(p, "%lld", (Long)value);
+         else if (value < 100)
+            p += vex_sprintf(p, "%llu", value);
+         else
+            p += vex_sprintf(p, "0x%llx", value);
+         continue;
+
+      default:
+         goto fail;
+      }
+   }
+   *p = '\0';
+   va_end(args);
+
+   return;
+
+ fail: vpanic("s390_printf");
+}
+
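+/* Usage sketch for s390_sprintf (hypothetical values, mirroring the
+   calls in s390_insn_as_string below); %M formats a mnemonic, %R a
+   register, and %I an immediate:
+
+      s390_sprintf(buf, "%M %R,%I", "v-loadi", dst, (ULong)42);
+*/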
+
+/* Decompile the given insn into a static buffer and return it */
+const HChar *
+s390_insn_as_string(const s390_insn *insn)
+{
+   static HChar buf[300];
+   const HChar *op;
+   HChar *p;
+
+   buf[0] = '\0';
+
+   switch (insn->tag) {
+   case S390_INSN_LOAD:
+      s390_sprintf(buf, "%M %R,%A", "v-load", insn->variant.load.dst,
+                   insn->variant.load.src);
+      break;
+
+   case S390_INSN_STORE:
+      s390_sprintf(buf, "%M %R,%A", "v-store", insn->variant.store.src,
+                   insn->variant.store.dst);
+      break;
+
+   case S390_INSN_MOVE:
+      s390_sprintf(buf, "%M %R,%R", "v-move", insn->variant.move.dst,
+                   insn->variant.move.src);
+      break;
+
+   case S390_INSN_COND_MOVE:
+      s390_sprintf(buf, "%M if (%C) %R,%O", "v-move",
+                   insn->variant.cond_move.cond, insn->variant.cond_move.dst,
+                   &insn->variant.cond_move.src);
+      break;
+
+   case S390_INSN_LOAD_IMMEDIATE:
+      s390_sprintf(buf, "%M %R,%I", "v-loadi", insn->variant.load_immediate.dst,
+                   insn->variant.load_immediate.value);
+      break;
+
+   case S390_INSN_ALU:
+      switch (insn->variant.alu.tag) {
+      case S390_ALU_ADD:  op = "v-add";  break;
+      case S390_ALU_SUB:  op = "v-sub";  break;
+      case S390_ALU_MUL:  op = "v-mul";  break;
+      case S390_ALU_AND:  op = "v-and";  break;
+      case S390_ALU_OR:   op = "v-or";   break;
+      case S390_ALU_XOR:  op = "v-xor";  break;
+      case S390_ALU_LSH:  op = "v-lsh";  break;
+      case S390_ALU_RSH:  op = "v-rsh";  break;
+      case S390_ALU_RSHA: op = "v-rsha"; break;
+      default: goto fail;
+      }
+      s390_sprintf(buf, "%M %R,%R,%O", op, insn->variant.alu.dst,
+                   insn->variant.alu.dst   /* op1 same as dst */,
+                   &insn->variant.alu.op2);
+      break;
+
+   case S390_INSN_MUL:
+      if (insn->variant.mul.signed_multiply) {
+         op = "v-muls";
+      } else {
+         op = "v-mulu";
+      }
+      s390_sprintf(buf, "%M %R,%O", op, insn->variant.mul.dst_hi,
+                   &insn->variant.mul.op2);
+      break;
+
+   case S390_INSN_DIV:
+      if (insn->variant.div.signed_divide) {
+         op = "v-divs";
+      } else {
+         op = "v-divu";
+      }
+      s390_sprintf(buf, "%M %R,%O", op, insn->variant.div.op1_hi,
+                   &insn->variant.div.op2);
+      break;
+
+   case S390_INSN_DIVS:
+      s390_sprintf(buf, "%M %R,%O", "v-divsi", insn->variant.divs.op1,
+                   &insn->variant.divs.op2);
+      break;
+
+   case S390_INSN_CLZ:
+      s390_sprintf(buf, "%M %R,%O", "v-clz", insn->variant.clz.num_bits,
+                   &insn->variant.clz.src);
+      break;
+
+   case S390_INSN_UNOP:
+      switch (insn->variant.unop.tag) {
+      case S390_ZERO_EXTEND_8:
+      case S390_ZERO_EXTEND_16:
+      case S390_ZERO_EXTEND_32:
+         op = "v-zerox";
+         break;
+
+      case S390_SIGN_EXTEND_8:
+      case S390_SIGN_EXTEND_16:
+      case S390_SIGN_EXTEND_32:
+         op = "v-signx";
+         break;
+
+      case S390_NEGATE:
+         op = "v-neg";
+         break;
+
+      default:
+         goto fail;
+      }
+      s390_sprintf(buf, "%M %R,%O", op, insn->variant.unop.dst,
+                   &insn->variant.unop.src);
+      break;
+
+   case S390_INSN_TEST:
+      s390_sprintf(buf, "%M %O", "v-test", &insn->variant.test.src);
+      break;
+
+   case S390_INSN_CC2BOOL:
+      s390_sprintf(buf, "%M %R,%C", "v-cc2b", insn->variant.cc2bool.dst,
+                   insn->variant.cc2bool.cond);
+      break;
+
+   case S390_INSN_CAS:
+      s390_sprintf(buf, "%M %R,%A,%R,%R", "v-cas", insn->variant.cas.op1,
+                   insn->variant.cas.op2, insn->variant.cas.op3,
+                   insn->variant.cas.old_mem);
+      break;
+
+   case S390_INSN_COMPARE:
+      if (insn->variant.compare.signed_comparison) {
+         op = "v-cmps";
+      } else {
+         op = "v-cmpu";
+      }
+      s390_sprintf(buf, "%M %R,%O", op, insn->variant.compare.src1,
+                   &insn->variant.compare.src2);
+      break;
+
+   case S390_INSN_BRANCH:
+      switch (insn->variant.branch.kind) {
+      case Ijk_ClientReq:   op = "clientreq"; break;
+      case Ijk_Sys_syscall: op = "syscall";   break;
+      case Ijk_Yield:       op = "yield";     break;
+      case Ijk_EmWarn:      op = "emwarn";    break;
+      case Ijk_EmFail:      op = "emfail";    break;
+      case Ijk_MapFail:     op = "mapfail";   break;
+      case Ijk_NoDecode:    op = "nodecode";  break;
+      case Ijk_TInval:      op = "tinval";    break;
+      case Ijk_NoRedir:     op = "noredir";   break;
+      case Ijk_SigTRAP:     op = "sigtrap";   break;
+      case Ijk_Boring:      op = "goto";      break;
+      case Ijk_Call:        op = "call";      break;
+      case Ijk_Ret:         op = "return";    break;
+      default:
+         goto fail;
+      }
+      s390_sprintf(buf, "if (%C) %s %O", insn->variant.branch.cond, op,
+                   &insn->variant.branch.dst);
+      break;
+
+   case S390_INSN_HELPER_CALL: {
+
+      if (insn->variant.helper_call.cond != S390_CC_ALWAYS) {
+         s390_sprintf(buf, "%M if (%C) %s{%I}(%L)", "v-call",
+                      insn->variant.helper_call.cond,
+                      insn->variant.helper_call.name,
+                      insn->variant.helper_call.target,
+                      insn->variant.helper_call.num_args);
+      } else {
+         s390_sprintf(buf, "%M %s{%I}(%L)", "v-call",
+                      insn->variant.helper_call.name,
+                      insn->variant.helper_call.target,
+                      insn->variant.helper_call.num_args);
+      }
+      break;
+   }
+
+   case S390_INSN_BFP_TRIOP:
+      switch (insn->variant.bfp_triop.tag) {
+      case S390_BFP_MADD:  op = "v-fmadd";  break;
+      case S390_BFP_MSUB:  op = "v-fmsub";  break;
+      default: goto fail;
+      }
+      s390_sprintf(buf, "%M %R,%R,%R,%R", op, insn->variant.bfp_triop.dst,
+                   insn->variant.bfp_triop.dst  /* op1 same as dst */,
+                   insn->variant.bfp_triop.op2, insn->variant.bfp_triop.op3);
+      break;
+
+   case S390_INSN_BFP_BINOP:
+      switch (insn->variant.bfp_binop.tag) {
+      case S390_BFP_ADD:      op = "v-fadd";  break;
+      case S390_BFP_SUB:      op = "v-fsub";  break;
+      case S390_BFP_MUL:      op = "v-fmul";  break;
+      case S390_BFP_DIV:      op = "v-fdiv";  break;
+      default: goto fail;
+      }
+      s390_sprintf(buf, "%M %R,%R,%R", op, insn->variant.bfp_binop.dst,
+                   insn->variant.bfp_binop.dst  /* op1 same as dst */,
+                   insn->variant.bfp_binop.op2);
+      break;
+
+   case S390_INSN_BFP_COMPARE:
+      s390_sprintf(buf, "%M %R,%R,%R", "v-fcmp", insn->variant.bfp_compare.dst,
+                   insn->variant.bfp_compare.op1, insn->variant.bfp_compare.op2);
+      break;
+
+   case S390_INSN_BFP_UNOP:
+      switch (insn->variant.bfp_unop.tag) {
+      case S390_BFP_ABS:         op = "v-fabs";  break;
+      case S390_BFP_NABS:        op = "v-fnabs"; break;
+      case S390_BFP_NEG:         op = "v-fneg";  break;
+      case S390_BFP_SQRT:        op = "v-fsqrt"; break;
+      case S390_BFP_I32_TO_F32:
+      case S390_BFP_I32_TO_F64:
+      case S390_BFP_I32_TO_F128:
+      case S390_BFP_I64_TO_F32:
+      case S390_BFP_I64_TO_F64:
+      case S390_BFP_I64_TO_F128: op = "v-i2f"; break;
+      case S390_BFP_F32_TO_I32:
+      case S390_BFP_F32_TO_I64:
+      case S390_BFP_F64_TO_I32:
+      case S390_BFP_F64_TO_I64:
+      case S390_BFP_F128_TO_I32:
+      case S390_BFP_F128_TO_I64: op = "v-f2i"; break;
+      case S390_BFP_F32_TO_F64:
+      case S390_BFP_F32_TO_F128:
+      case S390_BFP_F64_TO_F32:
+      case S390_BFP_F64_TO_F128:
+      case S390_BFP_F128_TO_F32:
+      case S390_BFP_F128_TO_F64: op = "v-f2f"; break;
+      default: goto fail;
+      }
+      s390_sprintf(buf, "%M %R,%R", op, insn->variant.bfp_unop.dst,
+                   insn->variant.bfp_unop.op);
+      break;
+
+   case S390_INSN_BFP128_BINOP:
+      switch (insn->variant.bfp128_binop.tag) {
+      case S390_BFP_ADD:      op = "v-fadd";  break;
+      case S390_BFP_SUB:      op = "v-fsub";  break;
+      case S390_BFP_MUL:      op = "v-fmul";  break;
+      case S390_BFP_DIV:      op = "v-fdiv";  break;
+      default: goto fail;
+      }
+      /* Only write the register that identifies the register pair */
+      s390_sprintf(buf, "%M %R,%R,%R", op, insn->variant.bfp128_binop.dst_hi,
+                   insn->variant.bfp128_binop.dst_hi  /* op1 same as dst */,
+                   insn->variant.bfp128_binop.op2_hi);
+      break;
+
+   case S390_INSN_BFP128_COMPARE:
+      /* Only write the register that identifies the register pair */
+      s390_sprintf(buf, "%M %R,%R,%R", "v-fcmp", insn->variant.bfp128_compare.dst,
+                   insn->variant.bfp128_compare.op1_hi,
+                   insn->variant.bfp128_compare.op2_hi);
+      break;
+
+   case S390_INSN_BFP128_UNOP:
+   case S390_INSN_BFP128_CONVERT_TO:
+   case S390_INSN_BFP128_CONVERT_FROM:
+      switch (insn->variant.bfp128_unop.tag) {
+      case S390_BFP_ABS:         op = "v-fabs";  break;
+      case S390_BFP_NABS:        op = "v-fnabs"; break;
+      case S390_BFP_NEG:         op = "v-fneg";  break;
+      case S390_BFP_SQRT:        op = "v-fsqrt"; break;
+      case S390_BFP_I32_TO_F128:
+      case S390_BFP_I64_TO_F128: op = "v-i2f";   break;
+      case S390_BFP_F128_TO_I32:
+      case S390_BFP_F128_TO_I64: op = "v-f2i";   break;
+      case S390_BFP_F32_TO_F128:
+      case S390_BFP_F64_TO_F128:
+      case S390_BFP_F128_TO_F32:
+      case S390_BFP_F128_TO_F64: op = "v-f2f";   break;
+      default: goto fail;
+      }
+      /* Only write the register that identifies the register pair */
+      s390_sprintf(buf, "%M %R,%R", op, insn->variant.bfp128_unop.dst_hi,
+                   insn->variant.bfp128_unop.op_hi);
+      break;
+
+   case S390_INSN_MFENCE:
+      s390_sprintf(buf, "%M", "v-mfence");
+      return buf;   /* avoid printing the trailing byte count, which is
+                       meaningless here */
+
+   default: goto fail;
+   }
+
+   /* Write out how many bytes are involved in the operation */
+
+   {
+      UInt len, i;
+
+      for (p = buf; *p; ++p)
+         continue;
+
+      len = p - buf;
+
+      if (len < 32) {
+         for (i = len; i < 32; ++i)
+            p += vex_sprintf(p, " ");
+      } else {
+         p += vex_sprintf(p, "\t");
+      }
+   }
+
+   /* Special cases first */
+   switch (insn->tag) {
+   case S390_INSN_UNOP:
+      switch (insn->variant.unop.tag) {
+      case S390_SIGN_EXTEND_8:
+      case S390_ZERO_EXTEND_8:  p += vex_sprintf(p, "1 -> "); goto common;
+      case S390_SIGN_EXTEND_16:
+      case S390_ZERO_EXTEND_16: p += vex_sprintf(p, "2 -> "); goto common;
+      case S390_SIGN_EXTEND_32:
+      case S390_ZERO_EXTEND_32: p += vex_sprintf(p, "4 -> "); goto common;
+      default:
+         goto common;
+      }
+
+   case S390_INSN_BFP_UNOP:
+      switch (insn->variant.bfp_unop.tag) {
+      case S390_BFP_I32_TO_F32:
+      case S390_BFP_I32_TO_F64:
+      case S390_BFP_I32_TO_F128:
+      case S390_BFP_F32_TO_I32:
+      case S390_BFP_F32_TO_I64:
+      case S390_BFP_F32_TO_F64:
+      case S390_BFP_F32_TO_F128: p += vex_sprintf(p, "4 -> "); goto common;
+      case S390_BFP_I64_TO_F32:
+      case S390_BFP_I64_TO_F64:
+      case S390_BFP_I64_TO_F128:
+      case S390_BFP_F64_TO_I32:
+      case S390_BFP_F64_TO_I64:
+      case S390_BFP_F64_TO_F32:
+      case S390_BFP_F64_TO_F128: p += vex_sprintf(p, "8 -> "); goto common;
+      case S390_BFP_F128_TO_I32:
+      case S390_BFP_F128_TO_I64:
+      case S390_BFP_F128_TO_F32:
+      case S390_BFP_F128_TO_F64: p += vex_sprintf(p, "16 -> "); goto common;
+      default:
+         goto common;
+      }
+
+   case S390_INSN_BFP128_UNOP:
+   case S390_INSN_BFP128_CONVERT_TO:
+   case S390_INSN_BFP128_CONVERT_FROM:
+      switch (insn->variant.bfp128_unop.tag) {
+      case S390_BFP_I32_TO_F128:
+      case S390_BFP_F32_TO_F128: p += vex_sprintf(p, "4 -> "); goto common;
+      case S390_BFP_I64_TO_F128:
+      case S390_BFP_F64_TO_F128: p += vex_sprintf(p, "8 -> "); goto common;
+      case S390_BFP_F128_TO_I32:
+      case S390_BFP_F128_TO_I64:
+      case S390_BFP_F128_TO_F32:
+      case S390_BFP_F128_TO_F64: p += vex_sprintf(p, "16 -> "); goto common;
+      default:
+         goto common;
+      }
+
+   default:
+      goto common;
+   }
+
+   /* Common case */
+ common:
+   vex_sprintf(p, "%u bytes", (UInt)insn->size);
+
+   return buf;
+
+ fail: vpanic("s390_insn_as_string");
+}
+
+
+
+/* Load NUM bytes from memory into register REG using addressing mode AM. */
+static UChar *
+s390_emit_load_mem(UChar *p, UInt num, UChar reg, const s390_amode *am)
+{
+   UInt b = hregNumber(am->b);
+   UInt x = hregNumber(am->x);  /* 0 for B12 and B20 */
+   UInt d = am->d;
+
+   switch (am->tag) {
+   case S390_AMODE_B12:
+   case S390_AMODE_BX12:
+      switch (num) {
+      case 1: return s390_emit_IC(p, reg, x, b, d);
+      case 2: return s390_emit_LH(p, reg, x, b, d);
+      case 4: return s390_emit_L(p, reg, x, b, d);
+      case 8: return s390_emit_LG(p, reg, x, b, DISP20(d));
+      default: goto fail;
+      }
+      break;
+
+   case S390_AMODE_B20:
+   case S390_AMODE_BX20:
+      switch (num) {
+      case 1: return s390_emit_ICY(p, reg, x, b, DISP20(d));
+      case 2: return s390_emit_LHY(p, reg, x, b, DISP20(d));
+      case 4: return s390_emit_LY(p, reg, x, b, DISP20(d));
+      case 8: return s390_emit_LG(p, reg, x, b, DISP20(d));
+      default: goto fail;
+      }
+      break;
+
+   default: goto fail;
+   }
+
+ fail:
+   vpanic("s390_emit_load_mem");
+}
+
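+/* Note that an 8-byte load always uses LG, which takes a 20-bit
+   displacement; a short B12/BX12 displacement is simply widened via
+   DISP20. */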
+
+/* Load condition code into register REG */
+static UChar *
+s390_emit_load_cc(UChar *p, UChar reg)
+{
+   p = s390_emit_LGHI(p, reg, 0);  /* Clear out, cc not affected */
+   p = s390_emit_IPM(p, reg, reg);
+   /* Shift 28 bits to the right --> [0,1,2,3] */
+   return s390_emit_SRL(p, reg, 0, 28); /* REG = cc */
+}
+
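+/* Worked example (illustration only): with condition code 2 and a zero
+   program mask, IPM sets the top byte of the cleared register's 32-bit
+   view to 0x20, i.e. the register holds 0x20000000; the SRL by 28 then
+   leaves exactly the value 2. */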
+
+/*---------------------------------------------------------------*/
+/*--- Code generation                                         ---*/
+/*---------------------------------------------------------------*/
+
+/* Do not load more bytes than requested. */
+static UChar *
+s390_insn_load_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt r, x, b, d;
+   const s390_amode *src;
+
+   src = insn->variant.load.src;
+
+   r = hregNumber(insn->variant.load.dst);
+
+   if (hregClass(insn->variant.load.dst) == HRcFlt64) {
+      b = hregNumber(src->b);
+      x = hregNumber(src->x);  /* 0 for B12 and B20 */
+      d = src->d;
+
+      switch (insn->size) {
+
+      case 4:
+         switch (src->tag) {
+         case S390_AMODE_B12:
+         case S390_AMODE_BX12:
+            return s390_emit_LE(buf, r, x, b, d);
+
+         case S390_AMODE_B20:
+         case S390_AMODE_BX20:
+            return s390_emit_LEY(buf, r, x, b, DISP20(d));
+         }
+         break;
+
+      case 8:
+         switch (src->tag) {
+         case S390_AMODE_B12:
+         case S390_AMODE_BX12:
+            return s390_emit_LD(buf, r, x, b, d);
+
+         case S390_AMODE_B20:
+         case S390_AMODE_BX20:
+            return s390_emit_LDY(buf, r, x, b, DISP20(d));
+         }
+         break;
+      }
+      vpanic("s390_insn_load_emit");
+   }
+
+   /* Integer stuff */
+   return s390_emit_load_mem(buf, insn->size, r, src);
+}
+
+
+static UChar *
+s390_insn_store_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt r, x, b, d;
+   const s390_amode *dst;
+
+   dst = insn->variant.store.dst;
+
+   r = hregNumber(insn->variant.store.src);
+   b = hregNumber(dst->b);
+   x = hregNumber(dst->x);  /* 0 for B12 and B20 */
+   d = dst->d;
+
+   if (hregClass(insn->variant.store.src) == HRcFlt64) {
+      switch (insn->size) {
+
+      case 4:
+         switch (dst->tag) {
+         case S390_AMODE_B12:
+         case S390_AMODE_BX12:
+            return s390_emit_STE(buf, r, x, b, d);
+
+         case S390_AMODE_B20:
+         case S390_AMODE_BX20:
+            return s390_emit_STEY(buf, r, x, b, DISP20(d));
+         }
+         break;
+
+      case 8:
+         switch (dst->tag) {
+         case S390_AMODE_B12:
+         case S390_AMODE_BX12:
+            return s390_emit_STD(buf, r, x, b, d);
+
+         case S390_AMODE_B20:
+         case S390_AMODE_BX20:
+            return s390_emit_STDY(buf, r, x, b, DISP20(d));
+         }
+         break;
+      }
+      vpanic("s390_insn_store_emit");
+   }
+
+   /* Integer stuff */
+   switch (insn->size) {
+   case 1:
+      switch (dst->tag) {
+      case S390_AMODE_B12:
+      case S390_AMODE_BX12:
+         return s390_emit_STC(buf, r, x, b, d);
+
+      case S390_AMODE_B20:
+      case S390_AMODE_BX20:
+         return s390_emit_STCY(buf, r, x, b, DISP20(d));
+      }
+      break;
+
+   case 2:
+      switch (dst->tag) {
+      case S390_AMODE_B12:
+      case S390_AMODE_BX12:
+         return s390_emit_STH(buf, r, x, b, d);
+
+      case S390_AMODE_B20:
+      case S390_AMODE_BX20:
+         return s390_emit_STHY(buf, r, x, b, DISP20(d));
+      }
+      break;
+
+   case 4:
+      switch (dst->tag) {
+      case S390_AMODE_B12:
+      case S390_AMODE_BX12:
+         return s390_emit_ST(buf, r, x, b, d);
+
+      case S390_AMODE_B20:
+      case S390_AMODE_BX20:
+         return s390_emit_STY(buf, r, x, b, DISP20(d));
+      }
+      break;
+
+   case 8:
+      return s390_emit_STG(buf, r, x, b, DISP20(d));
+
+   default:
+      break;
+   }
+
+   vpanic("s390_insn_store_emit");
+}
+
+
+static UChar *
+s390_insn_move_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt dst, src;
+   HRegClass dst_class, src_class;
+
+   dst = hregNumber(insn->variant.move.dst);
+   src = hregNumber(insn->variant.move.src);
+
+   dst_class = hregClass(insn->variant.move.dst);
+   src_class = hregClass(insn->variant.move.src);
+
+   if (dst_class == src_class) {
+      if (dst_class == HRcInt64)
+         return s390_emit_LGR(buf, dst, src);
+      if (dst_class == HRcFlt64)
+         return s390_emit_LDR(buf, dst, src);
+   } else {
+      if (dst_class == HRcFlt64 && src_class == HRcInt64)
+         return s390_emit_LDGRw(buf, dst, src);
+      if (dst_class == HRcInt64 && src_class == HRcFlt64)
+         return s390_emit_LGDRw(buf, dst, src);
+      /* A move between floating point registers and general purpose
+         registers of different size should never occur and indicates
+         an error elsewhere. */
+   }
+
+   vpanic("s390_insn_move_emit");
+}
+
+
+static UChar *
+s390_insn_load_immediate_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt  r;
+   ULong value = insn->variant.load_immediate.value;
+
+   r = hregNumber(insn->variant.load_immediate.dst);
+
+   if (hregClass(insn->variant.load_immediate.dst) == HRcFlt64) {
+      vassert(value == 0);
+      switch (insn->size) {
+      case 4: return s390_emit_LZER(buf, r, value);
+      case 8: return s390_emit_LZDR(buf, r, value);
+      }
+      vpanic("s390_insn_load_immediate_emit");
+   }
+
+   switch (insn->size) {
+   case 1:
+   case 2:
+      /* Load the immediate value as a 4-byte value. That does not hurt, as
+         the extra bytes will not be looked at. Fall through. */
+   case 4:
+      return s390_emit_load_32imm(buf, r, value);
+
+   case 8:
+      return s390_emit_load_64imm(buf, r, value);
+   }
+
+   vpanic("s390_insn_load_immediate_emit");
+}
+
+
+/* There is no easy way to do ALU operations on 1-byte or 2-byte operands.
+   So we simply perform a 4-byte operation. Doing so uses possibly undefined
+   bits and produces an undefined result in those extra bit positions. But
+   the consumers of the result do not look at those positions, so this is
+   OK. */
+static UChar *
+s390_insn_alu_emit(UChar *buf, const s390_insn *insn)
+{
+   s390_opnd_RMI op2;
+   UInt dst;
+
+   dst = hregNumber(insn->variant.alu.dst);
+   op2 = insn->variant.alu.op2;
+
+   /* Second operand is in a register */
+   if (op2.tag == S390_OPND_REG) {
+      UInt r2 = hregNumber(op2.variant.reg);
+
+      switch (insn->size) {
+      case 1:
+      case 2:
+      case 4:
+         switch (insn->variant.alu.tag) {
+         case S390_ALU_ADD:  return s390_emit_AR(buf, dst, r2);
+         case S390_ALU_SUB:  return s390_emit_SR(buf, dst, r2);
+         case S390_ALU_MUL:  return s390_emit_MSR(buf, dst, r2);
+         case S390_ALU_AND:  return s390_emit_NR(buf, dst, r2);
+         case S390_ALU_OR:   return s390_emit_OR(buf, dst, r2);
+         case S390_ALU_XOR:  return s390_emit_XR(buf, dst, r2);
+         case S390_ALU_LSH:  return s390_emit_SLL(buf, dst, r2, 0);
+         case S390_ALU_RSH:  return s390_emit_SRL(buf, dst, r2, 0);
+         case S390_ALU_RSHA: return s390_emit_SRA(buf, dst, r2, 0);
+         }
+         goto fail;
+
+      case 8:
+         switch (insn->variant.alu.tag) {
+         case S390_ALU_ADD:  return s390_emit_AGR(buf, dst, r2);
+         case S390_ALU_SUB:  return s390_emit_SGR(buf, dst, r2);
+         case S390_ALU_MUL:  return s390_emit_MSGR(buf, dst, r2);
+         case S390_ALU_AND:  return s390_emit_NGR(buf, dst, r2);
+         case S390_ALU_OR:   return s390_emit_OGR(buf, dst, r2);
+         case S390_ALU_XOR:  return s390_emit_XGR(buf, dst, r2);
+         case S390_ALU_LSH:  return s390_emit_SLLG(buf, dst, dst, r2, DISP20(0));
+         case S390_ALU_RSH:  return s390_emit_SRLG(buf, dst, dst, r2, DISP20(0));
+         case S390_ALU_RSHA: return s390_emit_SRAG(buf, dst, dst, r2, DISP20(0));
+         }
+         goto fail;
+      }
+      goto fail;
+   }
+
+   /* 2nd operand is in memory */
+   if (op2.tag == S390_OPND_AMODE) {
+      UInt b, x, d;
+      const s390_amode *src = op2.variant.am;
+
+      b = hregNumber(src->b);
+      x = hregNumber(src->x);  /* 0 for B12 and B20 */
+      d = src->d;
+
+      /* Shift operands are special here as there are no opcodes that
+         allow a memory operand. So we first load the 2nd operand into
+         some register. R0 is used to save and restore the contents of the
+         chosen register. */
+
+      if (insn->variant.alu.tag == S390_ALU_LSH ||
+          insn->variant.alu.tag == S390_ALU_RSH ||
+          insn->variant.alu.tag == S390_ALU_RSHA) {
+         UInt b2;
+
+         /* Choose a register (other than DST or R0) into which to place the
+            shift amount. The following works because r15 is reserved and
+            thus dst != 15. */
+         vassert(dst != 15);  /* extra paranoia */
+         b2 = (dst + 1) % 16;
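+         /* E.g., dst == 5 gives b2 == 6. Because dst != 15, b2 can never
+            wrap around to 0, i.e. b2 != R0. */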
+
+         buf = s390_emit_LGR(buf, R0, b2);  /* save */
+
+         /* Loading SRC to B2 does not modify R0. */
+         buf = s390_emit_load_mem(buf, insn->size, b2, src);
+
+         if (insn->size == 8) {
+            switch (insn->variant.alu.tag) {
+            case S390_ALU_LSH:
+               buf = s390_emit_SLLG(buf, dst, dst, b2, DISP20(0));
+               break;
+            case S390_ALU_RSH:
+               buf = s390_emit_SRLG(buf, dst, dst, b2, DISP20(0));
+               break;
+            case S390_ALU_RSHA:
+               buf = s390_emit_SRAG(buf, dst, dst, b2, DISP20(0));
+               break;
+            default: /* unreachable */
+               goto fail;
+            }
+         } else {
+            switch (insn->variant.alu.tag) {
+            case S390_ALU_LSH:
+               buf = s390_emit_SLL(buf, dst, b2, 0);
+               break;
+            case S390_ALU_RSH:
+               buf = s390_emit_SRL(buf, dst, b2, 0);
+               break;
+            case S390_ALU_RSHA:
+               buf = s390_emit_SRA(buf, dst, b2, 0);
+               break;
+            default: /* unreachable */
+               goto fail;
+            }
+         }
+         return s390_emit_LGR(buf, b2, R0);  /* restore */
+      }
+
+      switch (insn->size) {
+      case 1:
+         /* Move the byte from memory into scratch register r0 */
+         buf = s390_emit_load_mem(buf, 1, R0, src);
+
+         switch (insn->variant.alu.tag) {
+         case S390_ALU_ADD: return s390_emit_AR(buf, dst, R0);
+         case S390_ALU_SUB: return s390_emit_SR(buf, dst, R0);
+         case S390_ALU_MUL: return s390_emit_MSR(buf, dst, R0);
+         case S390_ALU_AND: return s390_emit_NR(buf, dst, R0);
+         case S390_ALU_OR:  return s390_emit_OR(buf, dst, R0);
+         case S390_ALU_XOR: return s390_emit_XR(buf, dst, R0);
+         case S390_ALU_LSH:
+         case S390_ALU_RSH:
+         case S390_ALU_RSHA: ; /* avoid GCC warning */
+         }
+         goto fail;
+
+      case 2:
+         switch (src->tag) {
+         case S390_AMODE_B12:
+         case S390_AMODE_BX12:
+            switch (insn->variant.alu.tag) {
+            case S390_ALU_ADD:
+               return s390_emit_AH(buf, dst, x, b, d);
+
+            case S390_ALU_SUB:
+               return s390_emit_SH(buf, dst, x, b, d);
+
+            case S390_ALU_MUL:
+               return s390_emit_MH(buf, dst, x, b, d);
+
+               /* For bitwise operations: Move two bytes from memory into scratch
+                  register r0; then perform operation */
+            case S390_ALU_AND:
+               buf = s390_emit_LH(buf, R0, x, b, d);
+               return s390_emit_NR(buf, dst, R0);
+
+            case S390_ALU_OR:
+               buf = s390_emit_LH(buf, R0, x, b, d);
+               return s390_emit_OR(buf, dst, R0);
+
+            case S390_ALU_XOR:
+               buf = s390_emit_LH(buf, R0, x, b, d);
+               return s390_emit_XR(buf, dst, R0);
+
+            case S390_ALU_LSH:
+            case S390_ALU_RSH:
+            case S390_ALU_RSHA: ; /* avoid GCC warning */
+            }
+            goto fail;
+
+         case S390_AMODE_B20:
+         case S390_AMODE_BX20:
+            switch (insn->variant.alu.tag) {
+            case S390_ALU_ADD:
+               return s390_emit_AHY(buf, dst, x, b, DISP20(d));
+
+            case S390_ALU_SUB:
+               return s390_emit_SHY(buf, dst, x, b, DISP20(d));
+
+            case S390_ALU_MUL:
+               return s390_emit_MHYw(buf, dst, x, b, DISP20(d));
+
+               /* For bitwise operations: Move two bytes from memory into scratch
+                  register r0; then perform operation */
+            case S390_ALU_AND:
+               buf = s390_emit_LHY(buf, R0, x, b, DISP20(d));
+               return s390_emit_NR(buf, dst, R0);
+
+            case S390_ALU_OR:
+               buf = s390_emit_LHY(buf, R0, x, b, DISP20(d));
+               return s390_emit_OR(buf, dst, R0);
+
+            case S390_ALU_XOR:
+               buf = s390_emit_LHY(buf, R0, x, b, DISP20(d));
+               return s390_emit_XR(buf, dst, R0);
+
+            case S390_ALU_LSH:
+            case S390_ALU_RSH:
+            case S390_ALU_RSHA: ; /* avoid GCC warning */
+            }
+            goto fail;
+         }
+         goto fail;
+
+      case 4:
+         switch (src->tag) {
+         case S390_AMODE_B12:
+         case S390_AMODE_BX12:
+            switch (insn->variant.alu.tag) {
+            case S390_ALU_ADD: return s390_emit_A(buf, dst, x, b, d);
+            case S390_ALU_SUB: return s390_emit_S(buf, dst, x, b, d);
+            case S390_ALU_MUL: return s390_emit_MS(buf, dst, x, b, d);
+            case S390_ALU_AND: return s390_emit_N(buf, dst, x, b, d);
+            case S390_ALU_OR:  return s390_emit_O(buf, dst, x, b, d);
+            case S390_ALU_XOR: return s390_emit_X(buf, dst, x, b, d);
+            case S390_ALU_LSH:
+            case S390_ALU_RSH:
+            case S390_ALU_RSHA: ; /* avoid GCC warning */
+            }
+            goto fail;
+
+         case S390_AMODE_B20:
+         case S390_AMODE_BX20:
+            switch (insn->variant.alu.tag) {
+            case S390_ALU_ADD: return s390_emit_AY(buf, dst, x, b, DISP20(d));
+            case S390_ALU_SUB: return s390_emit_SY(buf, dst, x, b, DISP20(d));
+            case S390_ALU_MUL: return s390_emit_MSY(buf, dst, x, b, DISP20(d));
+            case S390_ALU_AND: return s390_emit_NY(buf, dst, x, b, DISP20(d));
+            case S390_ALU_OR:  return s390_emit_OY(buf, dst, x, b, DISP20(d));
+            case S390_ALU_XOR: return s390_emit_XY(buf, dst, x, b, DISP20(d));
+            case S390_ALU_LSH:
+            case S390_ALU_RSH:
+            case S390_ALU_RSHA: ; /* avoid GCC warning */
+            }
+            goto fail;
+         }
+         goto fail;
+
+      case 8:
+         switch (insn->variant.alu.tag) {
+         case S390_ALU_ADD: return s390_emit_AG(buf, dst, x, b, DISP20(d));
+         case S390_ALU_SUB: return s390_emit_SG(buf, dst, x, b, DISP20(d));
+         case S390_ALU_MUL: return s390_emit_MSG(buf, dst, x, b, DISP20(d));
+         case S390_ALU_AND: return s390_emit_NG(buf, dst, x, b, DISP20(d));
+         case S390_ALU_OR:  return s390_emit_OG(buf, dst, x, b, DISP20(d));
+         case S390_ALU_XOR: return s390_emit_XG(buf, dst, x, b, DISP20(d));
+         case S390_ALU_LSH:
+         case S390_ALU_RSH:
+         case S390_ALU_RSHA: ; /* avoid GCC warning */
+         }
+         goto fail;
+      }
+      goto fail;
+   }
+
+   /* 2nd operand is an immediate value */
+   if (op2.tag == S390_OPND_IMMEDIATE) {
+      ULong value;
+
+      /* No masking of the value is required as it is not sign extended */
+      value = op2.variant.imm;
+
+      switch (insn->size) {
+      case 1:
+      case 2:
+         /* There is no 1-byte opcode. Do the computation in
+            2 bytes. The extra byte will be ignored. */
+         switch (insn->variant.alu.tag) {
+         case S390_ALU_ADD:
+            return s390_emit_AHI(buf, dst, value);
+
+         case S390_ALU_SUB:
+            return s390_emit_SLFIw(buf, dst, value);
+
+         case S390_ALU_MUL:
+            return s390_emit_MHI(buf, dst, value);
+
+         case S390_ALU_AND: return s390_emit_NILL(buf, dst, value);
+         case S390_ALU_OR:  return s390_emit_OILL(buf, dst, value);
+         case S390_ALU_XOR:
+            /* There is no XILL instruction.  Load the immediate value into
+               R0 and combine with the destination register. */
+            buf = s390_emit_LHI(buf, R0, value);
+            return s390_emit_XR(buf, dst, R0);
+
+         case S390_ALU_LSH:
+            return s390_emit_SLL(buf, dst, 0, value);
+
+         case S390_ALU_RSH:
+            return s390_emit_SRL(buf, dst, 0, value);
+
+         case S390_ALU_RSHA:
+            return s390_emit_SRA(buf, dst, 0, value);
+         }
+         goto fail;
+
+      case 4:
+         switch (insn->variant.alu.tag) {
+         case S390_ALU_ADD:
+            if (uint_fits_signed_16bit(value)) {
+               return s390_emit_AHI(buf, dst, value);
+            }
+            return s390_emit_AFIw(buf, dst, value);
+
+         case S390_ALU_SUB:  return s390_emit_SLFIw(buf, dst, value);
+         case S390_ALU_MUL:  return s390_emit_MSFIw(buf, dst, value);
+         case S390_ALU_AND:  return s390_emit_NILFw(buf, dst, value);
+         case S390_ALU_OR:   return s390_emit_OILFw(buf, dst, value);
+         case S390_ALU_XOR:  return s390_emit_XILFw(buf, dst, value);
+         case S390_ALU_LSH:  return s390_emit_SLL(buf, dst, 0, value);
+         case S390_ALU_RSH:  return s390_emit_SRL(buf, dst, 0, value);
+         case S390_ALU_RSHA: return s390_emit_SRA(buf, dst, 0, value);
+         }
+         goto fail;
+
+      case 8:
+         switch (insn->variant.alu.tag) {
+         case S390_ALU_ADD:
+            if (ulong_fits_signed_16bit(value)) {
+               return s390_emit_AGHI(buf, dst, value);
+            }
+            if (ulong_fits_signed_32bit(value) && s390_host_has_eimm) {
+               return s390_emit_AGFI(buf, dst, value);
+            }
+            /* Load constant into R0 then add */
+            buf = s390_emit_load_64imm(buf, R0, value);
+            return s390_emit_AGR(buf, dst, R0);
+
+         case S390_ALU_SUB:
+            if (ulong_fits_unsigned_32bit(value)) {
+               return s390_emit_SLGFIw(buf, dst, value);
+            }
+            /* Load value into R0; then subtract from destination reg */
+            buf = s390_emit_load_64imm(buf, R0, value);
+            return s390_emit_SGR(buf, dst, R0);
+
+         case S390_ALU_MUL:
+            if (ulong_fits_signed_32bit(value) && s390_host_has_gie) {
+               return s390_emit_MSGFI(buf, dst, value);
+            }
+            /* Load constant into R0 then multiply */
+            buf = s390_emit_load_64imm(buf, R0, value);
+            return s390_emit_MSGR(buf, dst, R0);
+
+            /* Do it in two steps: upper half [0:31] and lower half [32:63] */
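+            /* E.g., value == 0x1122334455667788ULL: the *IHF insns get
+               0x11223344, the *ILF insns get 0x55667788. */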
+         case S390_ALU_AND:
+            if (s390_host_has_eimm) {
+               buf  = s390_emit_NIHF(buf, dst, value >> 32);
+               return s390_emit_NILF(buf, dst, value & 0xFFFFFFFF);
+            }
+            /* Load value into R0; then combine with destination reg */
+            buf = s390_emit_load_64imm(buf, R0, value);
+            return s390_emit_NGR(buf, dst, R0);
+
+         case S390_ALU_OR:
+            if (s390_host_has_eimm) {
+               buf  = s390_emit_OIHF(buf, dst, value >> 32);
+               return s390_emit_OILF(buf, dst, value & 0xFFFFFFFF);
+            }
+            /* Load value into R0; then combine with destination reg */
+            buf = s390_emit_load_64imm(buf, R0, value);
+            return s390_emit_OGR(buf, dst, R0);
+
+         case S390_ALU_XOR:
+            if (s390_host_has_eimm) {
+               buf  = s390_emit_XIHF(buf, dst, value >> 32);
+               return s390_emit_XILF(buf, dst, value & 0xFFFFFFFF);
+            }
+            /* Load value into R0; then combine with destination reg */
+            buf = s390_emit_load_64imm(buf, R0, value);
+            return s390_emit_XGR(buf, dst, R0);
+
+            /* No special considerations for long displacement here. Only the six
+               least significant bits of VALUE are used; all other bits are
+               ignored. So the DH2 bits are irrelevant and do not influence the
+               shift operation, regardless of whether long displacement is
+               available. */
+         case S390_ALU_LSH:  return s390_emit_SLLG(buf, dst, dst, 0, DISP20(value));
+         case S390_ALU_RSH:  return s390_emit_SRLG(buf, dst, dst, 0, DISP20(value));
+         case S390_ALU_RSHA: return s390_emit_SRAG(buf, dst, dst, 0, DISP20(value));
+         }
+         goto fail;
+      }
+      goto fail;
+   }
+
+ fail:
+   vpanic("s390_insn_alu_emit");
+}
+
+
+static UChar *
+s390_widen_emit(UChar *buf, const s390_insn *insn, UInt from_size,
+                Bool sign_extend)
+{
+   s390_opnd_RMI opnd = insn->variant.unop.src;
+
+   switch (opnd.tag) {
+   case S390_OPND_REG: {
+      UChar r1 = hregNumber(insn->variant.unop.dst);
+      UChar r2 = hregNumber(opnd.variant.reg);
+
+      switch (from_size) {
+      case 1:
+         /* Widening to a half-word is implemented like widening to a word
+            because the upper half-word will not be looked at. */
+         if (insn->size == 4 || insn->size == 2) {  /* 8 --> 32    8 --> 16 */
+            if (sign_extend)
+               return s390_emit_LBRw(buf, r1, r2);
+            else
+               return s390_emit_LLCRw(buf, r1, r2);
+         }
+         if (insn->size == 8) {  /* 8 --> 64 */
+            if (sign_extend)
+               return s390_emit_LGBRw(buf, r1, r2);
+            else
+               return s390_emit_LLGCRw(buf, r1, r2);
+         }
+         goto fail;
+
+      case 2:
+         if (insn->size == 4) {  /* 16 --> 32 */
+            if (sign_extend)
+               return s390_emit_LHRw(buf, r1, r2);
+            else
+               return s390_emit_LLHRw(buf, r1, r2);
+         }
+         if (insn->size == 8) {  /* 16 --> 64 */
+            if (sign_extend)
+               return s390_emit_LGHRw(buf, r1, r2);
+            else
+               return s390_emit_LLGHRw(buf, r1, r2);
+         }
+         goto fail;
+
+      case 4:
+         if (insn->size == 8) {  /* 32 --> 64 */
+            if (sign_extend)
+               return s390_emit_LGFR(buf, r1, r2);
+            else
+               return s390_emit_LLGFR(buf, r1, r2);
+         }
+         goto fail;
+
+      default: /* unexpected "from" size */
+         goto fail;
+      }
+   }
+
+   case S390_OPND_AMODE: {
+      UChar r1 = hregNumber(insn->variant.unop.dst);
+      const s390_amode *src = opnd.variant.am;
+      UChar b = hregNumber(src->b);
+      UChar x = hregNumber(src->x);
+      Int   d = src->d;
+
+      switch (from_size) {
+      case 1:
+         if (insn->size == 4 || insn->size == 2) {
+            if (sign_extend)
+               return s390_emit_LBw(buf, r1, x, b, DISP20(d));
+            else
+               return s390_emit_LLCw(buf, r1, x, b, DISP20(d));
+         }
+         if (insn->size == 8) {
+            if (sign_extend)
+               return s390_emit_LGBw(buf, r1, x, b, DISP20(d));
+            else
+               return s390_emit_LLGC(buf, r1, x, b, DISP20(d));
+         }
+         goto fail;
+
+      case 2:
+         if (insn->size == 4) {  /* 16 --> 32 */
+            if (sign_extend == 0)
+               return s390_emit_LLHw(buf, r1, x, b, DISP20(d));
+
+            switch (src->tag) {
+            case S390_AMODE_B12:
+            case S390_AMODE_BX12:
+               return s390_emit_LH(buf, r1, x, b, d);
+
+            case S390_AMODE_B20:
+            case S390_AMODE_BX20:
+               return s390_emit_LHY(buf, r1, x, b, DISP20(d));
+            }
+            goto fail;
+         }
+         if (insn->size == 8) {  /* 16 --> 64 */
+            if (sign_extend)
+               return s390_emit_LGH(buf, r1, x, b, DISP20(d));
+            else
+               return s390_emit_LLGH(buf, r1, x, b, DISP20(d));
+         }
+         goto fail;
+
+      case 4:
+         if (insn->size == 8) {  /* 32 --> 64 */
+            if (sign_extend)
+               return s390_emit_LGF(buf, r1, x, b, DISP20(d));
+            else
+               return s390_emit_LLGF(buf, r1, x, b, DISP20(d));
+         }
+         goto fail;
+
+      default: /* unexpected "from" size */
+         goto fail;
+      }
+   }
+
+   case S390_OPND_IMMEDIATE: {
+      UChar r1 = hregNumber(insn->variant.unop.dst);
+      ULong value = opnd.variant.imm;
+
+      switch (from_size) {
+      case 1:
+         if (insn->size == 4 || insn->size == 2) {  /* 8 --> 32   8 --> 16 */
+            if (sign_extend) {
+               /* host can do the sign extension to 16-bit; LHI does the rest */
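+               /* E.g., value == 0xFF: (Char)0xFF == -1, which widens to
+                  (Short)-1 == 0xFFFF; LHI then sign-extends to 32 bits. */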
+               return s390_emit_LHI(buf, r1, (Short)(Char)(UChar)value);
+            } else {
+               return s390_emit_LHI(buf, r1, value);
+            }
+         }
+         if (insn->size == 8) {  /* 8 --> 64 */
+            if (sign_extend) {
+               /* host can do the sign extension to 16-bit; LGHI does the rest */
+               return s390_emit_LGHI(buf, r1, (Short)(Char)(UChar)value);
+            } else {
+               return s390_emit_LGHI(buf, r1, value);
+            }
+         }
+         goto fail;
+
+      case 2:
+         if (insn->size == 4) {  /* 16 --> 32 */
+            return s390_emit_LHI(buf, r1, value);
+         }
+         if (insn->size == 8) {  /* 16 --> 64 */
+            if (sign_extend)
+               return s390_emit_LGHI(buf, r1, value);
+            else
+               return s390_emit_LLILL(buf, r1, value);
+         }
+         goto fail;
+
+      case 4:
+         if (insn->size == 8) {  /* 32 --> 64 */
+            if (sign_extend)
+               return s390_emit_LGFIw(buf, r1, value);
+            else
+               return s390_emit_LLILFw(buf, r1, value);
+         }
+         goto fail;
+
+      default: /* unexpected "from" size */
+         goto fail;
+      }
+   }
+   }
+
+ fail:
+   vpanic("s390_widen_emit");
+}
+
+
+static UChar *
+s390_negate_emit(UChar *buf, const s390_insn *insn)
+{
+   s390_opnd_RMI opnd;
+
+   opnd = insn->variant.unop.src;
+
+   switch (opnd.tag) {
+   case S390_OPND_REG: {
+      UChar r1 = hregNumber(insn->variant.unop.dst);
+      UChar r2 = hregNumber(opnd.variant.reg);
+
+      switch (insn->size) {
+      case 1:
+      case 2:
+      case 4:
+         return s390_emit_LCR(buf, r1, r2);
+
+      case 8:
+         return s390_emit_LCGR(buf, r1, r2);
+
+      default:
+         goto fail;
+      }
+   }
+
+   case S390_OPND_AMODE: {
+      UChar r1 = hregNumber(insn->variant.unop.dst);
+
+      /* Load bytes into scratch register R0, then negate */
+      buf = s390_emit_load_mem(buf, insn->size, R0, opnd.variant.am);
+
+      switch (insn->size) {
+      case 1:
+      case 2:
+      case 4:
+         return s390_emit_LCR(buf, r1, R0);
+
+      case 8:
+         return s390_emit_LCGR(buf, r1, R0);
+
+      default:
+         goto fail;
+      }
+   }
+
+   case S390_OPND_IMMEDIATE: {
+      UChar r1 = hregNumber(insn->variant.unop.dst);
+      ULong value = opnd.variant.imm;
+
+      value = ~value + 1;   /* two's complement */
+
+      switch (insn->size) {
+      case 1:
+      case 2:
+         /* Load the immediate value as a 4-byte value. That does not hurt,
+            as the extra bytes will not be looked at. Fall through .... */
+      case 4:
+         return s390_emit_load_32imm(buf, r1, value);
+
+      case 8:
+         return s390_emit_load_64imm(buf, r1, value);
+
+      default:
+         goto fail;
+      }
+   }
+   }
+
+ fail:
+   vpanic("s390_negate_emit");
+}
+
+
+static UChar *
+s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
+{
+   switch (insn->variant.unop.tag) {
+   case S390_ZERO_EXTEND_8:  return s390_widen_emit(buf, insn, 1, 0);
+   case S390_ZERO_EXTEND_16: return s390_widen_emit(buf, insn, 2, 0);
+   case S390_ZERO_EXTEND_32: return s390_widen_emit(buf, insn, 4, 0);
+
+   case S390_SIGN_EXTEND_8:  return s390_widen_emit(buf, insn, 1, 1);
+   case S390_SIGN_EXTEND_16: return s390_widen_emit(buf, insn, 2, 1);
+   case S390_SIGN_EXTEND_32: return s390_widen_emit(buf, insn, 4, 1);
+
+   case S390_NEGATE:         return s390_negate_emit(buf, insn);
+   }
+
+   vpanic("s390_insn_unop_emit");
+}
+
+
+/* Only 4-byte and 8-byte operands are handled. 1-byte and 2-byte
+   comparisons will have been converted to 4-byte comparisons in
+   s390_isel_cc and should not occur here. */
+static UChar *
+s390_insn_test_emit(UChar *buf, const s390_insn *insn)
+{
+   s390_opnd_RMI opnd;
+
+   opnd = insn->variant.test.src;
+
+   switch (opnd.tag) {
+   case S390_OPND_REG: {
+      UInt reg = hregNumber(opnd.variant.reg);
+
+      switch (insn->size) {
+      case 4:
+         return s390_emit_LTR(buf, reg, reg);
+
+      case 8:
+         return s390_emit_LTGR(buf, reg, reg);
+
+      default:
+         goto fail;
+      }
+   }
+
+   case S390_OPND_AMODE: {
+      const s390_amode *am = opnd.variant.am;
+      UChar b = hregNumber(am->b);
+      UChar x = hregNumber(am->x);
+      Int   d = am->d;
+
+      switch (insn->size) {
+      case 4:
+         return s390_emit_LTw(buf, R0, x, b, DISP20(d));
+
+      case 8:
+         return s390_emit_LTGw(buf, R0, x, b, DISP20(d));
+
+      default:
+         goto fail;
+      }
+   }
+
+   case S390_OPND_IMMEDIATE: {
+      ULong value = opnd.variant.imm;
+
+      switch (insn->size) {
+      case 4:
+         buf = s390_emit_load_32imm(buf, R0, value);
+         return s390_emit_LTR(buf, R0, R0);
+
+      case 8:
+         buf = s390_emit_load_64imm(buf, R0, value);
+         return s390_emit_LTGR(buf, R0, R0);
+
+      default:
+         goto fail;
+      }
+   }
+
+   default:
+      goto fail;
+   }
+
+ fail:
+   vpanic("s390_insn_test_emit");
+}
+
+
+static UChar *
+s390_insn_cc2bool_emit(UChar *buf, const s390_insn *insn)
+{
+   UChar r1 = hregNumber(insn->variant.cc2bool.dst);
+   s390_cc_t cond = insn->variant.cc2bool.cond;
+
+   /* Set the destination register to 1 or 0, depending on whether
+      the relevant condition holds. A 64-bit value is computed. */
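+   /* The sequence below exploits the layout of s390 condition-code masks:
+      a 4-bit mask has bit (3 - cc) set iff the condition holds for
+      condition code cc. Shifting the mask left by cc moves that bit to
+      position 3; shifting right by 3 and masking with 1 extracts it.
+      A rough C sketch of the computation, for illustration only:
+
+         static unsigned cc_to_bool(unsigned mask, unsigned cc)
+         {
+            return ((mask << cc) >> 3) & 1;   // 1 iff cond holds for cc
+         }
+   */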
+   if (cond == S390_CC_ALWAYS)
+      return s390_emit_LGHI(buf, r1, 1);  /* r1 = 1 */
+
+   buf = s390_emit_load_cc(buf, r1);                 /* r1 = cc */
+   buf = s390_emit_LGHI(buf, R0, cond);              /* r0 = mask */
+   buf = s390_emit_SLLG(buf, r1, R0, r1, DISP20(0)); /* r1 = mask << cc */
+   buf = s390_emit_SRLG(buf, r1, r1, 0,  DISP20(3)); /* r1 = r1 >> 3 */
+   buf = s390_emit_NILL(buf, r1, 1);                 /* r1 = r1 & 0x1 */
+
+   return buf;
+}
+
+
+/* Only 4-byte and 8-byte operands are handled. */
+static UChar *
+s390_insn_cas_emit(UChar *buf, const s390_insn *insn)
+{
+   UChar r1, r3, b, old;
+   Int d;
+   s390_amode *am;
+
+   r1 = hregNumber(insn->variant.cas.op1); /* expected value */
+   r3 = hregNumber(insn->variant.cas.op3);
+   old= hregNumber(insn->variant.cas.old_mem);
+   am = insn->variant.cas.op2;
+   b  = hregNumber(am->b);
+   d  = am->d;
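+   /* Rough sketch of the intended semantics (illustration only):
+
+         old = *op2;
+         if (old == expected) *op2 = op3;
+
+      CS/CSG compare R0 (holding the expected value) with the storage
+      operand and, on equality, store r3 into it. Either way R0 ends up
+      holding the old memory value, which is then copied to OLD. */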
+
+   switch (insn->size) {
+   case 4:
+      /* r1 must not be overwritten. So copy it to R0 and let CS clobber it */
+      buf = s390_emit_LR(buf, R0, r1);
+      if (am->tag == S390_AMODE_B12)
+         buf = s390_emit_CS(buf, R0, r3, b, d);
+      else
+         buf = s390_emit_CSY(buf, R0, r3, b, DISP20(d));
+      /* Now copy R0 which has the old memory value to OLD */
+      return s390_emit_LR(buf, old, R0);
+
+   case 8:
+      /* r1 must not be overwritten. So copy it to R0 and let CSG clobber it */
+      buf = s390_emit_LGR(buf, R0, r1);
+      buf = s390_emit_CSG(buf, R0, r3, b, DISP20(d));
+      /* Now copy R0 which has the old memory value to OLD */
+      return s390_emit_LGR(buf, old, R0);
+
+   default:
+      goto fail;
+   }
+
+ fail:
+   vpanic("s390_insn_cas_emit");
+}
+
+
+/* Only 4-byte and 8-byte comparisons are handled. 1-byte and 2-byte
+   comparisons will have been converted to 4-byte comparisons in
+   s390_isel_cc and should not occur here. */
+static UChar *
+s390_insn_compare_emit(UChar *buf, const s390_insn *insn)
+{
+   s390_opnd_RMI op2;
+   HReg op1;
+   Bool signed_comparison;
+
+   op1 = insn->variant.compare.src1;
+   op2 = insn->variant.compare.src2;
+   signed_comparison = insn->variant.compare.signed_comparison;
+
+   switch (op2.tag) {
+   case S390_OPND_REG: {
+      UInt r1 = hregNumber(op1);
+      UInt r2 = hregNumber(op2.variant.reg);
+
+      switch (insn->size) {
+      case 4:
+         if (signed_comparison)
+            return s390_emit_CR(buf, r1, r2);
+         else
+            return s390_emit_CLR(buf, r1, r2);
+
+      case 8:
+         if (signed_comparison)
+            return s390_emit_CGR(buf, r1, r2);
+         else
+            return s390_emit_CLGR(buf, r1, r2);
+
+      default:
+         goto fail;
+      }
+   }
+
+   case S390_OPND_AMODE: {
+      UChar r1 = hregNumber(op1);
+      const s390_amode *am = op2.variant.am;
+      UChar b = hregNumber(am->b);
+      UChar x = hregNumber(am->x);
+      Int   d = am->d;
+
+      switch (insn->size) {
+      case 4:
+         switch (am->tag) {
+         case S390_AMODE_B12:
+         case S390_AMODE_BX12:
+            if (signed_comparison)
+               return s390_emit_C(buf, r1, x, b, d);
+            else
+               return s390_emit_CL(buf, r1, x, b, d);
+
+         case S390_AMODE_B20:
+         case S390_AMODE_BX20:
+            if (signed_comparison)
+               return s390_emit_CY(buf, r1, x, b, DISP20(d));
+            else
+               return s390_emit_CLY(buf, r1, x, b, DISP20(d));
+         }
+         goto fail;
+
+      case 8:
+         if (signed_comparison)
+            return s390_emit_CG(buf, r1, x, b, DISP20(d));
+         else
+            return s390_emit_CLG(buf, r1, x, b, DISP20(d));
+
+      default:
+         goto fail;
+      }
+   }
+
+   case S390_OPND_IMMEDIATE: {
+      UChar r1 = hregNumber(op1);
+      ULong value = op2.variant.imm;
+
+      switch (insn->size) {
+      case 4:
+         if (signed_comparison)
+            return s390_emit_CFIw(buf, r1, value);
+         else
+            return s390_emit_CLFIw(buf, r1, value);
+
+      case 8:
+         buf = s390_emit_load_64imm(buf, R0, value);
+         if (signed_comparison)
+            return s390_emit_CGR(buf, r1, R0);
+         else
+            return s390_emit_CLGR(buf, r1, R0);
+
+      default:
+         goto fail;
+      }
+   }
+
+   default:
+      goto fail;
+   }
+
+ fail:
+   vpanic("s390_insn_compare_emit");
+}
+
+
+static UChar *
+s390_insn_mul_emit(UChar *buf, const s390_insn *insn)
+{
+   s390_opnd_RMI op2;
+   UChar r1;
+   Bool signed_multiply;
+
+   /* The register number identifying the register pair */
+   r1  = hregNumber(insn->variant.mul.dst_hi);
+
+   op2 = insn->variant.mul.op2;
+   signed_multiply = insn->variant.mul.signed_multiply;
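+   /* Note: r1 is the even register of an even/odd pair. MR and MLR take
+      the multiplicand from r1 + 1 and leave the double-wide product in
+      the pair (r1, r1 + 1); MLGR works analogously on 64-bit operands. */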
+
+   switch (op2.tag) {
+   case S390_OPND_REG: {
+      UInt r2 = hregNumber(op2.variant.reg);
+
+      switch (insn->size) {
+      case 1:
+      case 2:
+      case 4:
+         if (signed_multiply)
+            return s390_emit_MR(buf, r1, r2);
+         else
+            return s390_emit_MLR(buf, r1, r2);
+
+      case 8:
+         if (signed_multiply)
+            vpanic("s390_insn_mul_emit");
+         else
+            return s390_emit_MLGR(buf, r1, r2);
+
+      default:
+         goto fail;
+      }
+   }
+
+   case S390_OPND_AMODE: {
+      const s390_amode *am = op2.variant.am;
+      UChar b = hregNumber(am->b);
+      UChar x = hregNumber(am->x);
+      Int   d = am->d;
+
+      switch (insn->size) {
+      case 1:
+      case 2:
+         /* Load bytes into scratch register R0, then multiply */
+         buf = s390_emit_load_mem(buf, insn->size, R0, am);
+         if (signed_multiply)
+            return s390_emit_MR(buf, r1, R0);
+         else
+            return s390_emit_MLR(buf, r1, R0);
+
+      case 4:
+         switch (am->tag) {
+         case S390_AMODE_B12:
+         case S390_AMODE_BX12:
+            if (signed_multiply)
+               return s390_emit_M(buf, r1, x, b, d);
+            else
+               return s390_emit_ML(buf, r1, x, b, DISP20(d));
+
+         case S390_AMODE_B20:
+         case S390_AMODE_BX20:
+            if (signed_multiply)
+               return s390_emit_MFYw(buf, r1, x, b, DISP20(d));
+            else
+               return s390_emit_ML(buf, r1, x, b, DISP20(d));
+         }
+         goto fail;
+
+      case 8:
+         if (signed_multiply)
+            vpanic("s390_insn_mul_emit");
+         else
+            return s390_emit_MLG(buf, r1, x, b, DISP20(d));
+
+      default:
+         goto fail;
+      }
+   }
+
+   case S390_OPND_IMMEDIATE: {
+      ULong value = op2.variant.imm;
+
+      switch (insn->size) {
+      case 1:
+      case 2:
+      case 4:
+         buf = s390_emit_load_32imm(buf, R0, value);
+         if (signed_multiply)
+            return s390_emit_MR(buf, r1, R0);
+         else
+            return s390_emit_MLR(buf, r1, R0);
+
+      case 8:
+         buf = s390_emit_load_64imm(buf, R0, value);
+         if (signed_multiply)
+            vpanic("s390_insn_mul_emit");
+         else
+            return s390_emit_MLGR(buf, r1, R0);
+
+      default:
+         goto fail;
+      }
+   }
+
+   default:
+      goto fail;
+   }
+
+ fail:
+   vpanic("s390_insn_mul_emit");
+}
+
+
+static UChar *
+s390_insn_div_emit(UChar *buf, const s390_insn *insn)
+{
+   s390_opnd_RMI op2;
+   UChar r1;
+   Bool signed_divide;
+
+   r1  = hregNumber(insn->variant.div.op1_hi);
+   op2 = insn->variant.div.op2;
+   signed_divide = insn->variant.div.signed_divide;
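+   /* Note: r1 is the even register of an even/odd pair holding the
+      double-wide dividend. DR and DLR leave the remainder in r1 and the
+      quotient in r1 + 1; DLGR works analogously on 64-bit operands. */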
+
+   switch (op2.tag) {
+   case S390_OPND_REG: {
+      UInt r2 = hregNumber(op2.variant.reg);
+
+      switch (insn->size) {
+      case 4:
+         if (signed_divide)
+            return s390_emit_DR(buf, r1, r2);
+         else
+            return s390_emit_DLR(buf, r1, r2);
+
+      case 8:
+         if (signed_divide)
+            vpanic("s390_insn_div_emit");
+         else
+            return s390_emit_DLGR(buf, r1, r2);
+
+      default:
+         goto fail;
+      }
+   }
+
+   case S390_OPND_AMODE: {
+      const s390_amode *am = op2.variant.am;
+      UChar b = hregNumber(am->b);
+      UChar x = hregNumber(am->x);
+      Int   d = am->d;
+
+      switch (insn->size) {
+      case 4:
+         switch (am->tag) {
+         case S390_AMODE_B12:
+         case S390_AMODE_BX12:
+            if (signed_divide)
+               return s390_emit_D(buf, r1, x, b, d);
+            else
+               return s390_emit_DL(buf, r1, x, b, DISP20(d));
+
+         case S390_AMODE_B20:
+         case S390_AMODE_BX20:
+            if (signed_divide) {
+               buf = s390_emit_LY(buf, R0, x, b, DISP20(d));
+               return s390_emit_DR(buf, r1, R0);
+            } else
+               return s390_emit_DL(buf, r1, x, b, DISP20(d));
+         }
+         goto fail;
+
+      case 8:
+         if (signed_divide)
+            vpanic("s390_insn_div_emit");
+         else
+            return s390_emit_DLG(buf, r1, x, b, DISP20(d));
+
+      default:
+         goto fail;
+      }
+   }
+
+   case S390_OPND_IMMEDIATE: {
+      ULong value = op2.variant.imm;
+
+      switch (insn->size) {
+      case 4:
+         buf = s390_emit_load_32imm(buf, R0, value);
+         if (signed_divide)
+            return s390_emit_DR(buf, r1, R0);
+         else
+            return s390_emit_DLR(buf, r1, R0);
+
+      case 8:
+         buf = s390_emit_load_64imm(buf, R0, value);
+         if (signed_divide)
+            vpanic("s390_insn_div_emit");
+         else
+            return s390_emit_DLGR(buf, r1, R0);
+
+      default:
+         goto fail;
+      }
+   }
+
+   default:
+      goto fail;
+   }
+
+ fail:
+   vpanic("s390_insn_div_emit");
+}
+
+
+static UChar *
+s390_insn_divs_emit(UChar *buf, const s390_insn *insn)
+{
+   s390_opnd_RMI op2;
+   UChar r1;
+
+   r1  = hregNumber(insn->variant.divs.rem);
+   op2 = insn->variant.divs.op2;
+
+   switch (op2.tag) {
+   case S390_OPND_REG: {
+      UInt r2 = hregNumber(op2.variant.reg);
+
+      return s390_emit_DSGR(buf, r1, r2);
+   }
+
+   case S390_OPND_AMODE: {
+      const s390_amode *am = op2.variant.am;
+      UChar b = hregNumber(am->b);
+      UChar x = hregNumber(am->x);
+      Int   d = am->d;
+
+      return s390_emit_DSG(buf, r1, x, b, DISP20(d));
+   }
+
+   case S390_OPND_IMMEDIATE: {
+      ULong value = op2.variant.imm;
+
+      buf = s390_emit_load_64imm(buf, R0, value);
+      return s390_emit_DSGR(buf, r1, R0);
+   }
+
+   default:
+      goto fail;
+   }
+
+ fail:
+   vpanic("s390_insn_divs_emit");
+}
+
+
+static UChar *
+s390_insn_clz_emit(UChar *buf, const s390_insn *insn)
+{
+   s390_opnd_RMI src;
+   UChar r1, r1p1, r2, *p;
+
+   r1   = hregNumber(insn->variant.clz.num_bits);
+   r1p1 = hregNumber(insn->variant.clz.clobber);
+
+   vassert((r1 & 0x1) == 0);
+   vassert(r1p1 == r1 + 1);
+
+   p = buf;
+   src = insn->variant.clz.src;
+
+   /* Get operand and move it to r2 */
+   switch (src.tag) {
+   case S390_OPND_REG:
+      r2 = hregNumber(src.variant.reg);
+      break;
+
+   case S390_OPND_AMODE: {
+      const s390_amode *am = src.variant.am;
+      UChar b = hregNumber(am->b);
+      UChar x = hregNumber(am->x);
+      Int   d = am->d;
+
+      p  = s390_emit_LG(p, R0, x, b, DISP20(d));
+      r2 = R0;
+      break;
+   }
+
+   case S390_OPND_IMMEDIATE: {
+      ULong value = src.variant.imm;
+
+      p  = s390_emit_load_64imm(p, R0, value);
+      r2 = R0;
+      break;
+   }
+
+   default:
+      goto fail;
+   }
+
+   /* Use FLOGR if the extended-immediate facility is available */
+   if (s390_host_has_eimm) {
+      return s390_emit_FLOGR(p, r1, r2);
+   }
+
+   /*
+      r0 = r2;
+      r1 = 64;
+      while (r0 != 0) {
+        r1 -= 1;
+        r0 >>= 1;
+      }
+   */
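+   /* E.g., r2 == 1 iterates once, leaving r1 == 63 (63 leading zeros);
+      r2 == 0 does not iterate, leaving r1 == 64. */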
+   p = s390_emit_LTGR(p, R0, r2);
+   p = s390_emit_LLILL(p, r1,  64);
+
+   p = s390_emit_BRC(p, S390_CC_E, (4 + 4 + 6 + 4 + 4)/ 2);  /* 4 bytes */
+   p = s390_emit_AGHI(p, r1, (UShort)-1);         /* r1  -= 1;  4 bytes */
+   p = s390_emit_SRLG(p, R0, R0, R0, DISP20(1));  /* r0 >>= 1;  6 bytes */
+   p = s390_emit_LTGR(p, R0, R0);                 /* set cc     4 bytes */
+   p = s390_emit_BRC(p, S390_CC_NE,               /*            4 bytes */
+                     (UShort)(-(4 + 6 + 4) / 2));
+   return p;
+
+ fail:
+   vpanic("s390_insn_clz_emit");
+}
+
+
+static UChar *
+s390_insn_branch_emit(UChar *buf, const s390_insn *insn)
+{
+   s390_opnd_RMI dst;
+   s390_cc_t cond;
+   UInt       trc;
+   UChar *p, *ptmp = 0;  /* avoid compiler warnings */
+
+   cond = insn->variant.branch.cond;
+   dst  = insn->variant.branch.dst;
+
+   p = buf;
+   trc = 0;
+
+   if (cond != S390_CC_ALWAYS) {
+      /* So we have something like this
+         if (cond) goto X;
+         Y: ...
+         We convert this into
+         if (! cond) goto Y;        // BRC insn; 4 bytes
+         return_reg = X;
+         return to dispatcher
+         Y:
+      */
+      ptmp = p; /* 4 bytes (a BRC insn) to be filled in here */
+      p += 4;
+   }
+
+   /* If the jump kind is non-boring, set the guest-state pointer
+      appropriately. */
+
+   switch (insn->variant.branch.kind) {
+   case Ijk_ClientReq:   trc = VEX_TRC_JMP_CLIENTREQ;   break;
+   case Ijk_Sys_syscall: trc = VEX_TRC_JMP_SYS_SYSCALL; break;
+   case Ijk_Yield:       trc = VEX_TRC_JMP_YIELD;       break;
+   case Ijk_EmWarn:      trc = VEX_TRC_JMP_EMWARN;      break;
+   case Ijk_EmFail:      trc = VEX_TRC_JMP_EMFAIL;      break;
+   case Ijk_MapFail:     trc = VEX_TRC_JMP_MAPFAIL;     break;
+   case Ijk_NoDecode:    trc = VEX_TRC_JMP_NODECODE;    break;
+   case Ijk_TInval:      trc = VEX_TRC_JMP_TINVAL;      break;
+   case Ijk_NoRedir:     trc = VEX_TRC_JMP_NOREDIR;     break;
+   case Ijk_SigTRAP:     trc = VEX_TRC_JMP_SIGTRAP;     break;
+   case Ijk_Ret:         trc = 0; break;
+   case Ijk_Call:        trc = 0; break;
+   case Ijk_Boring:      trc = 0; break;
+
+   default:
+      vpanic("s390_insn_branch_emit: unknown jump kind");
+   }
+
+   /* Get the destination address into the return register */
+   switch (dst.tag) {
+   case S390_OPND_REG:
+      p = s390_emit_LGR(p, S390_REGNO_RETURN_VALUE, hregNumber(dst.variant.reg));
+      break;
+
+   case S390_OPND_AMODE: {
+      const s390_amode *am = dst.variant.am;
+      UChar b = hregNumber(am->b);
+      UChar x = hregNumber(am->x);
+      Int   d = am->d;
+
+      p = s390_emit_LG(p, S390_REGNO_RETURN_VALUE, x, b, DISP20(d));
+      break;
+   }
+
+   case S390_OPND_IMMEDIATE:
+      p = s390_emit_load_64imm(p, S390_REGNO_RETURN_VALUE, dst.variant.imm);
+      break;
+
+   default:
+      goto fail;
+   }
+
+   if (trc != 0) {
+      /* Something special. Set guest-state pointer appropriately */
+      p = s390_emit_LGHI(p, S390_REGNO_GUEST_STATE_POINTER, trc);
+   } else {
+      /* Nothing special needs to be done for calls and returns. */
+   }
+
+   p = s390_emit_BCR(p, S390_CC_ALWAYS, S390_REGNO_LINK_REGISTER);
+
+   if (cond != S390_CC_ALWAYS) {
+      Int delta = p - ptmp;
+
+      delta >>= 1;  /* immediate constant is #half-words */
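+      /* E.g., if 10 bytes were emitted after the BRC placeholder, then
+         delta == (4 + 10) / 2 == 7 half-words, counted from the address
+         of the BRC instruction itself. */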
+      vassert(delta > 0 && delta < (1 << 16));
+      s390_emit_BRC(ptmp, s390_cc_invert(cond), delta);
+   }
+
+   return p;
+
+ fail:
+   vpanic("s390_insn_branch_emit");
+}
+
+
+static UChar *
+s390_insn_helper_call_emit(UChar *buf, const s390_insn *insn)
+{
+   s390_cc_t cond;
+   ULong target;
+   UChar *ptmp = buf;
+
+   cond = insn->variant.helper_call.cond;
+   target = insn->variant.helper_call.target;
+
+   if (cond != S390_CC_ALWAYS) {
+      /* So we have something like this
+         if (cond) call X;
+         Y: ...
+         We convert this into
+         if (! cond) goto Y;        // BRC opcode; 4 bytes
+         call X;
+         Y:
+      */
+      /* 4 bytes (a BRC insn) to be filled in here */
+      buf += 4;
+   }
+
+   /* Load the target address into a register that
+      (a) is not used for passing parameters to the helper and
+      (b) can be clobbered by the callee.
+      r1 looks like a good choice.
+      Also, we need to arrange for the return address to be put into the
+      link register. */
+   buf = s390_emit_load_64imm(buf, 1, target);
+
+   /* Stash away the client's FPC register because the helper might change it. */
+   buf = s390_emit_STFPC(buf, S390_REGNO_STACK_POINTER, S390_OFFSET_SAVED_FPC_C);
+
+   /* Before we can call the helper, we need to save the link register,
+      because the BASR will overwrite it. We cannot use a register for that.
+      (a) Volatile registers will be modified by the helper.
+      (b) For saved registers the client code assumes that they have not
+          changed after the function returns. So we cannot use them to store
+          the link register.
+      In the dispatcher, before calling the client code, we have arranged for
+      a location on the stack for this purpose. See dispatch-s390x-linux.S. */
+   buf = s390_emit_STG(buf, S390_REGNO_LINK_REGISTER, 0,        // save LR
+                       S390_REGNO_STACK_POINTER, S390_OFFSET_SAVED_LR, 0);
+   buf = s390_emit_BASR(buf, S390_REGNO_LINK_REGISTER, 1);      // call helper
+   buf = s390_emit_LG(buf, S390_REGNO_LINK_REGISTER, 0,         // restore LR
+                      S390_REGNO_STACK_POINTER, S390_OFFSET_SAVED_LR, 0);
+   buf = s390_emit_LFPC(buf, S390_REGNO_STACK_POINTER,          // restore FPC
+                        S390_OFFSET_SAVED_FPC_C);
+
+   if (cond != S390_CC_ALWAYS) {
+      Int delta = buf - ptmp;
+
+      delta >>= 1;  /* immediate constant is #half-words */
+      vassert(delta > 0 && delta < (1 << 16));
+      s390_emit_BRC(ptmp, s390_cc_invert(cond), delta);
+   }
+
+   return buf;
+}
+
+
+static UChar *
+s390_insn_cond_move_emit(UChar *buf, const s390_insn *insn)
+{
+   HReg dst;
+   s390_opnd_RMI src;
+   s390_cc_t cond;
+   UChar *p, *ptmp = 0;   /* avoid compiler warnings */
+
+   cond = insn->variant.cond_move.cond;
+   dst  = insn->variant.cond_move.dst;
+   src  = insn->variant.cond_move.src;
+
+   p = buf;
+
+   /* Branch (if cond fails) over move instrs */
+   if (cond != S390_CC_ALWAYS) {
+      /* We don't know yet how many bytes to jump over.
+         Make space for a BRC instruction (4 bytes) and fill it in later. */
+      ptmp = p;   /*  to be filled in here */
+      p += 4;
+   }
+
+   // cond true: move src => dst
+
+   switch (src.tag) {
+   case S390_OPND_REG:
+      p = s390_emit_LGR(p, hregNumber(dst), hregNumber(src.variant.reg));
+      break;
+
+   case S390_OPND_AMODE:
+      p = s390_emit_load_mem(p, insn->size, hregNumber(dst), src.variant.am);
+      break;
+
+   case S390_OPND_IMMEDIATE: {
+      ULong value = src.variant.imm;
+      UInt  r = hregNumber(dst);
+
+      switch (insn->size) {
+      case 1:
+      case 2:
+         /* Load the immediate value as a 4-byte value. That does not hurt,
+            as the extra bytes will not be looked at. Fall through .... */
+      case 4:
+         p = s390_emit_load_32imm(p, r, value);
+         break;
+
+      case 8:
+         p = s390_emit_load_64imm(p, r, value);
+         break;
+      }
+      break;
+   }
+
+   default:
+      goto fail;
+   }
+
+   if (cond != S390_CC_ALWAYS) {
+      Int delta = p - ptmp;
+
+      delta >>= 1;  /* immediate constant is #half-words */
+      vassert(delta > 0 && delta < (1 << 16));
+      s390_emit_BRC(ptmp, s390_cc_invert(cond), delta);
+   }
+
+   return p;
+
+ fail:
+   vpanic("s390_insn_cond_move_emit");
+}
+
+
+/* Little helper function to set the rounding mode in the real FPC
+   register. */
+static UChar *
+s390_set_fpc_rounding_mode(UChar *buf, s390_round_t rounding_mode)
+{
+   UChar bits;
+
+   /* Determine BFP rounding bits */
+   switch (rounding_mode) {
+   case S390_ROUND_NEAREST_EVEN: bits = 0; break;
+   case S390_ROUND_ZERO:         bits = 1; break;
+   case S390_ROUND_POSINF:       bits = 2; break;
+   case S390_ROUND_NEGINF:       bits = 3; break;
+   default: vpanic("invalid rounding mode");
+   }
+
+   /* Copy FPC from guest state to R0 and OR in the new rounding mode */
+   buf = s390_emit_L(buf, R0, 0, S390_REGNO_GUEST_STATE_POINTER,
+                     OFFSET_s390x_fpc);   // r0 = guest_fpc
+
+   buf = s390_emit_NILL(buf, R0, 0xFFFC); /* Clear out right-most 2 bits */
+   buf = s390_emit_OILL(buf, R0, bits);   /* OR in the new rounding mode */
+   buf = s390_emit_SFPC(buf, R0, 0);      /* Load FPC register from R0 */
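+   /* Illustration: with guest_fpc == 0x00000007 and S390_ROUND_ZERO (bits
+      == 1) the sequence computes (0x00000007 & 0xFFFC) | 1 == 0x00000005. */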
+
+   return buf;
+}
+
+
+static UChar *
+s390_insn_bfp_triop_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt r1 = hregNumber(insn->variant.bfp_triop.dst);
+   UInt r2 = hregNumber(insn->variant.bfp_triop.op2);
+   UInt r3 = hregNumber(insn->variant.bfp_triop.op3);
+   s390_round_t rounding_mode = insn->variant.bfp_triop.rounding_mode;
+
+   if (rounding_mode != S390_ROUND_NEAREST_EVEN) {
+      buf = s390_set_fpc_rounding_mode(buf, rounding_mode);
+   }
+
+   switch (insn->size) {
+   case 4:
+      switch (insn->variant.bfp_triop.tag) {
+      case S390_BFP_MADD:  buf = s390_emit_MAEBR(buf, r1, r3, r2); break;
+      case S390_BFP_MSUB:  buf = s390_emit_MSEBR(buf, r1, r3, r2); break;
+      default:  goto fail;
+      }
+      break;
+
+   case 8:
+      switch (insn->variant.bfp_triop.tag) {
+      case S390_BFP_MADD:  buf = s390_emit_MADBR(buf, r1, r3, r2); break;
+      case S390_BFP_MSUB:  buf = s390_emit_MSDBR(buf, r1, r3, r2); break;
+      default:  goto fail;
+      }
+      break;
+
+   default:  goto fail;
+   }
+
+   if (rounding_mode != S390_ROUND_NEAREST_EVEN) {
+      /* Restore FPC register from guest state */
+      buf = s390_emit_LFPC(buf, S390_REGNO_GUEST_STATE_POINTER,
+                           OFFSET_s390x_fpc);   // fpc = guest_fpc
+   }
+   return buf;
+
+ fail:
+   vpanic("s390_insn_bfp_triop_emit");
+}
+
+
+static UChar *
+s390_insn_bfp_binop_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt r1 = hregNumber(insn->variant.bfp_binop.dst);
+   UInt r2 = hregNumber(insn->variant.bfp_binop.op2);
+   s390_round_t rounding_mode = insn->variant.bfp_binop.rounding_mode;
+
+   if (rounding_mode != S390_ROUND_NEAREST_EVEN) {
+      buf = s390_set_fpc_rounding_mode(buf, rounding_mode);
+   }
+
+   switch (insn->size) {
+   case 4:
+      switch (insn->variant.bfp_binop.tag) {
+      case S390_BFP_ADD:     buf = s390_emit_AEBR(buf, r1, r2);  break;
+      case S390_BFP_SUB:     buf = s390_emit_SEBR(buf, r1, r2);  break;
+      case S390_BFP_MUL:     buf = s390_emit_MEEBR(buf, r1, r2); break;
+      case S390_BFP_DIV:     buf = s390_emit_DEBR(buf, r1, r2);  break;
+      default:  goto fail;
+      }
+      break;
+
+   case 8:
+      switch (insn->variant.bfp_binop.tag) {
+      case S390_BFP_ADD:     buf = s390_emit_ADBR(buf, r1, r2); break;
+      case S390_BFP_SUB:     buf = s390_emit_SDBR(buf, r1, r2); break;
+      case S390_BFP_MUL:     buf = s390_emit_MDBR(buf, r1, r2); break;
+      case S390_BFP_DIV:     buf = s390_emit_DDBR(buf, r1, r2); break;
+      default:  goto fail;
+      }
+      break;
+
+   default:  goto fail;
+   }
+
+   if (rounding_mode != S390_ROUND_NEAREST_EVEN) {
+      /* Restore FPC register from guest state */
+      buf = s390_emit_LFPC(buf, S390_REGNO_GUEST_STATE_POINTER,
+                           OFFSET_s390x_fpc);
+   }
+   return buf;
+
+ fail:
+   vpanic("s390_insn_bfp_binop_emit");
+}
+
+
+static UChar *
+s390_insn_bfp_unop_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt  r1 = hregNumber(insn->variant.bfp_unop.dst);
+   UInt  r2 = hregNumber(insn->variant.bfp_unop.op);
+   s390_round_t rounding_mode = insn->variant.bfp_unop.rounding_mode;
+   s390_round_t m3 = rounding_mode;
+
+   /* The "convert to fixed" instructions have a field for the rounding
+      mode and no FPC modification is necessary. So we handle them
+      upfront. */
+   switch (insn->variant.bfp_unop.tag) {
+   case S390_BFP_F32_TO_I32:  return s390_emit_CFEBR(buf, m3, r1, r2);
+   case S390_BFP_F64_TO_I32:  return s390_emit_CFDBR(buf, m3, r1, r2);
+   case S390_BFP_F32_TO_I64:  return s390_emit_CGEBR(buf, m3, r1, r2);
+   case S390_BFP_F64_TO_I64:  return s390_emit_CGDBR(buf, m3, r1, r2);
+   default: break;
+   }
+
+   /* For all other insns if a special rounding mode is requested,
+      we need to set the FPC first and restore it later. */
+   if (rounding_mode != S390_ROUND_NEAREST_EVEN) {
+      buf = s390_set_fpc_rounding_mode(buf, rounding_mode);
+   }
+
+   switch (insn->variant.bfp_unop.tag) {
+   case S390_BFP_ABS:
+      switch (insn->size) {
+      case 4:   buf = s390_emit_LPEBR(buf, r1, r2); break;
+      case 8:   buf = s390_emit_LPDBR(buf, r1, r2); break;
+      case 16:  buf = s390_emit_LPXBR(buf, r1, r2); break;
+      default:  goto fail;
+      }
+      break;
+
+   case S390_BFP_NABS:
+      switch (insn->size) {
+      case 4:   buf = s390_emit_LNEBR(buf, r1, r2); break;
+      case 8:   buf = s390_emit_LNDBR(buf, r1, r2); break;
+      case 16:  buf = s390_emit_LNXBR(buf, r1, r2); break;
+      default:  goto fail;
+      }
+      break;
+
+   case S390_BFP_NEG:
+      switch (insn->size) {
+      case 4:   buf = s390_emit_LCEBR(buf, r1, r2); break;
+      case 8:   buf = s390_emit_LCDBR(buf, r1, r2); break;
+      case 16:  buf = s390_emit_LCXBR(buf, r1, r2); break;
+      default:  goto fail;
+      }
+      break;
+
+   case S390_BFP_SQRT:
+      switch (insn->size) {
+      case 4:   buf = s390_emit_SQEBR(buf, r1, r2); break;
+      case 8:   buf = s390_emit_SQDBR(buf, r1, r2); break;
+      case 16:  buf = s390_emit_SQXBR(buf, r1, r2); break;
+      default:  goto fail;
+      }
+      break;
+
+   case S390_BFP_I32_TO_F32:  buf = s390_emit_CEFBR(buf, r1, r2); break;
+   case S390_BFP_I32_TO_F64:  buf = s390_emit_CDFBR(buf, r1, r2); break;
+   case S390_BFP_I32_TO_F128: buf = s390_emit_CXFBR(buf, r1, r2); break;
+   case S390_BFP_I64_TO_F32:  buf = s390_emit_CEGBR(buf, r1, r2); break;
+   case S390_BFP_I64_TO_F64:  buf = s390_emit_CDGBR(buf, r1, r2); break;
+   case S390_BFP_I64_TO_F128: buf = s390_emit_CXGBR(buf, r1, r2); break;
+
+   case S390_BFP_F32_TO_F64:  buf = s390_emit_LDEBR(buf, r1, r2); break;
+   case S390_BFP_F32_TO_F128: buf = s390_emit_LXEBR(buf, r1, r2); break;
+   case S390_BFP_F64_TO_F32:  buf = s390_emit_LEDBR(buf, r1, r2); break;
+   case S390_BFP_F64_TO_F128: buf = s390_emit_LXDBR(buf, r1, r2); break;
+
+   default: goto fail;
+   }
+
+   if (rounding_mode != S390_ROUND_NEAREST_EVEN) {
+      /* Restore FPC register from guest state */
+      buf = s390_emit_LFPC(buf, S390_REGNO_GUEST_STATE_POINTER,
+                           OFFSET_s390x_fpc);   // fpc = guest_fpc
+   }
+   return buf;
+
+ fail:
+   vpanic("s390_insn_bfp_unop_emit");
+}
+
+
+static UChar *
+s390_insn_bfp_compare_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt dst = hregNumber(insn->variant.bfp_compare.dst);
+   UInt r1  = hregNumber(insn->variant.bfp_compare.op1);
+   UInt r2  = hregNumber(insn->variant.bfp_compare.op2);
+
+   switch (insn->size) {
+   case 4:
+      buf = s390_emit_CEBR(buf, r1, r2);
+      break;
+
+   case 8:
+      buf = s390_emit_CDBR(buf, r1, r2);
+      break;
+
+   default:  goto fail;
+   }
+
+   return s390_emit_load_cc(buf, dst);  /* Load condition code into DST */
+
+ fail:
+   vpanic("s390_insn_bfp_compare_emit");
+}
+
+
+static UChar *
+s390_insn_bfp128_binop_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt r1_hi = hregNumber(insn->variant.bfp128_binop.dst_hi);
+   UInt r1_lo = hregNumber(insn->variant.bfp128_binop.dst_lo);
+   UInt r2_hi = hregNumber(insn->variant.bfp128_binop.op2_hi);
+   UInt r2_lo = hregNumber(insn->variant.bfp128_binop.op2_lo);
+   s390_round_t rounding_mode = insn->variant.bfp128_binop.rounding_mode;
+
+   /* Paranoia */
+   vassert(insn->size == 16);
+   vassert(r1_lo == r1_hi + 2);
+   vassert(r2_lo == r2_hi + 2);
+   vassert((r1_hi & 0x2) == 0);
+   vassert((r2_hi & 0x2) == 0);
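+   /* Valid 128-bit FP register pairs are (n, n+2) with n in
+      { 0, 1, 4, 5, 8, 9, 12, 13 }, i.e. bit 0x2 of n clear; the
+      asserts above check exactly that. */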
+
+   if (rounding_mode != S390_ROUND_NEAREST_EVEN) {
+      buf = s390_set_fpc_rounding_mode(buf, rounding_mode);
+   }
+
+   switch (insn->variant.bfp128_binop.tag) {
+   case S390_BFP_ADD:     buf = s390_emit_AXBR(buf, r1_hi, r2_hi); break;
+   case S390_BFP_SUB:     buf = s390_emit_SXBR(buf, r1_hi, r2_hi); break;
+   case S390_BFP_MUL:     buf = s390_emit_MXBR(buf, r1_hi, r2_hi); break;
+   case S390_BFP_DIV:     buf = s390_emit_DXBR(buf, r1_hi, r2_hi); break;
+   default:  goto fail;
+   }
+
+   if (rounding_mode != S390_ROUND_NEAREST_EVEN) {
+      /* Restore FPC register from guest state */
+      buf = s390_emit_LFPC(buf, S390_REGNO_GUEST_STATE_POINTER,
+                           OFFSET_s390x_fpc);   // fpc = guest_fpc
+   }
+   return buf;
+
+ fail:
+   vpanic("s390_insn_bfp128_binop_emit");
+}
+
+
+static UChar *
+s390_insn_bfp128_compare_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt dst   = hregNumber(insn->variant.bfp128_compare.dst);
+   UInt r1_hi = hregNumber(insn->variant.bfp128_compare.op1_hi);
+   UInt r1_lo = hregNumber(insn->variant.bfp128_compare.op1_lo);
+   UInt r2_hi = hregNumber(insn->variant.bfp128_compare.op2_hi);
+   UInt r2_lo = hregNumber(insn->variant.bfp128_compare.op2_lo);
+
+   /* Paranoia */
+   vassert(insn->size == 16);
+   vassert(r1_lo == r1_hi + 2);
+   vassert(r2_lo == r2_hi + 2);
+   vassert((r1_hi & 0x2) == 0);
+   vassert((r2_hi & 0x2) == 0);
+
+   buf = s390_emit_CXBR(buf, r1_hi, r2_hi);
+
+   /* Load condition code into DST */
+   return s390_emit_load_cc(buf, dst);
+}
+
+
+static UChar *
+s390_insn_bfp128_unop_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt r1_hi = hregNumber(insn->variant.bfp128_unop.dst_hi);
+   UInt r1_lo = hregNumber(insn->variant.bfp128_unop.dst_lo);
+   UInt r2_hi = hregNumber(insn->variant.bfp128_unop.op_hi);
+   UInt r2_lo = hregNumber(insn->variant.bfp128_unop.op_lo);
+   s390_round_t rounding_mode = insn->variant.bfp128_unop.rounding_mode;
+
+   /* Paranoia */
+   vassert(insn->size == 16);
+   vassert(r1_lo == r1_hi + 2);
+   vassert(r2_lo == r2_hi + 2);
+   vassert((r1_hi & 0x2) == 0);
+   vassert((r2_hi & 0x2) == 0);
+
+   if (rounding_mode != S390_ROUND_NEAREST_EVEN) {
+      buf = s390_set_fpc_rounding_mode(buf, rounding_mode);
+   }
+
+   switch (insn->variant.bfp128_unop.tag) {
+   case S390_BFP_ABS:         buf = s390_emit_LPXBR(buf, r1_hi, r2_hi); break;
+   case S390_BFP_NABS:        buf = s390_emit_LNXBR(buf, r1_hi, r2_hi); break;
+   case S390_BFP_NEG:         buf = s390_emit_LCXBR(buf, r1_hi, r2_hi); break;
+   case S390_BFP_SQRT:        buf = s390_emit_SQXBR(buf, r1_hi, r2_hi); break;
+   case S390_BFP_F128_TO_F32: buf = s390_emit_LEXBR(buf, r1_hi, r2_hi); break;
+   case S390_BFP_F128_TO_F64: buf = s390_emit_LDXBR(buf, r1_hi, r2_hi); break;
+   default:  goto fail;
+   }
+
+   if (rounding_mode != S390_ROUND_NEAREST_EVEN) {
+      /* Restore FPC register from guest state */
+      buf = s390_emit_LFPC(buf, S390_REGNO_GUEST_STATE_POINTER,
+                           OFFSET_s390x_fpc);   // fpc = guest_fpc
+   }
+   return buf;
+
+ fail:
+   vpanic("s390_insn_bfp128_unop_emit");
+}
+
+
+/* Conversion to 128-bit BFP does not require a rounding mode */
+static UChar *
+s390_insn_bfp128_convert_to_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt r1_hi = hregNumber(insn->variant.bfp128_unop.dst_hi);
+   UInt r1_lo = hregNumber(insn->variant.bfp128_unop.dst_lo);
+   UInt r2    = hregNumber(insn->variant.bfp128_unop.op_hi);
+
+   /* Paranoia */
+   vassert(insn->size == 16);
+   vassert(r1_lo == r1_hi + 2);
+   vassert((r1_hi & 0x2) == 0);
+
+   switch (insn->variant.bfp128_unop.tag) {
+   case S390_BFP_I32_TO_F128: buf = s390_emit_CXFBR(buf, r1_hi, r2); break;
+   case S390_BFP_I64_TO_F128: buf = s390_emit_CXGBR(buf, r1_hi, r2); break;
+   case S390_BFP_F32_TO_F128: buf = s390_emit_LXEBR(buf, r1_hi, r2); break;
+   case S390_BFP_F64_TO_F128: buf = s390_emit_LXDBR(buf, r1_hi, r2); break;
+   default:  goto fail;
+   }
+
+   return buf;
+
+ fail:
+   vpanic("s390_insn_bfp128_convert_to_emit");
+}
+
+
+static UChar *
+s390_insn_bfp128_convert_from_emit(UChar *buf, const s390_insn *insn)
+{
+   UInt r1    = hregNumber(insn->variant.bfp128_unop.dst_hi);
+   UInt r2_hi = hregNumber(insn->variant.bfp128_unop.op_hi);
+   UInt r2_lo = hregNumber(insn->variant.bfp128_unop.op_lo);
+   s390_round_t rounding_mode = insn->variant.bfp128_unop.rounding_mode;
+
+   /* Paranoia */
+   vassert(insn->size != 16);
+   vassert(r2_lo == r2_hi + 2);
+   vassert((r2_hi & 0x2) == 0);
+
+   /* The "convert to fixed" instructions have a field for the rounding
+      mode and no FPC modification is necessary. So we handle them
+      upfront. */
+   switch (insn->variant.bfp128_unop.tag) {
+   case S390_BFP_F128_TO_I32: return s390_emit_CFXBR(buf, rounding_mode,
+                                                     r1, r2_hi);
+   case S390_BFP_F128_TO_I64: return s390_emit_CGXBR(buf, rounding_mode,
+                                                     r1, r2_hi);
+   default: break;
+   }
+
+   vpanic("s390_insn_bfp128_convert_from_emit");
+}
+
+
+static UChar *
+s390_insn_mfence_emit(UChar *buf, const s390_insn *insn)
+{
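+   /* BCR 15,0 forces a serialization (checkpoint synchronization), which
+      serves as a full memory fence on s390. */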
+   return s390_emit_BCR(buf, 0xF, 0x0);
+}
+
+
+Int
+emit_S390Instr(UChar *buf, Int nbuf, s390_insn *insn, Bool mode64,
+               void *dispatch_unassisted, void *dispatch_assisted)
+{
+   UChar *end;
+
+   switch (insn->tag) {
+   case S390_INSN_LOAD:
+      end = s390_insn_load_emit(buf, insn);
+      break;
+
+   case S390_INSN_STORE:
+      end = s390_insn_store_emit(buf, insn);
+      break;
+
+   case S390_INSN_MOVE:
+      end = s390_insn_move_emit(buf, insn);
+      break;
+
+   case S390_INSN_COND_MOVE:
+      end = s390_insn_cond_move_emit(buf, insn);
+      break;
+
+   case S390_INSN_LOAD_IMMEDIATE:
+      end = s390_insn_load_immediate_emit(buf, insn);
+      break;
+
+   case S390_INSN_ALU:
+      end = s390_insn_alu_emit(buf, insn);
+      break;
+
+   case S390_INSN_MUL:
+      end = s390_insn_mul_emit(buf, insn);
+      break;
+
+   case S390_INSN_DIV:
+      end = s390_insn_div_emit(buf, insn);
+      break;
+
+   case S390_INSN_DIVS:
+      end = s390_insn_divs_emit(buf, insn);
+      break;
+
+   case S390_INSN_CLZ:
+      end = s390_insn_clz_emit(buf, insn);
+      break;
+
+   case S390_INSN_UNOP:
+      end = s390_insn_unop_emit(buf, insn);
+      break;
+
+   case S390_INSN_TEST:
+      end = s390_insn_test_emit(buf, insn);
+      break;
+
+   case S390_INSN_CC2BOOL:
+      end = s390_insn_cc2bool_emit(buf, insn);
+      break;
+
+   case S390_INSN_CAS:
+      end = s390_insn_cas_emit(buf, insn);
+      break;
+
+   case S390_INSN_COMPARE:
+      end = s390_insn_compare_emit(buf, insn);
+      break;
+
+   case S390_INSN_BRANCH:
+      vassert(dispatch_unassisted == NULL);
+      vassert(dispatch_assisted == NULL);
+      end = s390_insn_branch_emit(buf, insn);
+      break;
+
+   case S390_INSN_HELPER_CALL:
+      end = s390_insn_helper_call_emit(buf, insn);
+      break;
+
+   case S390_INSN_BFP_TRIOP:
+      end = s390_insn_bfp_triop_emit(buf, insn);
+      break;
+
+   case S390_INSN_BFP_BINOP:
+      end = s390_insn_bfp_binop_emit(buf, insn);
+      break;
+
+   case S390_INSN_BFP_UNOP:
+      end = s390_insn_bfp_unop_emit(buf, insn);
+      break;
+
+   case S390_INSN_BFP_COMPARE:
+      end = s390_insn_bfp_compare_emit(buf, insn);
+      break;
+
+   case S390_INSN_BFP128_BINOP:
+      end = s390_insn_bfp128_binop_emit(buf, insn);
+      break;
+
+   case S390_INSN_BFP128_COMPARE:
+      end = s390_insn_bfp128_compare_emit(buf, insn);
+      break;
+
+   case S390_INSN_BFP128_UNOP:
+      end = s390_insn_bfp128_unop_emit(buf, insn);
+      break;
+
+   case S390_INSN_BFP128_CONVERT_TO:
+      end = s390_insn_bfp128_convert_to_emit(buf, insn);
+      break;
+
+   case S390_INSN_BFP128_CONVERT_FROM:
+      end = s390_insn_bfp128_convert_from_emit(buf, insn);
+      break;
+
+   case S390_INSN_MFENCE:
+      end = s390_insn_mfence_emit(buf, insn);
+      break;
+
+   default:
+      vpanic("s390_insn_emit");
+   }
+
+   vassert(end - buf <= nbuf);
+
+   return end - buf;
+}
+
+
+/*---------------------------------------------------------------*/
+/*--- end                                    host_s390_defs.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/main/VEX/priv/host_s390_defs.h b/main/VEX/priv/host_s390_defs.h
new file mode 100644
index 0000000..2a5eddd
--- /dev/null
+++ b/main/VEX/priv/host_s390_defs.h
@@ -0,0 +1,495 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
+
+/*---------------------------------------------------------------*/
+/*--- begin                                  host_s390_defs.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Florian Krohm */
+
+#ifndef __VEX_HOST_S390_DEFS_H
+#define __VEX_HOST_S390_DEFS_H
+
+#include "libvex_basictypes.h"            /* Bool */
+#include "libvex.h"                       /* VexArchInfo */
+#include "main_util.h"                    /* needed for host_generic_regs.h */
+#include "host_generic_regs.h"            /* HReg */
+
+/* --------- Registers --------- */
+const HChar *s390_hreg_as_string(HReg);
+
+/* Dedicated registers */
+HReg s390_hreg_guest_state_pointer(void);
+
+
+/* Given the index of a function argument, return the number of the
+   general purpose register in which it is being passed. Arguments are
+   counted 0, 1, 2, ... and they are being passed in r2, r3, r4, ... */
+static __inline__ unsigned
+s390_gprno_from_arg_index(unsigned ix)
+{
+   return ix + 2;
+}
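+
+/* E.g. s390_gprno_from_arg_index(0) == 2, i.e. argument #0 is passed
+   in r2; with r2..r6 available this caps register arguments at five
+   (see the helper_call comment below). */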
+
+/* --------- Memory address expressions (amodes). --------- */
+
+/* These are the address modes:
+   (1) b12:  base register + 12-bit unsigned offset   (e.g. RS)
+   (2) b20:  base register + 20-bit signed offset     (e.g. RSY)
+   (3) bx12: base register + index register + 12-bit unsigned offset (e.g. RX)
+   (4) bx20: base register + index register + 20-bit signed offset   (e.g. RXY)
+   fixs390: There is also pc-relative addressing, e.g. LARL.
+*/
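+
+/* For example, 8(%r3) fits a b12 amode (d = 8, b = %r3), while
+   -4(%r5,%r3) requires bx20 since the displacement is signed. */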
+
+typedef enum {
+   S390_AMODE_B12,
+   S390_AMODE_B20,
+   S390_AMODE_BX12,
+   S390_AMODE_BX20
+} s390_amode_t;
+
+typedef struct {
+   s390_amode_t tag;
+   HReg b;
+   HReg x;       /* hregNumber(x) == 0  for S390_AMODE_B12/B20 kinds */
+   Int  d;       /* 12 bit unsigned or 20 bit signed */
+} s390_amode;
+
+
+s390_amode *s390_amode_b12(Int d, HReg b);
+s390_amode *s390_amode_b20(Int d, HReg b);
+s390_amode *s390_amode_bx12(Int d, HReg b, HReg x);
+s390_amode *s390_amode_bx20(Int d, HReg b, HReg x);
+s390_amode *s390_amode_for_guest_state(Int d);
+Bool        s390_amode_is_sane(const s390_amode *);
+
+const HChar *s390_amode_as_string(const s390_amode *);
+
+/* ------------- 2nd (right) operand of binary operation ---------------- */
+
+typedef enum {
+   S390_OPND_REG,
+   S390_OPND_IMMEDIATE,
+   S390_OPND_AMODE
+} s390_opnd_t;
+
+
+/* Naming convention for operand locations:
+   R    - GPR
+   I    - immediate value
+   M    - memory (any Amode may be used)
+*/
+
+/* An operand that is in a GPR, is an immediate value, or is in memory
+   addressable via an amode */
+typedef struct {
+   s390_opnd_t tag;
+   union {
+      HReg        reg;
+      s390_amode *am;
+      ULong       imm;
+   } variant;
+} s390_opnd_RMI;
+
+
+/* The kind of instructions */
+typedef enum {
+   S390_INSN_LOAD,   /* load register from memory */
+   S390_INSN_STORE,  /* store register to memory */
+   S390_INSN_MOVE,   /* from register to register */
+   S390_INSN_COND_MOVE, /* conditional "move" to register */
+   S390_INSN_LOAD_IMMEDIATE,
+   S390_INSN_ALU,
+   S390_INSN_MUL,    /* n-bit operands; 2n-bit result */
+   S390_INSN_DIV,    /* 2n-bit dividend; n-bit divisor; n-bit quot/rem */
+   S390_INSN_DIVS,   /* n-bit dividend; n-bit divisor; n-bit quot/rem */
+   S390_INSN_CLZ,    /* count left-most zeroes */
+   S390_INSN_UNOP,
+   S390_INSN_TEST,   /* test operand and set cc */
+   S390_INSN_CC2BOOL,/* convert condition code to 0/1 */
+   S390_INSN_COMPARE,
+   S390_INSN_BRANCH, /* un/conditional goto */
+   S390_INSN_HELPER_CALL,
+   S390_INSN_CAS,    /* compare and swap */
+   S390_INSN_BFP_BINOP, /* Binary floating point 32-bit / 64-bit */
+   S390_INSN_BFP_UNOP,
+   S390_INSN_BFP_TRIOP,
+   S390_INSN_BFP_COMPARE,
+   S390_INSN_BFP128_BINOP, /* Binary floating point 128-bit */
+   S390_INSN_BFP128_UNOP,
+   S390_INSN_BFP128_COMPARE,
+   S390_INSN_BFP128_CONVERT_TO,
+   S390_INSN_BFP128_CONVERT_FROM,
+   S390_INSN_MFENCE
+} s390_insn_tag;
+
+
+/* The kind of ALU instructions */
+typedef enum {
+   S390_ALU_ADD,
+   S390_ALU_SUB,
+   S390_ALU_MUL,   /* n-bit operands; result is lower n-bit of product */
+   S390_ALU_AND,
+   S390_ALU_OR,
+   S390_ALU_XOR,
+   S390_ALU_LSH,
+   S390_ALU_RSH,
+   S390_ALU_RSHA   /* arithmetic */
+} s390_alu_t;
+
+
+/* The kind of unary integer operations */
+typedef enum {
+   S390_ZERO_EXTEND_8,
+   S390_ZERO_EXTEND_16,
+   S390_ZERO_EXTEND_32,
+   S390_SIGN_EXTEND_8,
+   S390_SIGN_EXTEND_16,
+   S390_SIGN_EXTEND_32,
+   S390_NEGATE
+} s390_unop_t;
+
+/* The kind of ternary BFP operations */
+typedef enum {
+   S390_BFP_MADD,
+   S390_BFP_MSUB,
+} s390_bfp_triop_t;
+
+/* The kind of binary BFP operations */
+typedef enum {
+   S390_BFP_ADD,
+   S390_BFP_SUB,
+   S390_BFP_MUL,
+   S390_BFP_DIV
+} s390_bfp_binop_t;
+
+
+/* The kind of unary BFP operations */
+typedef enum {
+   S390_BFP_ABS,
+   S390_BFP_NABS,
+   S390_BFP_NEG,
+   S390_BFP_SQRT,
+   S390_BFP_I32_TO_F32,
+   S390_BFP_I32_TO_F64,
+   S390_BFP_I32_TO_F128,
+   S390_BFP_I64_TO_F32,
+   S390_BFP_I64_TO_F64,
+   S390_BFP_I64_TO_F128,
+   S390_BFP_F32_TO_I32,
+   S390_BFP_F32_TO_I64,
+   S390_BFP_F32_TO_F64,
+   S390_BFP_F32_TO_F128,
+   S390_BFP_F64_TO_I32,
+   S390_BFP_F64_TO_I64,
+   S390_BFP_F64_TO_F32,
+   S390_BFP_F64_TO_F128,
+   S390_BFP_F128_TO_I32,
+   S390_BFP_F128_TO_I64,
+   S390_BFP_F128_TO_F32,
+   S390_BFP_F128_TO_F64
+} s390_bfp_unop_t;
+
+
+/* Condition code. The encoding of the enumerators matches the value of
+   the mask field in the various branch opcodes. */
+typedef enum {
+   S390_CC_NEVER=  0,
+   S390_CC_OVFL =  1,   /* overflow */
+   S390_CC_H    =  2,   /* A > B ; high */
+   S390_CC_NLE  =  3,   /* not low or equal */
+   S390_CC_L    =  4,   /* A < B ; low */
+   S390_CC_NHE  =  5,   /* not high or equal */
+   S390_CC_LH   =  6,   /* low or high */
+   S390_CC_NE   =  7,   /* A != B ; not zero */
+   S390_CC_E    =  8,   /* A == B ; zero */
+   S390_CC_NLH  =  9,   /* not low or high */
+   S390_CC_HE   = 10,   /* A >= B ; high or equal*/
+   S390_CC_NL   = 11,   /* not low */
+   S390_CC_LE   = 12,   /* A <= B ; low or equal */
+   S390_CC_NH   = 13,   /* not high */
+   S390_CC_NO   = 14,   /* not overflow */
+   S390_CC_ALWAYS = 15
+} s390_cc_t;
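+
+/* The enumerators are 4-bit masks with one bit per machine condition
+   code: bit 8 for cc 0, bit 4 for cc 1, bit 2 for cc 2, and bit 1 for
+   cc 3. Composite conditions are the union of those bits; for example
+   S390_CC_LE (12) == S390_CC_L (4) | S390_CC_E (8). */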
+
+
+/* Rounding mode as it is encoded in the m3/m4 fields of certain
+   instructions (e.g. CFEBR) */
+typedef enum {
+/* S390_ROUND_NEAREST_AWAY = 1, not supported */
+   S390_ROUND_NEAREST_EVEN = 4,
+   S390_ROUND_ZERO         = 5,
+   S390_ROUND_POSINF       = 6,
+   S390_ROUND_NEGINF       = 7
+} s390_round_t;
+
+
+/* Invert the condition code */
+static __inline__ s390_cc_t
+s390_cc_invert(s390_cc_t cond)
+{
+   return S390_CC_ALWAYS - cond;
+}
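+
+/* Because the conditions are 4-bit masks, 15 - cond is the bitwise
+   complement; e.g. S390_CC_E (8) inverts to S390_CC_NE (7) and
+   S390_CC_L (4) to S390_CC_NL (11). */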
+
+
+typedef struct {
+   s390_insn_tag tag;
+   UChar size;            /* size of the result in bytes */
+   union {
+      struct {
+         HReg        dst;
+         s390_amode *src;
+      } load;
+      struct {
+         s390_amode *dst;
+         HReg        src;
+      } store;
+      struct {
+         HReg        dst;
+         HReg        src;
+      } move;
+      struct {
+         s390_cc_t     cond;
+         HReg          dst;
+         s390_opnd_RMI src;
+      } cond_move;
+      struct {
+         HReg        dst;
+         ULong       value;  /* not sign extended */
+      } load_immediate;
+      /* add, and, or, xor */
+      struct {
+         s390_alu_t    tag;
+         HReg          dst; /* op1 */
+         s390_opnd_RMI op2;
+      } alu;
+      struct {
+         Bool          signed_multiply;
+         HReg          dst_hi;  /*           r10 */
+         HReg          dst_lo;  /* also op1  r11 */
+         s390_opnd_RMI op2;
+      } mul;
+      struct {
+         Bool          signed_divide;
+         HReg          op1_hi;  /* also remainder   r10 */
+         HReg          op1_lo;  /* also quotient    r11 */
+         s390_opnd_RMI op2;
+      } div;
+      struct {
+         HReg          rem; /* remainder      r10 */
+         HReg          op1; /* also quotient  r11 */
+         s390_opnd_RMI op2;
+      } divs;
+      struct {
+         HReg          num_bits; /* number of leftmost '0' bits  r10 */
+         HReg          clobber;  /* unspecified                  r11 */
+         s390_opnd_RMI src;
+      } clz;
+      struct {
+         s390_unop_t   tag;
+         HReg          dst;
+         s390_opnd_RMI src;
+      } unop;
+      struct {
+         Bool          signed_comparison;
+         HReg          src1;
+         s390_opnd_RMI src2;
+      } compare;
+      struct {
+         HReg          dst;  /* condition code in s390 encoding */
+         HReg          op1;
+         HReg          op2;
+      } bfp_compare;
+      struct {
+         s390_opnd_RMI src;
+      } test;
+      /* Convert the condition code to a boolean value. */
+      struct {
+         s390_cc_t cond;
+         HReg      dst;
+      } cc2bool;
+      struct {
+         HReg        op1;
+         s390_amode *op2;
+         HReg        op3;
+         HReg        old_mem;
+      } cas;
+      struct {
+         IRJumpKind    kind;
+         s390_cc_t     cond;
+         s390_opnd_RMI dst;
+      } branch;
+      /* Pseudo-insn for representing a helper call.
+         TARGET is the absolute address of the helper function
+         NUM_ARGS says how many arguments are being passed.
+         All arguments have integer type and are being passed according to ABI,
+         i.e. in registers r2, r3, r4, r5, and r6, with argument #0 being
+         passed in r2 and so forth. */
+      struct {
+         s390_cc_t cond;
+         Addr64    target;
+         UInt      num_args;
+         HChar    *name;      /* callee's name (for debugging) */
+      } helper_call;
+      struct {
+         s390_bfp_triop_t tag;
+         s390_round_t     rounding_mode;
+         HReg             dst; /* first operand */
+         HReg             op2; /* second operand */
+         HReg             op3; /* third operand */
+      } bfp_triop;
+      struct {
+         s390_bfp_binop_t tag;
+         s390_round_t     rounding_mode;
+         HReg             dst; /* left operand */
+         HReg             op2; /* right operand */
+      } bfp_binop;
+      struct {
+         s390_bfp_unop_t tag;
+         s390_round_t    rounding_mode;
+         HReg            dst;  /* result */
+         HReg            op;   /* operand */
+      } bfp_unop;
+      struct {
+         s390_bfp_binop_t tag;
+         s390_round_t     rounding_mode;
+         HReg             dst_hi; /* left operand; high part */
+         HReg             dst_lo; /* left operand; low part */
+         HReg             op2_hi; /* right operand; high part */
+         HReg             op2_lo; /* right operand; low part */
+      } bfp128_binop;
+      /* This variant is also used by the BFP128_CONVERT_TO and
+         BFP128_CONVERT_FROM insns. */
+      struct {
+         s390_bfp_unop_t  tag;
+         s390_round_t     rounding_mode;
+         HReg             dst_hi; /* result; high part */
+         HReg             dst_lo; /* result; low part */
+         HReg             op_hi;  /* operand; high part */
+         HReg             op_lo;  /* operand; low part */
+      } bfp128_unop;
+      struct {
+         HReg             dst;    /* condition code in s390 encoding */
+         HReg             op1_hi; /* left operand; high part */
+         HReg             op1_lo; /* left operand; low part */
+         HReg             op2_hi; /* right operand; high part */
+         HReg             op2_lo; /* right operand; low part */
+      } bfp128_compare;
+   } variant;
+} s390_insn;
+
+s390_insn *s390_insn_load(UChar size, HReg dst, s390_amode *src);
+s390_insn *s390_insn_store(UChar size, s390_amode *dst, HReg src);
+s390_insn *s390_insn_move(UChar size, HReg dst, HReg src);
+s390_insn *s390_insn_cond_move(UChar size, s390_cc_t cond, HReg dst,
+                               s390_opnd_RMI src);
+s390_insn *s390_insn_load_immediate(UChar size, HReg dst, ULong val);
+s390_insn *s390_insn_alu(UChar size, s390_alu_t, HReg dst,
+                         s390_opnd_RMI op2);
+s390_insn *s390_insn_mul(UChar size, HReg dst_hi, HReg dst_lo,
+                         s390_opnd_RMI op2, Bool signed_multiply);
+s390_insn *s390_insn_div(UChar size, HReg op1_hi, HReg op1_lo,
+                         s390_opnd_RMI op2, Bool signed_divide);
+s390_insn *s390_insn_divs(UChar size, HReg rem, HReg op1, s390_opnd_RMI op2);
+s390_insn *s390_insn_clz(UChar size, HReg num_bits, HReg clobber,
+                         s390_opnd_RMI op);
+s390_insn *s390_insn_cas(UChar size, HReg op1, s390_amode *op2, HReg op3,
+                         HReg old);
+s390_insn *s390_insn_unop(UChar size, s390_unop_t tag, HReg dst,
+                          s390_opnd_RMI opnd);
+s390_insn *s390_insn_cc2bool(HReg dst, s390_cc_t src);
+s390_insn *s390_insn_test(UChar size, s390_opnd_RMI src);
+s390_insn *s390_insn_compare(UChar size, HReg dst, s390_opnd_RMI opnd,
+                             Bool signed_comparison);
+s390_insn *s390_insn_branch(IRJumpKind jk, s390_cc_t cond, s390_opnd_RMI dst);
+s390_insn *s390_insn_helper_call(s390_cc_t cond, Addr64 target, UInt num_args,
+                                 HChar *name);
+s390_insn *s390_insn_bfp_triop(UChar size, s390_bfp_triop_t, HReg dst, HReg op2,
+                               HReg op3, s390_round_t);
+s390_insn *s390_insn_bfp_binop(UChar size, s390_bfp_binop_t, HReg dst, HReg op2,
+                               s390_round_t);
+s390_insn *s390_insn_bfp_unop(UChar size, s390_bfp_unop_t tag, HReg dst,
+                              HReg op, s390_round_t);
+s390_insn *s390_insn_bfp_compare(UChar size, HReg dst, HReg op1, HReg op2);
+s390_insn *s390_insn_bfp128_binop(UChar size, s390_bfp_binop_t, HReg dst_hi,
+                                  HReg dst_lo, HReg op2_hi, HReg op2_lo,
+                                  s390_round_t);
+s390_insn *s390_insn_bfp128_unop(UChar size, s390_bfp_unop_t, HReg dst_hi,
+                                 HReg dst_lo, HReg op_hi, HReg op_lo,
+                                 s390_round_t);
+s390_insn *s390_insn_bfp128_compare(UChar size, HReg dst, HReg op1_hi,
+                                    HReg op1_lo, HReg op2_hi, HReg op2_lo);
+s390_insn *s390_insn_bfp128_convert_to(UChar size, s390_bfp_unop_t,
+                                       HReg dst_hi, HReg dst_lo, HReg op);
+s390_insn *s390_insn_bfp128_convert_from(UChar size, s390_bfp_unop_t,
+                                         HReg dst, HReg op_hi, HReg op_lo,
+                                         s390_round_t);
+s390_insn *s390_insn_mfence(void);
+UInt       s390_insn_emit(UChar *buf, Int nbuf, const s390_insn *insn,
+                          void *dispatch);
+
+const HChar *s390_insn_as_string(const s390_insn *);
+
+/*--------------------------------------------------------*/
+/* --- Interface exposed to VEX                       --- */
+/*--------------------------------------------------------*/
+
+void ppS390AMode(s390_amode *);
+void ppS390Instr(s390_insn *, Bool mode64);
+void ppHRegS390(HReg);
+
+/* Some functions that insulate the register allocator from details
+   of the underlying instruction set. */
+void  getRegUsage_S390Instr( HRegUsage *, s390_insn *, Bool );
+void  mapRegs_S390Instr    ( HRegRemap *, s390_insn *, Bool );
+Bool  isMove_S390Instr     ( s390_insn *, HReg *, HReg * );
+Int   emit_S390Instr       ( UChar *, Int, s390_insn *, Bool,
+                             void *, void * );
+void  getAllocableRegs_S390( Int *, HReg **, Bool );
+void  genSpill_S390        ( HInstr **, HInstr **, HReg , Int , Bool );
+void  genReload_S390       ( HInstr **, HInstr **, HReg , Int , Bool );
+s390_insn *directReload_S390 ( s390_insn *, HReg, Short );
+HInstrArray *iselSB_S390   ( IRSB *, VexArch, VexArchInfo *, VexAbiInfo * );
+
+/* KLUDGE: See detailed comment in host_s390_defs.c. */
+extern const VexArchInfo *s390_archinfo_host;
+
+/* Convenience macros to test installed facilities */
+#define s390_host_has_ldisp \
+                      (s390_archinfo_host->hwcaps & (VEX_HWCAPS_S390X_LDISP))
+#define s390_host_has_eimm \
+                      (s390_archinfo_host->hwcaps & (VEX_HWCAPS_S390X_EIMM))
+#define s390_host_has_gie \
+                      (s390_archinfo_host->hwcaps & (VEX_HWCAPS_S390X_GIE))
+#define s390_host_has_dfp \
+                      (s390_archinfo_host->hwcaps & (VEX_HWCAPS_S390X_DFP))
+#define s390_host_has_fgx \
+                      (s390_archinfo_host->hwcaps & (VEX_HWCAPS_S390X_FGX))
+
+#endif /* ndef __VEX_HOST_S390_DEFS_H */
+
+/*---------------------------------------------------------------*/
+/*--- end                                    host_s390_defs.h ---*/
+/*---------------------------------------------------------------*/
diff --git a/main/VEX/priv/host_s390_disasm.c b/main/VEX/priv/host_s390_disasm.c
new file mode 100644
index 0000000..35f1010
--- /dev/null
+++ b/main/VEX/priv/host_s390_disasm.c
@@ -0,0 +1,463 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
+
+/*---------------------------------------------------------------*/
+/*--- begin                                host_s390_disasm.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Florian Krohm */
+
+#include <stdarg.h>
+#include "libvex_basictypes.h"
+#include "main_util.h"        // vassert
+#include "main_globals.h"     // vex_traceflags
+#include "host_s390_disasm.h"
+
+/* The format that is used to write out a mnemonic.
+   These should be declared as 'const HChar' but vex_printf needs
+   to be changed for that first */
+static HChar s390_mnm_fmt[] = "%-8s";
+
+
+/* Return the name of a general purpose register for dis-assembly purposes. */
+static const HChar *
+gpr_operand(UInt archreg)
+{
+   static const HChar names[16][5] = {
+      "%r0", "%r1", "%r2", "%r3",
+      "%r4", "%r5", "%r6", "%r7",
+      "%r8", "%r9", "%r10", "%r11",
+      "%r12", "%r13", "%r14", "%r15",
+   };
+
+   vassert(archreg < 16);
+
+   return names[archreg];
+}
+
+
+/* Return the name of a floating point register for dis-assembly purposes. */
+static const HChar *
+fpr_operand(UInt archreg)
+{
+   static const HChar names[16][5] = {
+      "%f0", "%f1", "%f2", "%f3",
+      "%f4", "%f5", "%f6", "%f7",
+      "%f8", "%f9", "%f10", "%f11",
+      "%f12", "%f13", "%f14", "%f15",
+   };
+
+   vassert(archreg < 16);
+
+   return names[archreg];
+}
+
+
+/* Return the name of an access register for dis-assembly purposes. */
+static const HChar *
+ar_operand(UInt archreg)
+{
+   static const HChar names[16][5] = {
+      "%a0", "%a1", "%a2", "%a3",
+      "%a4", "%a5", "%a6", "%a7",
+      "%a8", "%a9", "%a10", "%a11",
+      "%a12", "%a13", "%a14", "%a15",
+   };
+
+   vassert(archreg < 16);
+
+   return names[archreg];
+}
+
+
+/* Build and return the extended mnemonic for the compare and branch
+   opcodes as introduced by z10. See also the opcodes in file
+   opcodes/s390-opc.txt (from binutils) that have a '$' in their name. */
+static const HChar *
+cab_operand(const HChar *base, UInt mask)
+{
+   HChar *to;
+   const HChar *from;
+
+   static HChar buf[10];   /* Maximum: 6 (base) + 2 (suffix) + nul */
+
+   static HChar *suffix[] = {
+      "", "h", "l", "ne", "e", "nl", "nh", ""
+   };
+
+   /* strcpy(buf, base); */
+   for (from = base, to = buf; *from; ++from, ++to) {
+      *to = *from;
+   }
+   /* strcat(buf, suffix); */
+   for (from = suffix[mask >> 1]; *from; ++from, ++to) {
+      *to = *from;
+   }
+   *to = '\0';
+
+   return buf;
+}
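+
+/* For example, with base "crj" and mask 8 the suffix table yields
+   cab_operand("crj", 8) == "crje". */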
+
+/* Common function used to construct a mnemonic based on a condition code
+   mask. */
+static const HChar *
+construct_mnemonic(const HChar *prefix, const HChar *suffix, UInt mask)
+{
+   HChar *to;
+   const HChar *from;
+
+   static HChar buf[10];
+
+   static HChar mask_id[16][4] = {
+      "", /* 0 -> unused */
+      "o", "h", "nle", "l", "nhe", "lh", "ne",
+      "e", "nlh", "he", "nl", "le", "nh", "no",
+      ""  /* 15 -> unused */
+   };
+
+   /* Guard against buffer overflow */
+   vassert(vex_strlen(prefix) + vex_strlen(suffix) + sizeof mask_id[0]
+           <= sizeof buf);
+
+   /* strcpy(buf, prefix); */
+   for (from = prefix, to = buf; *from; ++from, ++to) {
+      *to = *from;
+   }
+   /* strcat(buf, mask_id); */
+   for (from = mask_id[mask]; *from; ++from, ++to) {
+      *to = *from;
+   }
+   /* strcat(buf, suffix); */
+   for (from = suffix; *from; ++from, ++to) {
+      *to = *from;
+   }
+   *to = '\0';
+
+   return buf;
+}
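+
+/* For example, construct_mnemonic("j", "", 8) yields "je" and
+   construct_mnemonic("b", "r", 2) yields "bhr". */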
+
+
+/* Return the special mnemonic for the BCR opcode */
+static const HChar *
+bcr_operand(UInt m1)
+{
+   if (m1 ==  0) return "nopr";
+   if (m1 == 15) return "br";
+
+   return construct_mnemonic("b", "r", m1);
+}
+
+
+/* Return the special mnemonic for the BC opcode */
+static const HChar *
+bc_operand(UInt m1)
+{
+   if (m1 ==  0) return "nop";
+   if (m1 == 15) return "b";
+
+   return construct_mnemonic("b", "", m1);
+}
+
+
+/* Return the special mnemonic for the BRC opcode */
+static const HChar *
+brc_operand(UInt m1)
+{
+   if (m1 == 0)  return "brc";
+   if (m1 == 15) return "j";
+
+   return construct_mnemonic("j", "", m1);
+}
+
+
+/* Return the special mnemonic for the BRCL opcode */
+static const HChar *
+brcl_operand(UInt m1)
+{
+   if (m1 == 0)  return "brcl";
+   if (m1 == 15) return "jg";
+
+   return construct_mnemonic("jg", "", m1);
+}
+
+
+/* Return the special mnemonic for a conditional load/store  opcode */
+static const HChar *
+cls_operand(Int kind, UInt mask)
+{
+   HChar *prefix;
+
+   switch (kind) {
+   case S390_XMNM_LOCR:   prefix = "locr";  break;
+   case S390_XMNM_LOCGR:  prefix = "locgr"; break;
+   case S390_XMNM_LOC:    prefix = "loc";   break;
+   case S390_XMNM_LOCG:   prefix = "locg";  break;
+   case S390_XMNM_STOC:   prefix = "stoc";  break;
+   case S390_XMNM_STOCG:  prefix = "stocg"; break;
+   default:
+      vpanic("cls_operand");
+   }
+
+   return construct_mnemonic(prefix, "", mask);
+}
+
+
+/* An operand with a base register, an index register, and a displacement.
+   If the displacement is signed, the rightmost 20 bits of D need to be
+   sign extended */
+static HChar *
+dxb_operand(HChar *p, UInt d, UInt x, UInt b, Bool displacement_is_signed)
+{
+   if (displacement_is_signed) {
+      Int displ = ((Int)d << 12) >> 12;  /* sign extend */
+
+      p += vex_sprintf(p, "%d", displ);
+   } else {
+      p += vex_sprintf(p, "%u", d);
+   }
+   if (x != 0) {
+      p += vex_sprintf(p, "(%s", gpr_operand(x));
+      if (b != 0) {
+         p += vex_sprintf(p, ",%s", gpr_operand(b));
+      }
+      p += vex_sprintf(p, ")");
+   } else {
+      if (b != 0) {
+         p += vex_sprintf(p, "(%s)", gpr_operand(b));
+      }
+   }
+
+   return p;
+}
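+
+/* For example, dxb_operand(p, 0xFFFFC, 5, 3, 1) writes "-4(%r5,%r3)",
+   while dxb_operand(p, 8, 0, 15, 0) writes "8(%r15)". */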
+
+
+/* An operand with base register, unsigned length, and a 12-bit
+   unsigned displacement */
+static HChar *
+udlb_operand(HChar *p, UInt d, UInt length, UInt b)
+{
+   p += vex_sprintf(p, "%u", d);
+   p += vex_sprintf(p, "(%u", length + 1);  // actual length is +1
+   if (b != 0) {
+      p += vex_sprintf(p, ",%s", gpr_operand(b));
+   }
+   p += vex_sprintf(p, ")");
+
+   return p;
+}
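+
+/* For example, udlb_operand(p, 100, 7, 9) writes "100(8,%r9)". */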
+
+
+/* The first argument is the command that says how to write the disassembled
+   insn. It is understood that the mnemonic comes first and that arguments
+   are separated by a ','. The command encodes the kind of each argument.
+   Each kind is a 4-bit S390_ARG_xyz value; the first argument is placed
+   in the least significant bits of the command and so on. There are at most
+   5 arguments in an insn and a sentinel (S390_ARG_DONE) is needed to identify
+   the end of the argument list. 6 * 4 = 24 bits are required for the
+   command. */
+void
+s390_disasm(UInt command, ...)
+{
+   va_list  args;
+   unsigned argkind;
+   HChar buf[128];  /* holds the disassembled insn */
+   HChar *p;
+   HChar separator;
+   Int mask_suffix = -1;
+
+   va_start(args, command);
+
+   p = buf;
+   separator = 0;
+
+   while (42) {
+      argkind = command & 0xF;
+      command >>= 4;
+
+      if (argkind == S390_ARG_DONE) goto done;
+
+      if (argkind == S390_ARG_CABM) separator = 0;  /* optional */
+
+      /* Write out the separator */
+      if (separator) *p++ = separator;
+
+      /* argument */
+      switch (argkind) {
+      case S390_ARG_MNM:
+         p += vex_sprintf(p, s390_mnm_fmt, va_arg(args, HChar *));
+         separator = ' ';
+         continue;
+
+      case S390_ARG_XMNM: {
+         UInt mask, kind;
+         const HChar *mnm;
+
+         kind = va_arg(args, UInt);
+
+         separator = ' ';
+         switch (kind) {
+         case S390_XMNM_BC:
+         case S390_XMNM_BCR:
+            mask = va_arg(args, UInt);
+            mnm = kind == S390_XMNM_BCR ? bcr_operand(mask) : bc_operand(mask);
+            p  += vex_sprintf(p, s390_mnm_fmt, mnm);
+            /* mask == 0 is a NOP and has no argument */
+            if (mask == 0) goto done;
+            break;
+
+         case S390_XMNM_BRC:
+         case S390_XMNM_BRCL:
+            mask = va_arg(args, UInt);
+            mnm = kind == S390_XMNM_BRC ? brc_operand(mask) : brcl_operand(mask);
+            p  += vex_sprintf(p, s390_mnm_fmt, mnm);
+
+            /* mask == 0 has no special mnemonic */
+            if (mask == 0) {
+               p += vex_sprintf(p, " 0");
+               separator = ',';
+            }
+            break;
+
+         case S390_XMNM_CAB:
+            mnm  = va_arg(args, HChar *);
+            mask = va_arg(args, UInt);
+            p  += vex_sprintf(p, s390_mnm_fmt, cab_operand(mnm, mask));
+            break;
+
+         case S390_XMNM_LOCR:
+         case S390_XMNM_LOCGR:
+         case S390_XMNM_LOC:
+         case S390_XMNM_LOCG:
+         case S390_XMNM_STOC:
+         case S390_XMNM_STOCG:
+            mask = va_arg(args, UInt);
+            mnm = cls_operand(kind, mask);
+            p  += vex_sprintf(p, s390_mnm_fmt, mnm);
+            /* There are no special opcodes when mask == 0 or 15. In that case
+               the integer mask is appended as the final operand */
+            if (mask == 0 || mask == 15) mask_suffix = mask;
+            break;
+         }
+      }
+      continue;
+
+      case S390_ARG_GPR:
+         p += vex_sprintf(p, "%s", gpr_operand(va_arg(args, UInt)));
+         break;
+
+      case S390_ARG_FPR:
+         p += vex_sprintf(p, "%s", fpr_operand(va_arg(args, UInt)));
+         break;
+
+      case S390_ARG_AR:
+         p += vex_sprintf(p, "%s", ar_operand(va_arg(args, UInt)));
+         break;
+
+      case S390_ARG_UINT:
+         p += vex_sprintf(p, "%u", va_arg(args, UInt));
+         break;
+
+      case S390_ARG_INT:
+         p += vex_sprintf(p, "%d", (Int)(va_arg(args, UInt)));
+         break;
+
+      case S390_ARG_PCREL: {
+         Int offset = (Int)(va_arg(args, UInt));
+
+         /* Convert # halfwords to # bytes */
+         offset <<= 1;
+
+         if (offset < 0) {
+            p += vex_sprintf(p, ".%d", offset);
+         } else {
+            p += vex_sprintf(p, ".+%u", offset);
+         }
+         break;
+      }
+
+      case S390_ARG_SDXB: {
+         UInt dh, dl, x, b;
+
+         dh = va_arg(args, UInt);
+         dl = va_arg(args, UInt);
+         x  = va_arg(args, UInt);
+         b  = va_arg(args, UInt);
+
+         p = dxb_operand(p, (dh << 12) | dl, x, b, 1 /* signed_displacement */);
+         break;
+      }
+
+      case S390_ARG_UDXB: {
+         UInt d, x, b;
+
+         d = va_arg(args, UInt);
+         x = va_arg(args, UInt);
+         b = va_arg(args, UInt);
+
+         p = dxb_operand(p, d, x, b, 0 /* signed_displacement */);
+         break;
+      }
+
+      case S390_ARG_UDLB: {
+         UInt d, l, b;
+
+         d = va_arg(args, UInt);
+         l = va_arg(args, UInt);
+         b = va_arg(args, UInt);
+
+         p = udlb_operand(p, d, l, b);
+         break;
+      }
+
+      case S390_ARG_CABM: {
+         UInt mask;
+
+         mask = va_arg(args, UInt) & 0xE;
+         if (mask == 0 || mask == 14) {
+            p += vex_sprintf(p, ",%u", mask);
+         }
+         break;
+      }
+      }
+
+      separator = ',';
+   }
+
+ done:
+   va_end(args);
+
+   if (mask_suffix != -1)
+      p += vex_sprintf(p, ",%d", mask_suffix);
+   *p = '\0';
+
+   vassert(p < buf + sizeof buf);  /* detect buffer overwrite */
+
+   /* Finally, write out the disassembled insn */
+   vex_printf("%s\n", buf);
+}
+
+/*---------------------------------------------------------------*/
+/*--- end                                  host_s390_disasm.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/main/VEX/priv/host_s390_disasm.h b/main/VEX/priv/host_s390_disasm.h
new file mode 100644
index 0000000..c01aa34
--- /dev/null
+++ b/main/VEX/priv/host_s390_disasm.h
@@ -0,0 +1,93 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
+
+/*---------------------------------------------------------------*/
+/*--- begin                                host_s390_disasm.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+#ifndef __VEX_HOST_S390_DISASM_H
+#define __VEX_HOST_S390_DISASM_H
+
+#include "libvex_basictypes.h"
+
+/* Macros to encode a command for s390_disasm. */
+#undef  P
+#define P(a) (S390_ARG_##a)
+#undef  ENC1
+#define ENC1(a) ((P(DONE) << 4) | P(a))
+#undef  ENC2
+#define ENC2(a,b) ((P(DONE) << 8) | (P(b) << 4) | P(a))
+#undef  ENC3
+#define ENC3(a,b,c) ((P(DONE) << 12) | (P(c) << 8) | (P(b) << 4) | P(a))
+#undef  ENC4
+#define ENC4(a,b,c,d) ((P(DONE) << 16) | (P(d) << 12) | (P(c) << 8) | \
+                       (P(b) << 4) | P(a))
+#undef  ENC5
+#define ENC5(a,b,c,d,e) ((P(DONE) << 20) | (P(e) << 16) | (P(d) << 12) | \
+                         (P(c) << 8) | (P(b) << 4) | P(a))
+#undef  ENC6
+#define ENC6(a,b,c,d,e,f) ((P(DONE) << 24) | (P(f) << 20) | (P(e) << 16) | \
+                           (P(d) << 12) | (P(c) << 8) | (P(b) << 4) | P(a))
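+
+/* For example, ENC3(MNM, GPR, UDXB) describes an insn that is written
+   as a mnemonic followed by a GPR operand and a base/index/displacement
+   operand; the S390_ARG_DONE nibble terminates the argument list. */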
+
+/* The different kinds of operands in an asm insn */
+enum {
+   S390_ARG_DONE = 0,
+   S390_ARG_GPR = 1,
+   S390_ARG_FPR = 2,
+   S390_ARG_AR = 3,
+   S390_ARG_INT = 4,
+   S390_ARG_UINT = 5,
+   S390_ARG_PCREL = 6,
+   S390_ARG_SDXB = 7,
+   S390_ARG_UDXB = 8,
+   S390_ARG_UDLB = 9,
+   S390_ARG_CABM = 10,
+   S390_ARG_MNM = 11,
+   S390_ARG_XMNM = 12
+};
+
+/* The different kinds of extended mnemonics */
+enum {
+   S390_XMNM_CAB = 0,
+   S390_XMNM_BCR = 1,
+   S390_XMNM_BC = 2,
+   S390_XMNM_BRC = 3,
+   S390_XMNM_BRCL = 4,
+   S390_XMNM_LOCR = 5,
+   S390_XMNM_LOCGR = 6,
+   S390_XMNM_LOC = 7,
+   S390_XMNM_LOCG = 8,
+   S390_XMNM_STOC = 9,
+   S390_XMNM_STOCG = 10
+};
+
+void s390_disasm(UInt command, ...);
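+
+/* A typical call, assuming r1, d2, x2 and b2 have been extracted from
+   an RX-format insn such as L (load), might be
+
+      s390_disasm(ENC3(MNM, GPR, UDXB), "l", r1, d2, x2, b2);
+
+   which prints something like "l        %r1,4095(%r2,%r3)". */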
+
+/*---------------------------------------------------------------*/
+/*--- end                                  host_s390_disasm.h ---*/
+/*---------------------------------------------------------------*/
+
+#endif /* __VEX_HOST_S390_DISASM_H */
diff --git a/main/VEX/priv/host_s390_isel.c b/main/VEX/priv/host_s390_isel.c
new file mode 100644
index 0000000..4cdf443
--- /dev/null
+++ b/main/VEX/priv/host_s390_isel.c
@@ -0,0 +1,2446 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
+
+/*---------------------------------------------------------------*/
+/*--- begin                                  host_s390_isel.c ---*/
+/*---------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright IBM Corp. 2010-2011
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Contributed by Florian Krohm */
+
+#include "libvex_basictypes.h"
+#include "libvex_ir.h"
+#include "libvex.h"
+#include "libvex_s390x_common.h"
+
+#include "ir_match.h"
+#include "main_util.h"
+#include "main_globals.h"
+#include "host_generic_regs.h"
+#include "host_s390_defs.h"
+
+/*---------------------------------------------------------*/
+/*--- ISelEnv                                           ---*/
+/*---------------------------------------------------------*/
+
+/* This carries around:
+
+   - A mapping from IRTemp to IRType, giving the type of any IRTemp we
+     might encounter.  This is computed before insn selection starts,
+     and does not change.
+
+   - A mapping from IRTemp to HReg.  This tells the insn selector
+     which virtual register(s) are associated with each IRTemp
+     temporary.  This is computed before insn selection starts, and
+     does not change.  We expect this mapping to map precisely the
+     same set of IRTemps as the type mapping does.
+
+        - vregmap   holds the primary register for the IRTemp.
+        - vregmapHI holds the secondary register for the IRTemp,
+          if any is needed.  That's only for Ity_I64 temps
+          in 32-bit mode or Ity_I128 temps in 64-bit mode.
+
+   - The code array, that is, the insns selected so far.
+
+   - A counter, for generating new virtual registers.
+
+   - The host subarchitecture we are selecting insns for.
+     This is set at the start and does not change.
+*/
+
+typedef struct {
+   IRTypeEnv   *type_env;
+
+   HReg        *vregmap;
+   HReg        *vregmapHI;
+   UInt         n_vregmap;
+
+   HInstrArray *code;
+
+   UInt         vreg_ctr;
+
+   UInt         hwcaps;
+
+} ISelEnv;
+
+
+/* Forward declarations */
+static HReg          s390_isel_int_expr(ISelEnv *, IRExpr *);
+static s390_amode   *s390_isel_amode(ISelEnv *, IRExpr *);
+static s390_cc_t     s390_isel_cc(ISelEnv *, IRExpr *);
+static s390_opnd_RMI s390_isel_int_expr_RMI(ISelEnv *, IRExpr *);
+static void          s390_isel_int128_expr(HReg *, HReg *, ISelEnv *, IRExpr *);
+static HReg          s390_isel_float_expr(ISelEnv *, IRExpr *);
+static void          s390_isel_float128_expr(HReg *, HReg *, ISelEnv *, IRExpr *);
+
+
+/* Add an instruction */
+static void
+addInstr(ISelEnv *env, s390_insn *insn)
+{
+   addHInstr(env->code, insn);
+
+   if (vex_traceflags & VEX_TRACE_VCODE) {
+      vex_printf("%s\n", s390_insn_as_string(insn));
+   }
+}
+
+
+static __inline__ IRExpr *
+mkU64(ULong value)
+{
+   return IRExpr_Const(IRConst_U64(value));
+}
+
+
+/*---------------------------------------------------------*/
+/*--- Registers                                         ---*/
+/*---------------------------------------------------------*/
+
+/* Return the virtual register to which a given IRTemp is mapped. */
+static HReg
+lookupIRTemp(ISelEnv *env, IRTemp tmp)
+{
+   vassert(tmp < env->n_vregmap);
+   vassert(env->vregmap[tmp] != INVALID_HREG);
+
+   return env->vregmap[tmp];
+}
+
+
+/* Return the two virtual registers to which the IRTemp is mapped. */
+static void
+lookupIRTemp128(HReg *hi, HReg *lo, ISelEnv *env, IRTemp tmp)
+{
+   vassert(tmp < env->n_vregmap);
+   vassert(env->vregmapHI[tmp] != INVALID_HREG);
+
+   *lo = env->vregmap[tmp];
+   *hi = env->vregmapHI[tmp];
+}
+
+
+/* Allocate a new integer register */
+static HReg
+newVRegI(ISelEnv *env)
+{
+   HReg reg = mkHReg(env->vreg_ctr, HRcInt64, True /* virtual */ );
+   env->vreg_ctr++;
+
+   return reg;
+}
+
+
+/* Allocate a new floating point register */
+static HReg
+newVRegF(ISelEnv *env)
+{
+   HReg reg = mkHReg(env->vreg_ctr, HRcFlt64, True /* virtual */ );
+
+   env->vreg_ctr++;
+
+   return reg;
+}
+
+
+/* Construct a non-virtual general purpose register */
+static __inline__ HReg
+make_gpr(ISelEnv *env, UInt regno)
+{
+   return mkHReg(regno, HRcInt64, False /* virtual */ );
+}
+
+
+/* Construct a non-virtual floating point register */
+static __inline__ HReg
+make_fpr(UInt regno)
+{
+   return mkHReg(regno, HRcFlt64, False /* virtual */ );
+}
+
+
+/*---------------------------------------------------------*/
+/*--- Amode                                             ---*/
+/*---------------------------------------------------------*/
+
+static __inline__ Bool
+ulong_fits_unsigned_12bit(ULong val)
+{
+   return (val & 0xFFFu) == val;
+}
+
+
+static __inline__ Bool
+ulong_fits_signed_20bit(ULong val)
+{
+   Long v = val & 0xFFFFFu;
+
+   v = (v << 44) >> 44;  /* sign extend */
+
+   return val == (ULong)v;
+}
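+
+/* For example, (ULong)-4 and 524287 fit into a signed 20-bit field,
+   whereas 524288 (== 1 << 19) does not. */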
+
+
+/* EXPR is an expression that is used as an address. Return an s390_amode
+   for it. */
+static s390_amode *
+s390_isel_amode_wrk(ISelEnv *env, IRExpr *expr)
+{
+   if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
+      IRExpr *arg1 = expr->Iex.Binop.arg1;
+      IRExpr *arg2 = expr->Iex.Binop.arg2;
+
+      /* Move constant into right subtree */
+      if (arg1->tag == Iex_Const) {
+         IRExpr *tmp;
+         tmp  = arg1;
+         arg1 = arg2;
+         arg2 = tmp;
+      }
+
+      /* r + constant: Check for b12 first, then b20 */
+      if (arg2->tag == Iex_Const && arg2->Iex.Const.con->tag == Ico_U64) {
+         ULong value = arg2->Iex.Const.con->Ico.U64;
+
+         if (ulong_fits_unsigned_12bit(value)) {
+            return s390_amode_b12((Int)value, s390_isel_int_expr(env, arg1));
+         }
+         /* If long-displacement is not available, do not construct B20 or
+            BX20 amodes because code generation cannot handle them. */
+         if (s390_host_has_ldisp && ulong_fits_signed_20bit(value)) {
+            return s390_amode_b20((Int)value, s390_isel_int_expr(env, arg1));
+         }
+      }
+   }
+
+   /* Doesn't match anything in particular.  Generate it into
+      a register and use that. */
+   return s390_amode_b12(0, s390_isel_int_expr(env, expr));
+}
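+
+/* For example, an address expression Add64(t, 40) yields the b12 amode
+   40(r) where r holds t, and Add64(-8, t) is first commuted and then,
+   if long displacement is available, yields the b20 amode -8(r). */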
+
+
+static s390_amode *
+s390_isel_amode(ISelEnv *env, IRExpr *expr)
+{
+   s390_amode *am;
+
+   /* Address computation should yield a 64-bit value */
+   vassert(typeOfIRExpr(env->type_env, expr) == Ity_I64);
+
+   am = s390_isel_amode_wrk(env, expr);
+
+   /* Check post-condition */
+   vassert(s390_amode_is_sane(am));
+
+   return am;
+}
+
+
+/*---------------------------------------------------------*/
+/*--- Helper functions                                  ---*/
+/*---------------------------------------------------------*/
+
+/* Constants and memory accesses should be right operands */
+#define order_commutative_operands(left, right)                   \
+        do {                                                      \
+          if (left->tag == Iex_Const || left->tag == Iex_Load ||  \
+              left->tag == Iex_Get) {                             \
+            IRExpr *tmp;                                          \
+            tmp   = left;                                         \
+            left  = right;                                        \
+            right = tmp;                                          \
+          }                                                       \
+        } while (0)
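+
+/* For example, MullU64(C, t) becomes MullU64(t, C) so that the constant
+   can be encoded in the RMI operand. */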
+
+
+/* Copy an RMI operand to the DST register */
+static s390_insn *
+s390_opnd_copy(UChar size, HReg dst, s390_opnd_RMI opnd)
+{
+   switch (opnd.tag) {
+   case S390_OPND_AMODE:
+      return s390_insn_load(size, dst, opnd.variant.am);
+
+   case S390_OPND_REG:
+      return s390_insn_move(size, dst, opnd.variant.reg);
+
+   case S390_OPND_IMMEDIATE:
+      return s390_insn_load_immediate(size, dst, opnd.variant.imm);
+
+   default:
+      vpanic("s390_opnd_copy");
+   }
+}
+
+
+/* Construct a RMI operand for a register */
+static __inline__ s390_opnd_RMI
+s390_opnd_reg(HReg reg)
+{
+   s390_opnd_RMI opnd;
+
+   opnd.tag  = S390_OPND_REG;
+   opnd.variant.reg = reg;
+
+   return opnd;
+}
+
+
+/* Construct a RMI operand for an immediate constant */
+static __inline__ s390_opnd_RMI
+s390_opnd_imm(ULong value)
+{
+   s390_opnd_RMI opnd;
+
+   opnd.tag  = S390_OPND_IMMEDIATE;
+   opnd.variant.imm = value;
+
+   return opnd;
+}
+
+
+/* Return 1 if EXPR represents the constant 0 */
+static int
+s390_expr_is_const_zero(IRExpr *expr)
+{
+   ULong value;
+
+   if (expr->tag == Iex_Const) {
+      switch (expr->Iex.Const.con->tag) {
+      case Ico_U1:  value = expr->Iex.Const.con->Ico.U1;  break;
+      case Ico_U8:  value = expr->Iex.Const.con->Ico.U8;  break;
+      case Ico_U16: value = expr->Iex.Const.con->Ico.U16; break;
+      case Ico_U32: value = expr->Iex.Const.con->Ico.U32; break;
+      case Ico_U64: value = expr->Iex.Const.con->Ico.U64; break;
+      default:
+         vpanic("s390_expr_is_const_zero");
+      }
+      return value == 0;
+   }
+
+   return 0;
+}
+
+
+/* Call a helper (clean or dirty)
+   Arguments must satisfy the following conditions:
+   (a) they are expressions yielding an integer result
+   (b) there can be no more than S390_NUM_GPRPARMS arguments
+       guard is a Ity_Bit expression indicating whether or not the
+       call happens.  If guard==NULL, the call is unconditional.
+*/
+static void
+doHelperCall(ISelEnv *env, Bool passBBP, IRExpr *guard,
+             IRCallee *callee, IRExpr **args)
+{
+   UInt n_args, i, argreg, size;
+   ULong target;
+   HReg tmpregs[S390_NUM_GPRPARMS];
+   s390_cc_t cc;
+
+   n_args = 0;
+   for (i = 0; args[i]; i++)
+      ++n_args;
+
+   if (n_args > (S390_NUM_GPRPARMS - (passBBP ? 1 : 0))) {
+      vpanic("doHelperCall: too many arguments");
+   }
+
+   /* This is the "slow scheme". fixs390: implement the fast one */
+   argreg = 0;
+
+   /* If we need the guest state pointer put it in a temporary arg reg */
+   if (passBBP) {
+      tmpregs[argreg] = newVRegI(env);
+      addInstr(env, s390_insn_move(sizeof(ULong), tmpregs[argreg],
+                                   s390_hreg_guest_state_pointer()));
+      argreg++;
+   }
+
+   /* Compute the function arguments into a temporary register each */
+   for (i = 0; i < n_args; i++) {
+      tmpregs[argreg] = s390_isel_int_expr(env, args[i]);
+      argreg++;
+   }
+
+   /* Compute the condition */
+   cc = S390_CC_ALWAYS;
+   if (guard) {
+      if (guard->tag == Iex_Const
+          && guard->Iex.Const.con->tag == Ico_U1
+          && guard->Iex.Const.con->Ico.U1 == True) {
+         /* unconditional -- do nothing */
+      } else {
+         cc = s390_isel_cc(env, guard);
+      }
+   }
+
+   /* Move the args to the final register */
+   for (i = 0; i < argreg; i++) {
+      HReg finalreg;
+
+      finalreg = mkHReg(s390_gprno_from_arg_index(i), HRcInt64, False);
+      size = sizeofIRType(Ity_I64);
+      addInstr(env, s390_insn_move(size, finalreg, tmpregs[i]));
+   }
+
+   target = Ptr_to_ULong(callee->addr);
+
+   /* Finally, the call itself. */
+   addInstr(env, s390_insn_helper_call(cc, (Addr64)target, n_args,
+                                       callee->name));
+}
+
+
+/* Given an expression representing a rounding mode using IRRoundingMode
+   encoding, convert it to an s390_round_t value. */
+static s390_round_t
+decode_rounding_mode(IRExpr *rounding_expr)
+{
+   if (rounding_expr->tag == Iex_Const &&
+       rounding_expr->Iex.Const.con->tag == Ico_U32) {
+      IRRoundingMode mode = rounding_expr->Iex.Const.con->Ico.U32;
+
+      switch (mode) {
+      case Irrm_NEAREST:       return S390_ROUND_NEAREST_EVEN;
+      case Irrm_ZERO:          return S390_ROUND_ZERO;
+      case Irrm_PosINF:        return S390_ROUND_POSINF;
+      case Irrm_NegINF:        return S390_ROUND_NEGINF;
+      }
+   }
+
+   vpanic("decode_rounding_mode");
+}
+
+
+/* CC_S390 holds the condition code in s390 encoding. Convert it to
+   VEX encoding
+
+   s390     VEX              b6 b2 b0   cc.1  cc.0
+   0      0x40 EQ             1  0  0     0     0
+   1      0x01 LT             0  0  1     0     1
+   2      0x00 GT             0  0  0     1     0
+   3      0x45 Unordered      1  1  1     1     1
+
+   b0 = cc.0
+   b2 = cc.0 & cc.1
+   b6 = ~(cc.0 ^ cc.1)   // ((cc.0 - cc.1) + 0x1 ) & 0x1
+
+   VEX = b0 | (b2 << 2) | (b6 << 6);
+*/
+static HReg
+convert_s390_fpcc_to_vex(ISelEnv *env, HReg cc_s390)
+{
+   HReg cc0, cc1, b2, b6, cc_vex;
+
+   cc0 = newVRegI(env);
+   addInstr(env, s390_insn_move(4, cc0, cc_s390));
+   addInstr(env, s390_insn_alu(4, S390_ALU_AND, cc0, s390_opnd_imm(1)));
+
+   cc1 = newVRegI(env);
+   addInstr(env, s390_insn_move(4, cc1, cc_s390));
+   addInstr(env, s390_insn_alu(4, S390_ALU_RSH, cc1, s390_opnd_imm(1)));
+
+   b2 = newVRegI(env);
+   addInstr(env, s390_insn_move(4, b2, cc0));
+   addInstr(env, s390_insn_alu(4, S390_ALU_AND, b2, s390_opnd_reg(cc1)));
+   addInstr(env, s390_insn_alu(4, S390_ALU_LSH, b2, s390_opnd_imm(2)));
+
+   b6 = newVRegI(env);
+   addInstr(env, s390_insn_move(4, b6, cc0));
+   addInstr(env, s390_insn_alu(4, S390_ALU_SUB, b6, s390_opnd_reg(cc1)));
+   addInstr(env, s390_insn_alu(4, S390_ALU_ADD, b6, s390_opnd_imm(1)));
+   addInstr(env, s390_insn_alu(4, S390_ALU_AND, b6, s390_opnd_imm(1)));
+   addInstr(env, s390_insn_alu(4, S390_ALU_LSH, b6, s390_opnd_imm(6)));
+
+   cc_vex = newVRegI(env);
+   addInstr(env, s390_insn_move(4, cc_vex, cc0));
+   addInstr(env, s390_insn_alu(4, S390_ALU_OR, cc_vex, s390_opnd_reg(b2)));
+   addInstr(env, s390_insn_alu(4, S390_ALU_OR, cc_vex, s390_opnd_reg(b6)));
+
+   return cc_vex;
+}
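+
+/* Worked example: for cc_s390 == 3 (unordered) we get cc0 == 1 and
+   cc1 == 1, hence b2 == 1 << 2, b6 == ((1 - 1 + 1) & 1) << 6, and the
+   result is 1 | 4 | 64 == 0x45, matching the table above. */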
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (128 bit)               ---*/
+/*---------------------------------------------------------*/
+static void
+s390_isel_int128_expr_wrk(HReg *dst_hi, HReg *dst_lo, ISelEnv *env,
+                          IRExpr *expr)
+{
+   IRType ty = typeOfIRExpr(env->type_env, expr);
+
+   vassert(ty == Ity_I128);
+
+   /* No need to consider the following
+      - 128-bit constants (they do not exist in VEX)
+      - 128-bit loads from memory (will not be generated)
+   */
+
+   /* Read 128-bit IRTemp */
+   if (expr->tag == Iex_RdTmp) {
+      lookupIRTemp128(dst_hi, dst_lo, env, expr->Iex.RdTmp.tmp);
+      return;
+   }
+
+   if (expr->tag == Iex_Binop) {
+      IRExpr *arg1 = expr->Iex.Binop.arg1;
+      IRExpr *arg2 = expr->Iex.Binop.arg2;
+      Bool is_signed_multiply, is_signed_divide;
+
+      switch (expr->Iex.Binop.op) {
+      case Iop_MullU64:
+         is_signed_multiply = False;
+         goto do_multiply64;
+
+      case Iop_MullS64:
+         is_signed_multiply = True;
+         goto do_multiply64;
+
+      case Iop_DivModU128to64:
+         is_signed_divide = False;
+         goto do_divide64;
+
+      case Iop_DivModS128to64:
+         is_signed_divide = True;
+         goto do_divide64;
+
+      case Iop_64HLto128:
+         *dst_hi = s390_isel_int_expr(env, arg1);
+         *dst_lo = s390_isel_int_expr(env, arg2);
+         return;
+
+      case Iop_DivModS64to64: {
+         HReg r10, r11, h1;
+         s390_opnd_RMI op2;
+
+         h1  = s390_isel_int_expr(env, arg1);       /* Process 1st operand */
+         op2 = s390_isel_int_expr_RMI(env, arg2);   /* Process 2nd operand */
+
+         /* We use non-virtual registers r10 and r11 as pair */
+         r10  = make_gpr(env, 10);
+         r11  = make_gpr(env, 11);
+
+         /* Move 1st operand into r11 and */
+         addInstr(env, s390_insn_move(8, r11, h1));
+
+         /* Divide */
+         addInstr(env, s390_insn_divs(8, r10, r11, op2));
+
+         /* The result is in registers r10 (remainder) and r11 (quotient).
+            Move the result into the reg pair that is being returned such
+            that the low 64 bits are the quotient and the upper 64 bits
+            are the remainder (see libvex_ir.h). */
+         *dst_hi = newVRegI(env);
+         *dst_lo = newVRegI(env);
+         addInstr(env, s390_insn_move(8, *dst_hi, r10));
+         addInstr(env, s390_insn_move(8, *dst_lo, r11));
+         return;
+      }
+
+      default:
+         break;
+
+      do_multiply64: {
+            HReg r10, r11, h1;
+            s390_opnd_RMI op2;
+
+            order_commutative_operands(arg1, arg2);
+
+            h1   = s390_isel_int_expr(env, arg1);       /* Process 1st operand */
+            op2  = s390_isel_int_expr_RMI(env, arg2);   /* Process 2nd operand */
+
+            /* We use non-virtual registers r10 and r11 as pair */
+            r10  = make_gpr(env, 10);
+            r11  = make_gpr(env, 11);
+
+            /* Move the first operand to r11 */
+            addInstr(env, s390_insn_move(8, r11, h1));
+
+            /* Multiply */
+            addInstr(env, s390_insn_mul(8, r10, r11, op2, is_signed_multiply));
+
+            /* The result is in registers r10 and r11. Assign to two virtual regs
+               and return. */
+            *dst_hi = newVRegI(env);
+            *dst_lo = newVRegI(env);
+            addInstr(env, s390_insn_move(8, *dst_hi, r10));
+            addInstr(env, s390_insn_move(8, *dst_lo, r11));
+            return;
+         }
+
+      do_divide64: {
+         HReg r10, r11, hi, lo;
+         s390_opnd_RMI op2;
+
+         s390_isel_int128_expr(&hi, &lo, env, arg1);
+         op2  = s390_isel_int_expr_RMI(env, arg2);   /* Process 2nd operand */
+
+         /* We use non-virtual registers r10 and r11 as pair */
+         r10  = make_gpr(env, 10);
+         r11  = make_gpr(env, 11);
+
+         /* Move high 64 bits of the 1st operand into r10 and
+            the low 64 bits into r11. */
+         addInstr(env, s390_insn_move(8, r10, hi));
+         addInstr(env, s390_insn_move(8, r11, lo));
+
+         /* Divide */
+         addInstr(env, s390_insn_div(8, r10, r11, op2, is_signed_divide));
+
+         /* The result is in registers r10 (remainder) and r11 (quotient).
+            Move the result into the reg pair that is being returned such
+            that the low 64 bits are the quotient and the upper 64 bits
+            are the remainder (see libvex_ir.h). */
+         *dst_hi = newVRegI(env);
+         *dst_lo = newVRegI(env);
+         addInstr(env, s390_insn_move(8, *dst_hi, r10));
+         addInstr(env, s390_insn_move(8, *dst_lo, r11));
+         return;
+      }
+      }
+   }
+
+   vpanic("s390_isel_int128_expr");
+}
+
+
+/* Compute a 128-bit value into two 64-bit registers. These may be either
+   real or virtual regs; in any case they must not be changed by subsequent
+   code emitted by the caller. */
+static void
+s390_isel_int128_expr(HReg *dst_hi, HReg *dst_lo, ISelEnv *env, IRExpr *expr)
+{
+   s390_isel_int128_expr_wrk(dst_hi, dst_lo, env, expr);
+
+   /* Sanity checks ... */
+   vassert(hregIsVirtual(*dst_hi));
+   vassert(hregIsVirtual(*dst_lo));
+   vassert(hregClass(*dst_hi) == HRcInt64);
+   vassert(hregClass(*dst_lo) == HRcInt64);
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Integer expressions (64/32/16/8 bit)        ---*/
+/*---------------------------------------------------------*/
+
+/* Select insns for an integer-typed expression, and add them to the
+   code list.  Return a reg holding the result.  This reg will be a
+   virtual register.  THE RETURNED REG MUST NOT BE MODIFIED.  If you
+   want to modify it, ask for a new vreg, copy it in there, and modify
+   the copy.  The register allocator will do its best to map both
+   vregs to the same real register, so the copies will often disappear
+   later in the game.
+
+   This should handle expressions of 64, 32, 16 and 8-bit type.
+   All results are returned in a 64-bit register.
+   For 32-, 16- and 8-bit expressions, the upper 32, 48, or 56 bits,
+   respectively, are arbitrary, so you should mask or sign extend
+   partial values if necessary.
+*/
+
+/* DO NOT CALL THIS DIRECTLY ! */
+static HReg
+s390_isel_int_expr_wrk(ISelEnv *env, IRExpr *expr)
+{
+   IRType ty = typeOfIRExpr(env->type_env, expr);
+   UChar size;
+   s390_bfp_unop_t bfpop;
+
+   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32 || ty == Ity_I64);
+
+   size = sizeofIRType(ty);   /* size of the result after evaluating EXPR */
+
+   switch (expr->tag) {
+
+      /* --------- TEMP --------- */
+   case Iex_RdTmp:
+      /* Return the virtual register that holds the temporary. */
+      return lookupIRTemp(env, expr->Iex.RdTmp.tmp);
+
+      /* --------- LOAD --------- */
+   case Iex_Load: {
+      HReg        dst = newVRegI(env);
+      s390_amode *am  = s390_isel_amode(env, expr->Iex.Load.addr);
+
+      if (expr->Iex.Load.end != Iend_BE)
+         goto irreducible;
+
+      addInstr(env, s390_insn_load(size, dst, am));
+
+      return dst;
+   }
+
+      /* --------- BINARY OP --------- */
+   case Iex_Binop: {
+      IRExpr *arg1 = expr->Iex.Binop.arg1;
+      IRExpr *arg2 = expr->Iex.Binop.arg2;
+      HReg h1, res;
+      s390_alu_t opkind;
+      s390_opnd_RMI op2, value, opnd;
+      s390_insn *insn;
+      Bool is_commutative, is_signed_multiply, is_signed_divide;
+
+      is_commutative = True;
+
+      switch (expr->Iex.Binop.op) {
+      case Iop_MullU8:
+      case Iop_MullU16:
+      case Iop_MullU32:
+         is_signed_multiply = False;
+         goto do_multiply;
+
+      case Iop_MullS8:
+      case Iop_MullS16:
+      case Iop_MullS32:
+         is_signed_multiply = True;
+         goto do_multiply;
+
+      do_multiply: {
+            HReg r10, r11;
+            UInt arg_size = size / 2;
+
+            order_commutative_operands(arg1, arg2);
+
+            h1   = s390_isel_int_expr(env, arg1);       /* Process 1st operand */
+            op2  = s390_isel_int_expr_RMI(env, arg2);   /* Process 2nd operand */
+
+            /* We use non-virtual registers r10 and r11 as pair */
+            r10  = make_gpr(env, 10);
+            r11  = make_gpr(env, 11);
+
+            /* Move the first operand to r11 */
+            addInstr(env, s390_insn_move(arg_size, r11, h1));
+
+            /* Multiply */
+            addInstr(env, s390_insn_mul(arg_size, r10, r11, op2, is_signed_multiply));
+
+            /* The result is in registers r10 and r11. Combine them into
+               a SIZE-bit value in the destination register. */
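+            /* E.g., for Iop_MullU32 (arg_size = 4, size = 8) this computes
+                 res = (r10 << 32) | (r11 & 0xFFFFFFFF)   */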
+            res  = newVRegI(env);
+            addInstr(env, s390_insn_move(arg_size, res, r10));
+            value = s390_opnd_imm(arg_size * 8);
+            addInstr(env, s390_insn_alu(size, S390_ALU_LSH, res, value));
+            value = s390_opnd_imm((((ULong)1) << arg_size * 8) - 1);
+            addInstr(env, s390_insn_alu(size, S390_ALU_AND, r11, value));
+            opnd = s390_opnd_reg(r11);
+            addInstr(env, s390_insn_alu(size, S390_ALU_OR,  res, opnd));
+            return res;
+         }
+
+      case Iop_DivModS64to32:
+         is_signed_divide = True;
+         goto do_divide;
+
+      case Iop_DivModU64to32:
+         is_signed_divide = False;
+         goto do_divide;
+
+      do_divide: {
+            HReg r10, r11;
+
+            h1   = s390_isel_int_expr(env, arg1);       /* Process 1st operand */
+            op2  = s390_isel_int_expr_RMI(env, arg2);   /* Process 2nd operand */
+
+            /* We use non-virtual registers r10 and r11 as pair */
+            r10  = make_gpr(env, 10);
+            r11  = make_gpr(env, 11);
+
+            /* Split the first operand and put the high 32 bits into r10 and
+               the low 32 bits into r11. */
+            addInstr(env, s390_insn_move(8, r10, h1));
+            addInstr(env, s390_insn_move(8, r11, h1));
+            value = s390_opnd_imm(32);
+            addInstr(env, s390_insn_alu(8, S390_ALU_RSH, r10, value));
+
+            /* Divide */
+            addInstr(env, s390_insn_div(4, r10, r11, op2, is_signed_divide));
+
+            /* The result is in registers r10 (remainder) and r11 (quotient).
+               Combine them into a 64-bit value such that the low 32 bits are
+               the quotient and the upper 32 bits are the remainder. (see
+               libvex_ir.h). */
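+            /* I.e., res = (r10 << 32) | (r11 & 0xFFFFFFFF), with r10 holding
+               the remainder and r11 the quotient. */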
+            res  = newVRegI(env);
+            addInstr(env, s390_insn_move(8, res, r10));
+            value = s390_opnd_imm(32);
+            addInstr(env, s390_insn_alu(8, S390_ALU_LSH, res, value));
+            value = s390_opnd_imm((((ULong)1) << 32) - 1);
+            addInstr(env, s390_insn_alu(8, S390_ALU_AND, r11, value));
+            opnd = s390_opnd_reg(r11);
+            addInstr(env, s390_insn_alu(8, S390_ALU_OR,  res, opnd));
+            return res;
+         }
+
+      case Iop_F32toI32S:  bfpop = S390_BFP_F32_TO_I32;  goto do_convert;
+      case Iop_F32toI64S:  bfpop = S390_BFP_F32_TO_I64;  goto do_convert;
+      case Iop_F64toI32S:  bfpop = S390_BFP_F64_TO_I32;  goto do_convert;
+      case Iop_F64toI64S:  bfpop = S390_BFP_F64_TO_I64;  goto do_convert;
+      case Iop_F128toI32S: bfpop = S390_BFP_F128_TO_I32; goto do_convert_128;
+      case Iop_F128toI64S: bfpop = S390_BFP_F128_TO_I64; goto do_convert_128;
+
+      do_convert: {
+         s390_round_t rounding_mode;
+
+         res  = newVRegI(env);
+         h1   = s390_isel_float_expr(env, arg2);   /* Process operand */
+
+         rounding_mode = decode_rounding_mode(arg1);
+         addInstr(env, s390_insn_bfp_unop(size, bfpop, res, h1, rounding_mode));
+         return res;
+      }
+
+      do_convert_128: {
+         s390_round_t rounding_mode;
+         HReg op_hi, op_lo, f13, f15;
+
+         res = newVRegI(env);
+         s390_isel_float128_expr(&op_hi, &op_lo, env, arg2); /* operand */
+
+         /* We use non-virtual registers r13 and r15 as pair */
+         f13 = make_fpr(13);
+         f15 = make_fpr(15);
+
+         /* operand --> (f13, f15) */
+         addInstr(env, s390_insn_move(8, f13, op_hi));
+         addInstr(env, s390_insn_move(8, f15, op_lo));
+
+         rounding_mode = decode_rounding_mode(arg1);
+         addInstr(env, s390_insn_bfp128_convert_from(size, bfpop, res, f13, f15,
+                                                     rounding_mode));
+         return res;
+      }
+
+      case Iop_8HLto16:
+      case Iop_16HLto32:
+      case Iop_32HLto64: {
+         HReg h2, masked;
+         UInt arg_size = size / 2;
+
+         res  = newVRegI(env);
+         h1   = s390_isel_int_expr(env, arg1);   /* Process 1st operand */
+         h2   = s390_isel_int_expr(env, arg2);   /* Process 2nd operand */
+
+         addInstr(env, s390_insn_move(arg_size, res, h1));
+         value = s390_opnd_imm(arg_size * 8);
+         addInstr(env, s390_insn_alu(size, S390_ALU_LSH, res, value));
+
+         /* Mask a copy of H2; vregs returned by s390_isel_int_expr must
+            not be modified (see the contract above). */
+         masked = newVRegI(env);
+         addInstr(env, s390_insn_move(size, masked, h2));
+         value = s390_opnd_imm((((ULong)1) << arg_size * 8) - 1);
+         addInstr(env, s390_insn_alu(size, S390_ALU_AND, masked, value));
+         opnd = s390_opnd_reg(masked);
+         addInstr(env, s390_insn_alu(size, S390_ALU_OR,  res, opnd));
+         return res;
+      }
+
+      case Iop_Max32U: {
+         /* arg1 > arg2 ? arg1 : arg2   using uint32_t arguments */
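+         /* Implementation: copy ARG1 into RES, compare RES against ARG2
+            unsigned, and overwrite RES with ARG2 when RES compares lower. */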
+         res = newVRegI(env);
+         h1  = s390_isel_int_expr(env, arg1);
+         op2 = s390_isel_int_expr_RMI(env, arg2);
+
+         addInstr(env, s390_insn_move(size, res, h1));
+         addInstr(env, s390_insn_compare(size, res, op2, False /* signed */));
+         addInstr(env, s390_insn_cond_move(size, S390_CC_L, res, op2));
+         return res;
+      }
+
+      case Iop_CmpF32:
+      case Iop_CmpF64: {
+         HReg cc_s390, h2;
+
+         h1 = s390_isel_float_expr(env, arg1);
+         h2 = s390_isel_float_expr(env, arg2);
+         cc_s390 = newVRegI(env);
+
+         size = (expr->Iex.Binop.op == Iop_CmpF32) ? 4 : 8;
+
+         addInstr(env, s390_insn_bfp_compare(size, cc_s390, h1, h2));
+
+         return convert_s390_fpcc_to_vex(env, cc_s390);
+      }
+
+      case Iop_CmpF128: {
+         HReg op1_hi, op1_lo, op2_hi, op2_lo, f12, f13, f14, f15, cc_s390;
+
+         s390_isel_float128_expr(&op1_hi, &op1_lo, env, arg1); /* 1st operand */
+         s390_isel_float128_expr(&op2_hi, &op2_lo, env, arg2); /* 2nd operand */
+         cc_s390 = newVRegI(env);
+
+         /* We use non-virtual registers as pairs (f13, f15) and (f12, f14) */
+         f12 = make_fpr(12);
+         f13 = make_fpr(13);
+         f14 = make_fpr(14);
+         f15 = make_fpr(15);
+
+         /* 1st operand --> (f12, f14) */
+         addInstr(env, s390_insn_move(8, f12, op1_hi));
+         addInstr(env, s390_insn_move(8, f14, op1_lo));
+
+         /* 2nd operand --> (f13, f15) */
+         addInstr(env, s390_insn_move(8, f13, op2_hi));
+         addInstr(env, s390_insn_move(8, f15, op2_lo));
+
+         addInstr(env, s390_insn_bfp128_compare(16, cc_s390, f12, f14, f13, f15));
+
+         return convert_s390_fpcc_to_vex(env, cc_s390);
+      }
+
+      case Iop_Add8:
+      case Iop_Add16:
+      case Iop_Add32:
+      case Iop_Add64:
+         opkind = S390_ALU_ADD;
+         break;
+
+      case Iop_Sub8:
+      case Iop_Sub16:
+      case Iop_Sub32:
+      case Iop_Sub64:
+         opkind = S390_ALU_SUB;
+         is_commutative = False;
+         break;
+
+      case Iop_And8:
+      case Iop_And16:
+      case Iop_And32:
+      case Iop_And64:
+         opkind = S390_ALU_AND;
+         break;
+
+      case Iop_Or8:
+      case Iop_Or16:
+      case Iop_Or32:
+      case Iop_Or64:
+         opkind = S390_ALU_OR;
+         break;
+
+      case Iop_Xor8:
+      case Iop_Xor16:
+      case Iop_Xor32:
+      case Iop_Xor64:
+         opkind = S390_ALU_XOR;
+         break;
+
+      case Iop_Shl8:
+      case Iop_Shl16:
+      case Iop_Shl32:
+      case Iop_Shl64:
+         opkind = S390_ALU_LSH;
+         is_commutative = False;
+         break;
+
+      case Iop_Shr8:
+      case Iop_Shr16:
+      case Iop_Shr32:
+      case Iop_Shr64:
+         opkind = S390_ALU_RSH;
+         is_commutative = False;
+         break;
+
+      case Iop_Sar8:
+      case Iop_Sar16:
+      case Iop_Sar32:
+      case Iop_Sar64:
+         opkind = S390_ALU_RSHA;
+         is_commutative = False;
+         break;
+
+      default:
+         goto irreducible;
+      }
+
+      /* Pattern match: 0 - arg1  -->  -arg1   */
+      if (opkind == S390_ALU_SUB && s390_expr_is_const_zero(arg1)) {
+         res  = newVRegI(env);
+         op2  = s390_isel_int_expr_RMI(env, arg2);   /* Process 2nd operand */
+         insn = s390_insn_unop(size, S390_NEGATE, res, op2);
+         addInstr(env, insn);
+
+         return res;
+      }
+
+      if (is_commutative) {
+         order_commutative_operands(arg1, arg2);
+      }
+
+      h1   = s390_isel_int_expr(env, arg1);       /* Process 1st operand */
+      op2  = s390_isel_int_expr_RMI(env, arg2);   /* Process 2nd operand */
+      res  = newVRegI(env);
+      addInstr(env, s390_insn_move(size, res, h1));
+      insn = s390_insn_alu(size, opkind, res, op2);
+
+      addInstr(env, insn);
+
+      return res;
+   }
+
+      /* --------- UNARY OP --------- */
+   case Iex_Unop: {
+      static s390_opnd_RMI mask  = { S390_OPND_IMMEDIATE };
+      static s390_opnd_RMI shift = { S390_OPND_IMMEDIATE };
+      s390_opnd_RMI opnd;
+      s390_insn    *insn;
+      IRExpr *arg;
+      HReg    dst, h1;
+      IROp    unop, binop;
+
+      arg = expr->Iex.Unop.arg;
+
+      /* Special cases are handled here */
+
+      /* 32-bit multiply with 32-bit result or
+         64-bit multiply with 64-bit result */
+      unop  = expr->Iex.Unop.op;
+      /* Look at ARG's binop field only if ARG really is a binop. */
+      binop = (arg->tag == Iex_Binop) ? arg->Iex.Binop.op : Iop_INVALID;
+
+      if (arg->tag == Iex_Binop &&
+          ((unop == Iop_64to32 &&
+            (binop == Iop_MullS32 || binop == Iop_MullU32)) ||
+           (unop == Iop_128to64 &&
+            (binop == Iop_MullS64 || binop == Iop_MullU64)))) {
+         h1   = s390_isel_int_expr(env, arg->Iex.Binop.arg1);     /* 1st opnd */
+         opnd = s390_isel_int_expr_RMI(env, arg->Iex.Binop.arg2); /* 2nd opnd */
+         dst  = newVRegI(env);     /* Result goes into a new register */
+         addInstr(env, s390_insn_move(size, dst, h1));
+         addInstr(env, s390_insn_alu(size, S390_ALU_MUL, dst, opnd));
+
+         return dst;
+      }
+
+      if (unop == Iop_ReinterpF64asI64) {
+         dst = newVRegI(env);
+         h1  = s390_isel_float_expr(env, arg);     /* Process the operand */
+         addInstr(env, s390_insn_move(size, dst, h1));
+
+         return dst;
+      }
+
+      /* Expressions whose argument is 1-bit wide */
+      if (typeOfIRExpr(env->type_env, arg) == Ity_I1) {
+         s390_cc_t cond = s390_isel_cc(env, arg);
+         dst = newVRegI(env);     /* Result goes into a new register */
+         addInstr(env, s390_insn_cc2bool(dst, cond));
+
+         switch (unop) {
+         case Iop_1Uto8:
+         case Iop_1Uto32:
+         case Iop_1Uto64:
+            /* Nothing to do */
+            break;
+
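+         /* DST now holds 0 or 1. For the signed cases below, shifting that
+            bit up to the MSB and arithmetically shifting it back produces
+            0 or -1 (all ones). */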
+         case Iop_1Sto8:
+         case Iop_1Sto16:
+         case Iop_1Sto32:
+            shift.variant.imm = 31;
+            addInstr(env, s390_insn_alu(4, S390_ALU_LSH,  dst, shift));
+            addInstr(env, s390_insn_alu(4, S390_ALU_RSHA, dst, shift));
+            break;
+
+         case Iop_1Sto64:
+            shift.variant.imm = 63;
+            addInstr(env, s390_insn_alu(8, S390_ALU_LSH,  dst, shift));
+            addInstr(env, s390_insn_alu(8, S390_ALU_RSHA, dst, shift));
+            break;
+
+         default:
+            goto irreducible;
+         }
+
+         return dst;
+      }
+
+      /* Regular processing */
+
+      if (unop == Iop_128to64) {
+         HReg dst_hi, dst_lo;
+
+         s390_isel_int128_expr(&dst_hi, &dst_lo, env, arg);
+         return dst_lo;
+      }
+
+      if (unop == Iop_128HIto64) {
+         HReg dst_hi, dst_lo;
+
+         s390_isel_int128_expr(&dst_hi, &dst_lo, env, arg);
+         return dst_hi;
+      }
+
+      dst  = newVRegI(env);     /* Result goes into a new register */
+      opnd = s390_isel_int_expr_RMI(env, arg);     /* Process the operand */
+
+      switch (unop) {
+      case Iop_8Uto16:
+      case Iop_8Uto32:
+      case Iop_8Uto64:
+         insn = s390_insn_unop(size, S390_ZERO_EXTEND_8, dst, opnd);
+         break;
+
+      case Iop_16Uto32:
+      case Iop_16Uto64:
+         insn = s390_insn_unop(size, S390_ZERO_EXTEND_16, dst, opnd);
+         break;
+
+      case Iop_32Uto64:
+         insn = s390_insn_unop(size, S390_ZERO_EXTEND_32, dst, opnd);
+         break;
+
+      case Iop_8Sto16:
+      case Iop_8Sto32:
+      case Iop_8Sto64:
+         insn = s390_insn_unop(size, S390_SIGN_EXTEND_8, dst, opnd);
+         break;
+
+      case Iop_16Sto32:
+      case Iop_16Sto64:
+         insn = s390_insn_unop(size, S390_SIGN_EXTEND_16, dst, opnd);
+         break;
+
+      case Iop_32Sto64:
+         insn = s390_insn_unop(size, S390_SIGN_EXTEND_32, dst, opnd);
+         break;
+
+      case Iop_64to8:
+      case Iop_64to16:
+      case Iop_64to32:
+      case Iop_32to8:
+      case Iop_32to16:
+      case Iop_16to8:
+         /* Down-casts are no-ops. Upstream operations will only look at
+            the bytes that make up the result of the down-cast. So there
+            is no point setting the other bytes to 0. */
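+         /* E.g., Iop_64to32 is just a register copy; a 32-bit consumer
+            reads only the low 4 bytes of DST. */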
+         insn = s390_opnd_copy(8, dst, opnd);
+         break;
+
+      case Iop_64HIto32:
+         addInstr(env, s390_opnd_copy(8, dst, opnd));
+         shift.variant.imm = 32;
+         insn = s390_insn_alu(8, S390_ALU_RSH, dst, shift);
+         break;
+
+      case Iop_32HIto16:
+         addInstr(env, s390_opnd_copy(4, dst, opnd));
+         shift.variant.imm = 16;
+         insn = s390_insn_alu(4, S390_ALU_RSH, dst, shift);
+         break;
+
+      case Iop_16HIto8:
+         addInstr(env, s390_opnd_copy(2, dst, opnd));
+         shift.variant.imm = 8;
+         insn = s390_insn_alu(2, S390_ALU_RSH, dst, shift);
+         break;
+
+      case Iop_Not8:
+      case Iop_Not16:
+      case Iop_Not32:
+      case Iop_Not64:
+         /* XOR with ffff... */
+         mask.variant.imm = ~(ULong)0;
+         addInstr(env, s390_opnd_copy(size, dst, opnd));
+         insn = s390_insn_alu(size, S390_ALU_XOR, dst, mask);
+         break;
+
+      case Iop_Left8:
+      case Iop_Left16:
+      case Iop_Left32:
+      case Iop_Left64:
+         addInstr(env, s390_insn_unop(size, S390_NEGATE, dst, opnd));
+         insn = s390_insn_alu(size, S390_ALU_OR, dst, opnd);
+         break;
+
+      case Iop_CmpwNEZ32:
+      case Iop_CmpwNEZ64: {
+         /* Use the fact that x | -x == 0 iff x == 0. Otherwise, either X
+            or -X will have a 1 in the MSB. */
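+         /* E.g., for x != 0 the arithmetic right shift by 31/63 smears the
+            MSB of x | -x across the register, giving all ones; for x == 0
+            the result remains 0. */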
+         addInstr(env, s390_insn_unop(size, S390_NEGATE, dst, opnd));
+         addInstr(env, s390_insn_alu(size, S390_ALU_OR,  dst, opnd));
+         shift.variant.imm = (unop == Iop_CmpwNEZ32) ? 31 : 63;
+         addInstr(env, s390_insn_alu(size, S390_ALU_RSHA,  dst, shift));
+         return dst;
+      }
+
+      case Iop_Clz64: {
+         HReg r10, r11;
+
+         /* This will be implemented using FLOGR, if possible. So we need to
+            set aside a pair of non-virtual registers. The result (number of
+            left-most zero bits) will be in r10. The value in r11 is unspecified
+            and must not be used. */
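+         /* FLOGR presumably requires an even/odd register pair for its
+            output, which is why the r10/r11 pair is set aside here. */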
+         r10  = make_gpr(env, 10);
+         r11  = make_gpr(env, 11);
+
+         addInstr(env, s390_insn_clz(8, r10, r11, opnd));
+         addInstr(env, s390_insn_move(8, dst, r10));
+         return dst;
+      }
+
+      default:
+         goto irreducible;
+      }
+
+      addInstr(env, insn);
+
+      return dst;
+   }
+
+      /* --------- GET --------- */
+   case Iex_Get: {
+      HReg dst = newVRegI(env);
+      s390_amode *am = s390_amode_for_guest_state(expr->Iex.Get.offset);
+
+      /* We never load more than 8 bytes from the guest state, because the
+         floating point register pair is not contiguous. */
+      vassert(size <= 8);
+
+      addInstr(env, s390_insn_load(size, dst, am));
+
+      return dst;
+   }
+
+   case Iex_GetI:
+      /* not needed */
+      break;
+
+      /* --------- CCALL --------- */
+   case Iex_CCall: {
+      HReg dst = newVRegI(env);
+
+      doHelperCall(env, False, NULL, expr->Iex.CCall.cee,
+                   expr->Iex.CCall.args);
+
+      /* Move the returned value into the return register */
+      addInstr(env, s390_insn_move(sizeofIRType(expr->Iex.CCall.retty), dst,
+                                   mkHReg(S390_REGNO_RETURN_VALUE,
+                                          HRcInt64, False)));
+      return dst;
+   }
+
+      /* --------- LITERAL --------- */
+
+      /* Load a literal into a register. Create a "load immediate"
+         v-insn and return the register. */
+   case Iex_Const: {
+      ULong value;
+      HReg  dst = newVRegI(env);
+      const IRConst *con = expr->Iex.Const.con;
+
+      /* Bitwise copy of the value. No sign/zero-extension */
+      switch (con->tag) {
+      case Ico_U64: value = con->Ico.U64; break;
+      case Ico_U32: value = con->Ico.U32; break;
+      case Ico_U16: value = con->Ico.U16; break;
+      case Ico_U8:  value = con->Ico.U8;  break;
+      default:      vpanic("s390_isel_int_expr: invalid constant");
+      }
+
+      addInstr(env, s390_insn_load_immediate(size, dst, value));
+
+      return dst;
+   }
+
+      /* --------- MULTIPLEX --------- */
+   case Iex_Mux0X: {
+      IRExpr *cond_expr;
+      HReg dst, tmp, rX;
+      s390_opnd_RMI cond, r0, zero;
+
+      cond_expr = expr->Iex.Mux0X.cond;
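+      /* Mux0X semantics: if the low 8 bits of COND are zero the result is
+         EXPR0, otherwise EXPRX (cf. libvex_ir.h). */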
+
+      dst  = newVRegI(env);
+      r0   = s390_isel_int_expr_RMI(env, expr->Iex.Mux0X.expr0);
+      rX   = s390_isel_int_expr(env, expr->Iex.Mux0X.exprX);
+      size = sizeofIRType(typeOfIRExpr(env->type_env, expr->Iex.Mux0X.exprX));
+
+      if (cond_expr->tag == Iex_Unop && cond_expr->Iex.Unop.op == Iop_1Uto8) {
+         s390_cc_t cc = s390_isel_cc(env, cond_expr->Iex.Unop.arg);
+
+         addInstr(env, s390_insn_move(size, dst, rX));
+         addInstr(env, s390_insn_cond_move(size, s390_cc_invert(cc), dst, r0));
+         return dst;
+      }
+
+      /* Assume the condition is true and move rX to the destination reg. */
+      addInstr(env, s390_insn_move(size, dst, rX));
+
+      /* Compute the condition ... */
+      cond = s390_isel_int_expr_RMI(env, cond_expr);
+
+      /* tmp = cond & 0xFF */
+      tmp  = newVRegI(env);
+      addInstr(env, s390_insn_load_immediate(4, tmp, 0xFF));
+      addInstr(env, s390_insn_alu(4, S390_ALU_AND, tmp, cond));
+
+      /* ... and compare it with zero */
+      zero = s390_opnd_imm(0);
+      addInstr(env, s390_insn_compare(4, tmp, zero, False /* signed */));
+
+      /* ... and if it compared equal move r0 to the destination reg. */
+      size = sizeofIRType(typeOfIRExpr(env->type_env, expr->Iex.Mux0X.expr0));
+      addInstr(env, s390_insn_cond_move(size, S390_CC_E, dst, r0));
+
+      return dst;
+   }
+
+   default:
+      break;
+   }
+
+   /* We get here if no pattern matched. */
+ irreducible:
+   ppIRExpr(expr);
+   vpanic("s390_isel_int_expr: cannot reduce tree");
+}
+
+
+static HReg
+s390_isel_int_expr(ISelEnv *env, IRExpr *expr)
+{
+   HReg dst = s390_isel_int_expr_wrk(env, expr);
+
+   /* Sanity checks ... */
+   vassert(hregClass(dst) == HRcInt64);
+   vassert(hregIsVirtual(dst));
+
+   return dst;
+}
+
+
+static s390_opnd_RMI
+s390_isel_int_expr_RMI(ISelEnv *env, IRExpr *expr)
+{
+   IRType ty = typeOfIRExpr(env->type_env, expr);
+   s390_opnd_RMI dst;
+
+   vassert(ty == Ity_I8 || ty == Ity_I16 || ty == Ity_I32 ||
+           ty == Ity_I64);
+
+   if (expr->tag == Iex_Load) {
+      dst.tag = S390_OPND_AMODE;
+      dst.variant.am = s390_isel_amode(env, expr->Iex.Load.addr);
+   } else if (expr->tag == Iex_Get) {
+      dst.tag = S390_OPND_AMODE;
+      dst.variant.am = s390_amode_for_guest_state(expr->Iex.Get.offset);
+   } else if (expr->tag == Iex_Const) {
+      ULong value;
+
+      /* The bit pattern for the value will be stored as is in the least
+         significant bits of VALUE. */
+      switch (expr->Iex.Const.con->tag) {
+      case Ico_U1:  value = expr->Iex.Const.con->Ico.U1;  break;
+      case Ico_U8:  value = expr->Iex.Const.con->Ico.U8;  break;
+      case Ico_U16: value = expr->Iex.Const.con->Ico.U16; break;
+      case Ico_U32: value = expr->Iex.Const.con->Ico.U32; break;
+      case Ico_U64: value = expr->Iex.Const.con->Ico.U64; break;
+      default:
+         vpanic("s390_isel_int_expr_RMI");
+      }
+
+      dst.tag = S390_OPND_IMMEDIATE;
+      dst.variant.imm = value;
+   } else {
+      dst.tag = S390_OPND_REG;
+      dst.variant.reg = s390_isel_int_expr(env, expr);
+   }
+
+   return dst;
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (128 bit)        ---*/
+/*---------------------------------------------------------*/
+static void
+s390_isel_float128_expr_wrk(HReg *dst_hi, HReg *dst_lo, ISelEnv *env,
+                            IRExpr *expr)
+{
+   IRType ty = typeOfIRExpr(env->type_env, expr);
+
+   vassert(ty == Ity_F128);
+
+   switch (expr->tag) {
+   case Iex_RdTmp:
+      /* Return the virtual registers that hold the temporary. */
+      lookupIRTemp128(dst_hi, dst_lo, env, expr->Iex.RdTmp.tmp);
+      return;
+
+      /* --------- LOAD --------- */
+   case Iex_Load: {
+      IRExpr *addr_hi, *addr_lo;
+      s390_amode *am_hi, *am_lo;
+
+      if (expr->Iex.Load.end != Iend_BE)
+         goto irreducible;
+
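+      /* The F128 value is stored big-endian: high 8 bytes at ADDR, low
+         8 bytes at ADDR + 8. */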
+      addr_hi = expr->Iex.Load.addr;
+      addr_lo = IRExpr_Binop(Iop_Add64, addr_hi, mkU64(8));
+
+      am_hi  = s390_isel_amode(env, addr_hi);
+      am_lo  = s390_isel_amode(env, addr_lo);
+
+      *dst_hi = newVRegF(env);
+      *dst_lo = newVRegF(env);
+      addInstr(env, s390_insn_load(8, *dst_hi, am_hi));
+      addInstr(env, s390_insn_load(8, *dst_lo, am_lo));
+      return;
+   }
+
+
+      /* --------- GET --------- */
+   case Iex_Get:
+      /* This is not supported because loading 128-bit from the guest
+         state is almost certainly wrong. Use get_fpr_pair instead. */
+      vpanic("Iex_Get with F128 data");
+
+      /* --------- 4-ary OP --------- */
+   case Iex_Qop:
+      vpanic("Iex_Qop with F128 data");
+
+      /* --------- TERNARY OP --------- */
+   case Iex_Triop: {
+      IROp    op    = expr->Iex.Triop.op;
+      IRExpr *left  = expr->Iex.Triop.arg2;
+      IRExpr *right = expr->Iex.Triop.arg3;
+      s390_bfp_binop_t bfpop;
+      s390_round_t rounding_mode;
+      HReg op1_hi, op1_lo, op2_hi, op2_lo, f12, f13, f14, f15;
+
+      s390_isel_float128_expr(&op1_hi, &op1_lo, env, left);  /* 1st operand */
+      s390_isel_float128_expr(&op2_hi, &op2_lo, env, right); /* 2nd operand */
+
+      /* We use non-virtual registers as pairs (f13, f15) and (f12, f14) */
+      f12 = make_fpr(12);
+      f13 = make_fpr(13);
+      f14 = make_fpr(14);
+      f15 = make_fpr(15);
+
+      /* 1st operand --> (f12, f14) */
+      addInstr(env, s390_insn_move(8, f12, op1_hi));
+      addInstr(env, s390_insn_move(8, f14, op1_lo));
+
+      /* 2nd operand --> (f13, f15) */
+      addInstr(env, s390_insn_move(8, f13, op2_hi));
+      addInstr(env, s390_insn_move(8, f15, op2_lo));
+
+      switch (op) {
+      case Iop_AddF128: bfpop = S390_BFP_ADD; break;
+      case Iop_SubF128: bfpop = S390_BFP_SUB; break;
+      case Iop_MulF128: bfpop = S390_BFP_MUL; break;
+      case Iop_DivF128: bfpop = S390_BFP_DIV; break;
+      default:
+         goto irreducible;
+      }
+
+      rounding_mode = decode_rounding_mode(expr->Iex.Triop.arg1);
+      addInstr(env, s390_insn_bfp128_binop(16, bfpop, f12, f14, f13,
+                                           f15, rounding_mode));
+
+      /* Move result to virtual destination register */
+      *dst_hi = newVRegF(env);
+      *dst_lo = newVRegF(env);
+      addInstr(env, s390_insn_move(8, *dst_hi, f12));
+      addInstr(env, s390_insn_move(8, *dst_lo, f14));
+
+      return;
+   }
+
+      /* --------- BINARY OP --------- */
+   case Iex_Binop: {
+      HReg op_hi, op_lo, f12, f13, f14, f15;
+      s390_bfp_unop_t bfpop;
+      s390_round_t rounding_mode;
+
+      /* We use non-virtual registers as pairs (f13, f15) and (f12, f14) */
+      f12 = make_fpr(12);
+      f13 = make_fpr(13);
+      f14 = make_fpr(14);
+      f15 = make_fpr(15);
+
+      switch (expr->Iex.Binop.op) {
+      case Iop_SqrtF128:
+         s390_isel_float128_expr(&op_hi, &op_lo, env, expr->Iex.Binop.arg2);
+
+         /* operand --> (f13, f15) */
+         addInstr(env, s390_insn_move(8, f13, op_hi));
+         addInstr(env, s390_insn_move(8, f15, op_lo));
+
+         bfpop = S390_BFP_SQRT;
+         rounding_mode = decode_rounding_mode(expr->Iex.Binop.arg1);
+
+         addInstr(env, s390_insn_bfp128_unop(16, bfpop, f12, f14, f13, f15,
+                                             rounding_mode));
+
+         /* Move result to virtual destination registers */
+         *dst_hi = newVRegF(env);
+         *dst_lo = newVRegF(env);
+         addInstr(env, s390_insn_move(8, *dst_hi, f12));
+         addInstr(env, s390_insn_move(8, *dst_lo, f14));
+         return;
+
+      case Iop_F64HLtoF128:
+         *dst_hi = s390_isel_float_expr(env, expr->Iex.Binop.arg1);
+         *dst_lo = s390_isel_float_expr(env, expr->Iex.Binop.arg2);
+         return;
+
+      default:
+         goto irreducible;
+      }
+   }
+
+      /* --------- UNARY OP --------- */
+   case Iex_Unop: {
+      IRExpr *left = expr->Iex.Unop.arg;
+      s390_bfp_unop_t bfpop;
+      s390_round_t rounding_mode;
+      HReg op_hi, op_lo, op, f12, f13, f14, f15;
+
+      /* We use non-virtual registers as pairs (f13, f15) and (f12, f14) */
+      f12 = make_fpr(12);
+      f13 = make_fpr(13);
+      f14 = make_fpr(14);
+      f15 = make_fpr(15);
+
+      switch (expr->Iex.Unop.op) {
+      case Iop_NegF128:       bfpop = S390_BFP_NEG;          goto float128_opnd;
+      case Iop_AbsF128:       bfpop = S390_BFP_ABS;          goto float128_opnd;
+      case Iop_I32StoF128:    bfpop = S390_BFP_I32_TO_F128;  goto convert_int;
+      case Iop_I64StoF128:    bfpop = S390_BFP_I64_TO_F128;  goto convert_int;
+      case Iop_F32toF128:     bfpop = S390_BFP_F32_TO_F128;  goto convert_float;
+      case Iop_F64toF128:     bfpop = S390_BFP_F64_TO_F128;  goto convert_float;
+      default:
+         goto irreducible;
+      }
+
+   float128_opnd:
+      s390_isel_float128_expr(&op_hi, &op_lo, env, left);
+
+      /* operand --> (f13, f15) */
+      addInstr(env, s390_insn_move(8, f13, op_hi));
+      addInstr(env, s390_insn_move(8, f15, op_lo));
+
+      rounding_mode = S390_ROUND_NEAREST_EVEN;  /* will not be used later on */
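+      /* NEG and ABS merely flip or clear the sign bit; they are exact, so
+         the rounding mode is irrelevant here. */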
+      addInstr(env, s390_insn_bfp128_unop(16, bfpop, f12, f14, f13, f15,
+                                          rounding_mode));
+      goto move_dst;
+
+   convert_float:
+      op  = s390_isel_float_expr(env, left);
+      addInstr(env, s390_insn_bfp128_convert_to(16, bfpop, f12, f14,
+                                                op));
+      goto move_dst;
+
+   convert_int:
+      op  = s390_isel_int_expr(env, left);
+      addInstr(env, s390_insn_bfp128_convert_to(16, bfpop, f12, f14,
+                                                op));
+      goto move_dst;
+
+   move_dst:
+      /* Move result to virtual destination registers */
+      *dst_hi = newVRegF(env);
+      *dst_lo = newVRegF(env);
+      addInstr(env, s390_insn_move(8, *dst_hi, f12));
+      addInstr(env, s390_insn_move(8, *dst_lo, f14));
+      return;
+   }
+
+   default:
+      goto irreducible;
+   }
+
+   /* We get here if no pattern matched. */
+ irreducible:
+   ppIRExpr(expr);
+   vpanic("s390_isel_int_expr: cannot reduce tree");
+}
+
+/* Compute a 128-bit value into two 64-bit registers. These may be either
+   real or virtual regs; in any case they must not be changed by subsequent
+   code emitted by the caller. */
+static void
+s390_isel_float128_expr(HReg *dst_hi, HReg *dst_lo, ISelEnv *env, IRExpr *expr)
+{
+   s390_isel_float128_expr_wrk(dst_hi, dst_lo, env, expr);
+
+   /* Sanity checks ... */
+   vassert(hregIsVirtual(*dst_hi));
+   vassert(hregIsVirtual(*dst_lo));
+   vassert(hregClass(*dst_hi) == HRcFlt64);
+   vassert(hregClass(*dst_lo) == HRcFlt64);
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Floating point expressions (64 bit)         ---*/
+/*---------------------------------------------------------*/
+
+static HReg
+s390_isel_float_expr_wrk(ISelEnv *env, IRExpr *expr)
+{
+   IRType ty = typeOfIRExpr(env->type_env, expr);
+   UChar size;
+
+   vassert(ty == Ity_F32 || ty == Ity_F64);
+
+   size = sizeofIRType(ty);
+
+   switch (expr->tag) {
+   case Iex_RdTmp:
+      /* Return the virtual register that holds the temporary. */
+      return lookupIRTemp(env, expr->Iex.RdTmp.tmp);
+
+      /* --------- LOAD --------- */
+   case Iex_Load: {
+      HReg        dst = newVRegF(env);
+      s390_amode *am  = s390_isel_amode(env, expr->Iex.Load.addr);
+
+      if (expr->Iex.Load.end != Iend_BE)
+         goto irreducible;
+
+      addInstr(env, s390_insn_load(size, dst, am));
+
+      return dst;
+   }
+
+      /* --------- GET --------- */
+   case Iex_Get: {
+      HReg dst = newVRegF(env);
+      s390_amode *am = s390_amode_for_guest_state(expr->Iex.Get.offset);
+
+      addInstr(env, s390_insn_load(size, dst, am));
+
+      return dst;
+   }
+
+      /* --------- LITERAL --------- */
+
+      /* Load a literal into a register. Create a "load immediate"
+         v-insn and return the register. */
+   case Iex_Const: {
+      ULong value;
+      HReg  dst = newVRegF(env);
+      const IRConst *con = expr->Iex.Const.con;
+
+      /* Bitwise copy of the value. No sign/zero-extension */
+      switch (con->tag) {
+      case Ico_F32i: value = con->Ico.F32i; break;
+      case Ico_F64i: value = con->Ico.F64i; break;
+      default:       vpanic("s390_isel_float_expr: invalid constant");
+      }
+
+      if (value != 0) vpanic("cannot load immediate floating point constant");
+
+      addInstr(env, s390_insn_load_immediate(size, dst, value));
+
+      return dst;
+   }
+
+      /* --------- 4-ary OP --------- */
+   case Iex_Qop: {
+      HReg op1, op2, op3, dst;
+      s390_bfp_triop_t bfpop;
+      s390_round_t rounding_mode;
+
+      op1 = s390_isel_float_expr(env, expr->Iex.Qop.arg2);
+      op2 = s390_isel_float_expr(env, expr->Iex.Qop.arg3);
+      op3 = s390_isel_float_expr(env, expr->Iex.Qop.arg4);
+      dst = newVRegF(env);
+      addInstr(env, s390_insn_move(size, dst, op1));
+
+      switch (expr->Iex.Qop.op) {
+      case Iop_MAddF32:
+      case Iop_MAddF64:  bfpop = S390_BFP_MADD; break;
+      case Iop_MSubF32:
+      case Iop_MSubF64:  bfpop = S390_BFP_MSUB; break;
+
+      default:
+         goto irreducible;
+      }
+
+      rounding_mode = decode_rounding_mode(expr->Iex.Qop.arg1);
+      addInstr(env, s390_insn_bfp_triop(size, bfpop, dst, op2, op3,
+                                        rounding_mode));
+      return dst;
+   }
+
+      /* --------- TERNARY OP --------- */
+   case Iex_Triop: {
+      IROp    op    = expr->Iex.Triop.op;
+      IRExpr *left  = expr->Iex.Triop.arg2;
+      IRExpr *right = expr->Iex.Triop.arg3;
+      s390_bfp_binop_t bfpop;
+      s390_round_t rounding_mode;
+      HReg h1, op2, dst;
+
+      h1   = s390_isel_float_expr(env, left);  /* Process 1st operand */
+      op2  = s390_isel_float_expr(env, right); /* Process 2nd operand */
+      dst  = newVRegF(env);
+      addInstr(env, s390_insn_move(size, dst, h1));
+      switch (op) {
+      case Iop_AddF32:
+      case Iop_AddF64:  bfpop = S390_BFP_ADD; break;
+      case Iop_SubF32:
+      case Iop_SubF64:  bfpop = S390_BFP_SUB; break;
+      case Iop_MulF32:
+      case Iop_MulF64:  bfpop = S390_BFP_MUL; break;
+      case Iop_DivF32:
+      case Iop_DivF64:  bfpop = S390_BFP_DIV; break;
+
+      default:
+         goto irreducible;
+      }
+
+      rounding_mode = decode_rounding_mode(expr->Iex.Triop.arg1);
+      addInstr(env, s390_insn_bfp_binop(size, bfpop, dst, op2, rounding_mode));
+      return dst;
+   }
+
+      /* --------- BINARY OP --------- */
+   case Iex_Binop: {
+      IROp    op   = expr->Iex.Binop.op;
+      IRExpr *left = expr->Iex.Binop.arg2;
+      HReg h1, dst;
+      s390_bfp_unop_t bfpop;
+      s390_round_t rounding_mode;
+      Int integer_operand;
+
+      integer_operand = 1;
+
+      switch (op) {
+      case Iop_SqrtF32:
+      case Iop_SqrtF64:
+         bfpop = S390_BFP_SQRT;
+         integer_operand = 0;
+         break;
+
+      case Iop_F64toF32:
+         bfpop = S390_BFP_F64_TO_F32;
+         integer_operand = 0;
+         break;
+
+      case Iop_I32StoF32: bfpop = S390_BFP_I32_TO_F32; break;
+      case Iop_I64StoF32: bfpop = S390_BFP_I64_TO_F32; break;
+      case Iop_I64StoF64: bfpop = S390_BFP_I64_TO_F64; break;
+      default:
+         goto irreducible;
+
+      case Iop_F128toF64:
+      case Iop_F128toF32: {
+         HReg op_hi, op_lo, f12, f13, f14, f15;
+
+         bfpop = (op == Iop_F128toF32) ? S390_BFP_F128_TO_F32
+                                       : S390_BFP_F128_TO_F64;
+
+         rounding_mode = decode_rounding_mode(expr->Iex.Binop.arg1);
+
+         s390_isel_float128_expr(&op_hi, &op_lo, env, expr->Iex.Binop.arg2);
+
+         /* We use non-virtual registers as pairs (f13, f15) and (f12, f14) */
+         f12 = make_fpr(12);
+         f13 = make_fpr(13);
+         f14 = make_fpr(14);
+         f15 = make_fpr(15);
+
+         /* operand --> (f13, f15) */
+         addInstr(env, s390_insn_move(8, f13, op_hi));
+         addInstr(env, s390_insn_move(8, f15, op_lo));
+
+         dst = newVRegF(env);
+         addInstr(env, s390_insn_bfp128_unop(16, bfpop, f12, f14, f13, f15,
+                                             rounding_mode));
+
+         /* Move result to virtual destination registers */
+         addInstr(env, s390_insn_move(8, dst, f12));
+         return dst;
+      }
+      }
+
+      /* Process operand */
+      if (integer_operand) {
+         h1  = s390_isel_int_expr(env, left);
+      } else {
+         h1  = s390_isel_float_expr(env, left);
+      }
+
+      dst = newVRegF(env);
+      rounding_mode = decode_rounding_mode(expr->Iex.Binop.arg1);
+      addInstr(env, s390_insn_bfp_unop(size, bfpop, dst, h1, rounding_mode));
+      return dst;
+   }
+
+      /* --------- UNARY OP --------- */
+   case Iex_Unop: {
+      IROp    op   = expr->Iex.Unop.op;
+      IRExpr *left = expr->Iex.Unop.arg;
+      s390_bfp_unop_t bfpop;
+      s390_round_t rounding_mode;
+      HReg h1, dst;
+
+      if (op == Iop_F128HItoF64 || op == Iop_F128LOtoF64) {
+         HReg dst_hi, dst_lo;
+
+         s390_isel_float128_expr(&dst_hi, &dst_lo, env, left);
+         return op == Iop_F128LOtoF64 ? dst_lo : dst_hi;
+      }
+
+      if (op == Iop_ReinterpI64asF64) {
+         dst = newVRegF(env);
+         h1  = s390_isel_int_expr(env, left);     /* Process the operand */
+         addInstr(env, s390_insn_move(size, dst, h1));
+
+         return dst;
+      }
+
+      switch (op) {
+      case Iop_NegF32:
+      case Iop_NegF64:
+         if (left->tag == Iex_Unop &&
+             (left->Iex.Unop.op == Iop_AbsF32 || left->Iex.Unop.op == Iop_AbsF64))
+            bfpop = S390_BFP_NABS;
+         else
+            bfpop = S390_BFP_NEG;
+         break;
+
+      case Iop_AbsF32:
+      case Iop_AbsF64:        bfpop = S390_BFP_ABS;  break;
+      case Iop_I32StoF64:     bfpop = S390_BFP_I32_TO_F64;  break;
+      case Iop_F32toF64:      bfpop = S390_BFP_F32_TO_F64;  break;
+      default:
+         goto irreducible;
+      }
+
+      /* Process operand */
+      if (op == Iop_I32StoF64)
+         h1 = s390_isel_int_expr(env, left);
+      else if (bfpop == S390_BFP_NABS)
+         h1 = s390_isel_float_expr(env, left->Iex.Unop.arg);
+      else
+         h1 = s390_isel_float_expr(env, left);
+
+      dst = newVRegF(env);
+      rounding_mode = S390_ROUND_NEAREST_EVEN;  /* will not be used later on */
+      addInstr(env, s390_insn_bfp_unop(size, bfpop, dst, h1, rounding_mode));
+      return dst;
+   }
+
+   default:
+      goto irreducible;
+   }
+
+   /* We get here if no pattern matched. */
+ irreducible:
+   ppIRExpr(expr);
+   vpanic("s390_isel_float_expr: cannot reduce tree");
+}
+
+
+static HReg
+s390_isel_float_expr(ISelEnv *env, IRExpr *expr)
+{
+   HReg dst = s390_isel_float_expr_wrk(env, expr);
+
+   /* Sanity checks ... */
+   vassert(hregClass(dst) == HRcFlt64);
+   vassert(hregIsVirtual(dst));
+
+   return dst;
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Condition Code                              ---*/
+/*---------------------------------------------------------*/
+
+/* This function handles all operators that produce a 1-bit result */
+static s390_cc_t
+s390_isel_cc(ISelEnv *env, IRExpr *cond)
+{
+   UChar size;
+
+   vassert(typeOfIRExpr(env->type_env, cond) == Ity_I1);
+
+   /* Constant: either 1 or 0 */
+   if (cond->tag == Iex_Const) {
+      vassert(cond->Iex.Const.con->tag == Ico_U1);
+      vassert(cond->Iex.Const.con->Ico.U1 == True
+              || cond->Iex.Const.con->Ico.U1 == False);
+
+      return cond->Iex.Const.con->Ico.U1 == True ? S390_CC_ALWAYS
+                                                 : S390_CC_NEVER;
+   }
+
+   /* Variable: values are 1 or 0 */
+   if (cond->tag == Iex_RdTmp) {
+      IRTemp tmp = cond->Iex.RdTmp.tmp;
+      HReg   reg = lookupIRTemp(env, tmp);
+
+      /* Load-and-test does not modify REG; so this is OK. */
+      if (typeOfIRTemp(env->type_env, tmp) == Ity_I1)
+         size = 4;
+      else
+         size = sizeofIRType(typeOfIRTemp(env->type_env, tmp));
+      addInstr(env, s390_insn_test(size, s390_opnd_reg(reg)));
+      return S390_CC_NE;
+   }
+
+   /* Unary operators */
+   if (cond->tag == Iex_Unop) {
+      IRExpr *arg = cond->Iex.Unop.arg;
+
+      switch (cond->Iex.Unop.op) {
+      case Iop_Not1:  /* Not1(cond) */
+         /* Generate code for EXPR, and negate the test condition */
+         return s390_cc_invert(s390_isel_cc(env, arg));
+
+         /* Iop_32/64to1  select the LSB from their operand */
+      case Iop_32to1:
+      case Iop_64to1: {
+         HReg h1  = s390_isel_int_expr(env, arg);
+         HReg dst = newVRegI(env);
+
+         size = sizeofIRType(typeOfIRExpr(env->type_env, arg));
+
+         /* AND a copy; the vreg returned by s390_isel_int_expr must not
+            be modified. */
+         addInstr(env, s390_insn_move(size, dst, h1));
+         addInstr(env, s390_insn_alu(size, S390_ALU_AND, dst, s390_opnd_imm(1)));
+         addInstr(env, s390_insn_test(size, s390_opnd_reg(dst)));
+         return S390_CC_NE;
+      }
+
+      case Iop_CmpNEZ8:
+      case Iop_CmpNEZ16: {
+         s390_opnd_RMI src;
+         s390_unop_t   op;
+         HReg dst;
+
+         op  = (cond->Iex.Unop.op == Iop_CmpNEZ8) ? S390_ZERO_EXTEND_8
+                                                  : S390_ZERO_EXTEND_16;
+         dst = newVRegI(env);
+         src = s390_isel_int_expr_RMI(env, arg);
+         addInstr(env, s390_insn_unop(4, op, dst, src));
+         addInstr(env, s390_insn_test(4, s390_opnd_reg(dst)));
+         return S390_CC_NE;
+      }
+
+      case Iop_CmpNEZ32:
+      case Iop_CmpNEZ64: {
+         s390_opnd_RMI src;
+
+         src = s390_isel_int_expr_RMI(env, arg);
+         size = sizeofIRType(typeOfIRExpr(env->type_env, arg));
+         addInstr(env, s390_insn_test(size, src));
+         return S390_CC_NE;
+      }
+
+      default:
+         goto fail;
+      }
+   }
+
+   /* Binary operators */
+   if (cond->tag == Iex_Binop) {
+      IRExpr *arg1 = cond->Iex.Binop.arg1;
+      IRExpr *arg2 = cond->Iex.Binop.arg2;
+      HReg reg1, reg2;
+
+      size = sizeofIRType(typeOfIRExpr(env->type_env, arg1));
+
+      switch (cond->Iex.Binop.op) {
+         s390_unop_t op;
+         s390_cc_t   result;
+
+      case Iop_CmpEQ8:
+      case Iop_CasCmpEQ8:
+         op     = S390_ZERO_EXTEND_8;
+         result = S390_CC_E;
+         goto do_compare_ze;
+
+      case Iop_CmpNE8:
+      case Iop_CasCmpNE8:
+         op     = S390_ZERO_EXTEND_8;
+         result = S390_CC_NE;
+         goto do_compare_ze;
+
+      case Iop_CmpEQ16:
+      case Iop_CasCmpEQ16:
+         op     = S390_ZERO_EXTEND_16;
+         result = S390_CC_E;
+         goto do_compare_ze;
+
+      case Iop_CmpNE16:
+      case Iop_CasCmpNE16:
+         op     = S390_ZERO_EXTEND_16;
+         result = S390_CC_NE;
+         goto do_compare_ze;
+
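+      /* Zero-extend both operands to 32 bits before comparing. EQ/NE are
+         unaffected by identical zero extension of both operands. */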
+      do_compare_ze: {
+            s390_opnd_RMI op1, op2;
+
+            op1  = s390_isel_int_expr_RMI(env, arg1);
+            reg1 = newVRegI(env);
+            addInstr(env, s390_insn_unop(4, op, reg1, op1));
+
+            op2  = s390_isel_int_expr_RMI(env, arg2);
+            reg2 = newVRegI(env);
+            addInstr(env, s390_insn_unop(4, op, reg2, op2));  /* zero extend */
+
+            op2 = s390_opnd_reg(reg2);
+            addInstr(env, s390_insn_compare(4, reg1, op2, False));
+
+            return result;
+         }
+
+      case Iop_CmpEQ32:
+      case Iop_CmpEQ64:
+      case Iop_CasCmpEQ32:
+      case Iop_CasCmpEQ64:
+         result = S390_CC_E;
+         goto do_compare;
+
+      case Iop_CmpNE32:
+      case Iop_CmpNE64:
+      case Iop_CasCmpNE32:
+      case Iop_CasCmpNE64:
+         result = S390_CC_NE;
+         goto do_compare;
+
+      do_compare: {
+            HReg op1;
+            s390_opnd_RMI op2;
+
+            order_commutative_operands(arg1, arg2);
+
+            op1 = s390_isel_int_expr(env, arg1);
+            op2 = s390_isel_int_expr_RMI(env, arg2);
+
+            addInstr(env, s390_insn_compare(size, op1, op2, False));
+
+            return result;
+         }
+
+      case Iop_CmpLT32S:
+      case Iop_CmpLE32S:
+      case Iop_CmpLT64S:
+      case Iop_CmpLE64S: {
+         HReg op1;
+         s390_opnd_RMI op2;
+
+         op1 = s390_isel_int_expr(env, arg1);
+         op2 = s390_isel_int_expr_RMI(env, arg2);
+
+         addInstr(env, s390_insn_compare(size, op1, op2, True));
+
+         return (cond->Iex.Binop.op == Iop_CmpLT32S ||
+                 cond->Iex.Binop.op == Iop_CmpLT64S) ? S390_CC_L : S390_CC_LE;
+      }
+
+      case Iop_CmpLT32U:
+      case Iop_CmpLE32U:
+      case Iop_CmpLT64U:
+      case Iop_CmpLE64U: {
+         HReg op1;
+         s390_opnd_RMI op2;
+
+         op1 = s390_isel_int_expr(env, arg1);
+         op2 = s390_isel_int_expr_RMI(env, arg2);
+
+         addInstr(env, s390_insn_compare(size, op1, op2, False));
+
+         return (cond->Iex.Binop.op == Iop_CmpLT32U ||
+                 cond->Iex.Binop.op == Iop_CmpLT64U) ? S390_CC_L : S390_CC_LE;
+      }
+
+      default:
+         goto fail;
+      }
+   }
+
+ fail:
+   ppIRExpr(cond);
+   vpanic("s390_isel_cc: unexpected operator");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Statements                                  ---*/
+/*---------------------------------------------------------*/
+
+static void
+s390_isel_stmt(ISelEnv *env, IRStmt *stmt)
+{
+   if (vex_traceflags & VEX_TRACE_VCODE) {
+      vex_printf("\n -- ");
+      ppIRStmt(stmt);
+      vex_printf("\n");
+   }
+
+   switch (stmt->tag) {
+
+      /* --------- STORE --------- */
+   case Ist_Store: {
+      IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Store.data);
+      s390_amode *am;
+      HReg src;
+
+      if (stmt->Ist.Store.end != Iend_BE) goto stmt_fail;
+
+      am = s390_isel_amode(env, stmt->Ist.Store.addr);
+
+      switch (tyd) {
+      case Ity_I8:
+      case Ity_I16:
+      case Ity_I32:
+      case Ity_I64:
+         src = s390_isel_int_expr(env, stmt->Ist.Store.data);
+         break;
+
+      case Ity_F32:
+      case Ity_F64:
+         src = s390_isel_float_expr(env, stmt->Ist.Store.data);
+         break;
+
+      case Ity_F128:
+         /* Cannot occur. No such instruction */
+         vpanic("Ist_Store with F128 data");
+
+      default:
+         goto stmt_fail;
+      }
+
+      addInstr(env, s390_insn_store(sizeofIRType(tyd), am, src));
+      return;
+   }
+
+      /* --------- PUT --------- */
+   case Ist_Put: {
+      IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data);
+      HReg src;
+      s390_amode *am;
+
+      am = s390_amode_for_guest_state(stmt->Ist.Put.offset);
+
+      switch (tyd) {
+      case Ity_I8:
+      case Ity_I16:
+      case Ity_I32:
+      case Ity_I64:
+         src = s390_isel_int_expr(env, stmt->Ist.Put.data);
+         break;
+
+      case Ity_F32:
+      case Ity_F64:
+         src = s390_isel_float_expr(env, stmt->Ist.Put.data);
+         break;
+
+      case Ity_F128:
+         /* Does not occur. See function put_fpr_pair. */
+         vpanic("Ist_Put with F128 data");
+
+      default:
+         goto stmt_fail;
+      }
+
+      addInstr(env, s390_insn_store(sizeofIRType(tyd), am, src));
+      return;
+   }
+
+      /* --------- TMP --------- */
+   case Ist_WrTmp: {
+      IRTemp tmp = stmt->Ist.WrTmp.tmp;
+      IRType tyd = typeOfIRTemp(env->type_env, tmp);
+      HReg src, dst;
+
+      switch (tyd) {
+      case Ity_I128: {
+         HReg dst_hi, dst_lo, res_hi, res_lo;
+
+         s390_isel_int128_expr(&res_hi, &res_lo, env, stmt->Ist.WrTmp.data);
+         lookupIRTemp128(&dst_hi, &dst_lo, env, tmp);
+
+         addInstr(env, s390_insn_move(8, dst_hi, res_hi));
+         addInstr(env, s390_insn_move(8, dst_lo, res_lo));
+         return;
+      }
+
+      case Ity_I8:
+      case Ity_I16:
+      case Ity_I32:
+      case Ity_I64:
+         src = s390_isel_int_expr(env, stmt->Ist.WrTmp.data);
+         dst = lookupIRTemp(env, tmp);
+         break;
+
+      case Ity_I1: {
+         s390_cc_t cond = s390_isel_cc(env, stmt->Ist.WrTmp.data);
+         dst = lookupIRTemp(env, tmp);
+         addInstr(env, s390_insn_cc2bool(dst, cond));
+         return;
+      }
+
+      case Ity_F32:
+      case Ity_F64:
+         src = s390_isel_float_expr(env, stmt->Ist.WrTmp.data);
+         dst = lookupIRTemp(env, tmp);
+         break;
+
+      case Ity_F128: {
+         HReg dst_hi, dst_lo, res_hi, res_lo;
+
+         s390_isel_float128_expr(&res_hi, &res_lo, env, stmt->Ist.WrTmp.data);
+         lookupIRTemp128(&dst_hi, &dst_lo, env, tmp);
+
+         addInstr(env, s390_insn_move(8, dst_hi, res_hi));
+         addInstr(env, s390_insn_move(8, dst_lo, res_lo));
+         return;
+      }
+
+      default:
+         goto stmt_fail;
+      }
+
+      addInstr(env, s390_insn_move(sizeofIRType(tyd), dst, src));
+      return;
+   }
+
+      /* --------- Call to DIRTY helper --------- */
+   case Ist_Dirty: {
+      IRType   retty;
+      IRDirty* d = stmt->Ist.Dirty.details;
+      Bool     passBBP;
+
+      if (d->nFxState == 0)
+         vassert(!d->needsBBP);
+
+      passBBP = toBool(d->nFxState > 0 && d->needsBBP);
+
+      doHelperCall(env, passBBP, d->guard, d->cee, d->args);
+
+      /* Now figure out what to do with the returned value, if any. */
+      if (d->tmp == IRTemp_INVALID)
+         /* No return value.  Nothing to do. */
+         return;
+
+      retty = typeOfIRTemp(env->type_env, d->tmp);
+      if (retty == Ity_I64 || retty == Ity_I32
+          || retty == Ity_I16 || retty == Ity_I8) {
+         /* Move the returned value into the return register */
+         HReg dst = lookupIRTemp(env, d->tmp);
+         addInstr(env, s390_insn_move(sizeofIRType(retty), dst,
+                                      mkHReg(S390_REGNO_RETURN_VALUE,
+                                             HRcInt64, False)));
+         return;
+      }
+      break;
+   }
+
+   case Ist_CAS:
+      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
+         IRCAS *cas = stmt->Ist.CAS.details;
+         s390_amode *op2 = s390_isel_amode(env, cas->addr);
+         HReg op3 = s390_isel_int_expr(env, cas->dataLo);  /* new value */
+         HReg op1 = s390_isel_int_expr(env, cas->expdLo);  /* expected value */
+         HReg old = lookupIRTemp(env, cas->oldLo);
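+         /* Per the IR's CAS semantics, OLD receives the value observed at
+            the address whether or not the swap succeeded. */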
+
+         if (typeOfIRTemp(env->type_env, cas->oldLo) == Ity_I32) {
+            addInstr(env, s390_insn_cas(4, op1, op2, op3, old));
+         } else {
+            addInstr(env, s390_insn_cas(8, op1, op2, op3, old));
+         }
+         return;
+      } else {
+         vpanic("compare double and swap not implemented\n");
+      }
+      break;
+
+      /* --------- EXIT --------- */
+   case Ist_Exit: {
+      s390_opnd_RMI dst;
+      s390_cc_t cond;
+      IRConstTag tag = stmt->Ist.Exit.dst->tag;
+
+      if (tag != Ico_U64)
+         vpanic("s390_isel_stmt: Ist_Exit: dst is not a 64-bit value");
+
+      dst  = s390_isel_int_expr_RMI(env, IRExpr_Const(stmt->Ist.Exit.dst));
+      cond = s390_isel_cc(env, stmt->Ist.Exit.guard);
+      addInstr(env, s390_insn_branch(stmt->Ist.Exit.jk, cond, dst));
+      return;
+   }
+
+      /* --------- MEM FENCE --------- */
+   case Ist_MBE:
+      switch (stmt->Ist.MBE.event) {
+         case Imbe_Fence:
+            addInstr(env, s390_insn_mfence());
+            return;
+         default:
+            break;
+      }
+      break;
+
+      /* --------- Miscellaneous --------- */
+
+   case Ist_PutI:    /* Not needed */
+   case Ist_IMark:   /* Doesn't generate any executable code */
+   case Ist_NoOp:    /* Doesn't generate any executable code */
+   case Ist_AbiHint: /* Meaningless in IR */
+      return;
+
+   default:
+      break;
+   }
+
+ stmt_fail:
+   ppIRStmt(stmt);
+   vpanic("s390_isel_stmt");
+}
+
+
+/*---------------------------------------------------------*/
+/*--- ISEL: Basic block terminators (Nexts)             ---*/
+/*---------------------------------------------------------*/
+
+static void
+iselNext(ISelEnv *env, IRExpr *next, IRJumpKind jk)
+{
+   s390_opnd_RMI dst;
+
+   if (vex_traceflags & VEX_TRACE_VCODE) {
+      vex_printf("\n-- goto {");
+      ppIRJumpKind(jk);
+      vex_printf("} ");
+      ppIRExpr(next);
+      vex_printf("\n");
+   }
+
+   dst = s390_isel_int_expr_RMI(env, next);
+   addInstr(env, s390_insn_branch(jk, S390_CC_ALWAYS, dst));
+}
+
+
+/*---------------------------------------------------------*/
+/*--- Insn selector top-level                           ---*/
+/*---------------------------------------------------------*/
+
+/* Translate an entire SB to s390 code. */
+
+HInstrArray *
+iselSB_S390(IRSB *bb, VexArch arch_host, VexArchInfo *archinfo_host,
+             VexAbiInfo *vbi)
+{
+   UInt     i, j;
+   HReg     hreg, hregHI;
+   ISelEnv *env;
+   UInt     hwcaps_host = archinfo_host->hwcaps;
+
+   /* KLUDGE: export archinfo_host. */
+   s390_archinfo_host = archinfo_host;
+
+   /* Do some sanity checks */
+   vassert((VEX_HWCAPS_S390X(hwcaps_host) & ~(VEX_HWCAPS_S390X_ALL)) == 0);
+
+   /* Make up an initial environment to use. */
+   env = LibVEX_Alloc(sizeof(ISelEnv));
+   env->vreg_ctr = 0;
+
+   /* Set up output code array. */
+   env->code = newHInstrArray();
+
+   /* Copy BB's type env. */
+   env->type_env = bb->tyenv;
+
+   /* Make up an IRTemp -> virtual HReg mapping.  This doesn't
+      change as we go along. types_used is declared as Int although it
+      ought to be unsigned; internally we use an unsigned type, so
+      assert here that the value is non-negative. */
+   vassert(bb->tyenv->types_used >= 0);
+
+   env->n_vregmap = bb->tyenv->types_used;
+   env->vregmap   = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+   env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
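+   /* vregmapHI is used only for 128-bit temps (Ity_I128, Ity_F128); such
+      a temp occupies two 64-bit vregs, one from each map. */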
+
+   /* and finally ... */
+   env->hwcaps    = hwcaps_host;
+
+   /* For each IR temporary, allocate a suitably-kinded virtual
+      register. */
+   j = 0;
+   for (i = 0; i < env->n_vregmap; i++) {
+      hregHI = hreg = INVALID_HREG;
+      switch (bb->tyenv->types[i]) {
+      case Ity_I1:
+      case Ity_I8:
+      case Ity_I16:
+      case Ity_I32:
+         hreg = mkHReg(j++, HRcInt64, True);
+         break;
+
+      case Ity_I64:
+         hreg   = mkHReg(j++, HRcInt64, True);
+         break;
+
+      case Ity_I128:
+         hreg   = mkHReg(j++, HRcInt64, True);
+         hregHI = mkHReg(j++, HRcInt64, True);
+         break;
+
+      case Ity_F32:
+      case Ity_F64:
+         hreg = mkHReg(j++, HRcFlt64, True);
+         break;
+
+      case Ity_F128:
+         hreg   = mkHReg(j++, HRcFlt64, True);
+         hregHI = mkHReg(j++, HRcFlt64, True);
+         break;
+
+      case Ity_V128: /* fall through */
+      default:
+         ppIRType(bb->tyenv->types[i]);
+         vpanic("s390_isel_sb: IRTemp type");
+      }
+
+      env->vregmap[i]   = hreg;
+      env->vregmapHI[i] = hregHI;
+   }
+   env->vreg_ctr = j;
+
+   /* Ok, finally we can iterate over the statements. */
+   for (i = 0; i < bb->stmts_used; i++)
+      if (bb->stmts[i])
+         s390_isel_stmt(env, bb->stmts[i]);
+
+   iselNext(env, bb->next, bb->jumpkind);
+
+   /* Record the number of vregs we used. */
+   env->code->n_vregs = env->vreg_ctr;
+
+   return env->code;
+}
+
+/*---------------------------------------------------------------*/
+/*--- end                                    host_s390_isel.c ---*/
+/*---------------------------------------------------------------*/
diff --git a/main/VEX/priv/host_x86_defs.c b/main/VEX/priv/host_x86_defs.c
index e11b916..e26a076 100644
--- a/main/VEX/priv/host_x86_defs.c
+++ b/main/VEX/priv/host_x86_defs.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -1989,7 +1989,9 @@
    imperative to emit position-independent code. */
 
 Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i, 
-                    Bool mode64, void* dispatch )
+                    Bool mode64,
+                    void* dispatch_unassisted,
+                    void* dispatch_assisted )
 {
    UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
 
@@ -2304,7 +2306,11 @@
       *p++ = toUChar(0xD0 + irno);
       goto done;
 
-   case Xin_Goto:
+   case Xin_Goto: {
+      void* dispatch_to_use = NULL;
+      vassert(dispatch_unassisted != NULL);
+      vassert(dispatch_assisted != NULL);
+
       /* Use ptmp for backpatching conditional jumps. */
       ptmp = NULL;
 
@@ -2318,7 +2324,10 @@
       }
 
       /* If a non-boring, set %ebp (the guest state pointer)
-         appropriately. */
+         appropriately.  Also, decide which dispatcher we need to
+         use. */
+      dispatch_to_use = dispatch_assisted;
+
       /* movl $magic_number, %ebp */
       switch (i->Xin.Goto.jk) {
          case Ijk_ClientReq: 
@@ -2366,6 +2375,7 @@
          case Ijk_Ret:
 	 case Ijk_Call:
          case Ijk_Boring:
+            dispatch_to_use = dispatch_unassisted;
             break;
          default: 
             ppIRJumpKind(i->Xin.Goto.jk);
@@ -2390,10 +2400,10 @@
          after the load of %eax since %edx might be carrying the value
          destined for %eax immediately prior to this Xin_Goto. */
       vassert(sizeof(UInt) == sizeof(void*));
-      vassert(dispatch != NULL);
+      vassert(dispatch_to_use != NULL);
       /* movl $imm32, %edx */
       *p++ = 0xBA;
-      p = emit32(p, (UInt)Ptr_to_ULong(dispatch));
+      p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use));
 
       /* jmp *%edx */
       *p++ = 0xFF;
@@ -2406,6 +2416,7 @@
          *ptmp = toUChar(delta-1);
       }
       goto done;
+   }
 
    case Xin_CMov32:
       vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
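The Xin_Goto case above now threads two dispatcher entry points through the
emitter. The selection rule is visible in the hunk: start from the assisted
dispatcher, and downgrade to the unassisted one only for ordinary jump
kinds, since every other kind (client requests, syscalls and the like)
first stores a magic number in %ebp that only the assisted dispatcher knows
to inspect. Reduced to plain C, the choice is:

   /* Sketch of the dispatcher choice made in emit_X86Instr's
      Xin_Goto case above. */
   static void* choose_dispatcher ( IRJumpKind jk,
                                    void* dispatch_unassisted,
                                    void* dispatch_assisted )
   {
      switch (jk) {
         case Ijk_Ret: case Ijk_Call: case Ijk_Boring:
            return dispatch_unassisted; /* no guest-state marker needed */
         default:
            return dispatch_assisted;   /* %ebp carries a magic marker */
      }
   }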
diff --git a/main/VEX/priv/host_x86_defs.h b/main/VEX/priv/host_x86_defs.h
index fde700a..f68a426 100644
--- a/main/VEX/priv/host_x86_defs.h
+++ b/main/VEX/priv/host_x86_defs.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -673,7 +673,9 @@
 extern void         mapRegs_X86Instr     ( HRegRemap*, X86Instr*, Bool );
 extern Bool         isMove_X86Instr      ( X86Instr*, HReg*, HReg* );
 extern Int          emit_X86Instr        ( UChar* buf, Int nbuf, X86Instr*, 
-                                           Bool, void* dispatch );
+                                           Bool,
+                                           void* dispatch_unassisted,
+                                           void* dispatch_assisted );
 
 extern void genSpill_X86  ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                             HReg rreg, Int offset, Bool );
diff --git a/main/VEX/priv/host_x86_isel.c b/main/VEX/priv/host_x86_isel.c
index fc5cf05..81896b3 100644
--- a/main/VEX/priv/host_x86_isel.c
+++ b/main/VEX/priv/host_x86_isel.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -42,6 +42,7 @@
 #include "main_globals.h"
 #include "host_generic_regs.h"
 #include "host_generic_simd64.h"
+#include "host_generic_simd128.h"
 #include "host_x86_defs.h"
 
 /* TODO 21 Apr 2005:
@@ -1835,6 +1836,25 @@
       }
    }
 
+   /* CmpNE32(ccall, 32-bit constant) (--smc-check=all optimisation).
+      Saves a "movl %eax, %tmp" compared to the default route. */
+   if (e->tag == Iex_Binop 
+       && e->Iex.Binop.op == Iop_CmpNE32
+       && e->Iex.Binop.arg1->tag == Iex_CCall
+       && e->Iex.Binop.arg2->tag == Iex_Const) {
+      IRExpr* cal = e->Iex.Binop.arg1;
+      IRExpr* con = e->Iex.Binop.arg2;
+      /* clone & partial-eval of generic Iex_CCall and Iex_Const cases */
+      vassert(cal->Iex.CCall.retty == Ity_I32); /* else ill-typed IR */
+      vassert(con->Iex.Const.con->tag == Ico_U32);
+      /* Marshal args, do the call. */
+      doHelperCall( env, False, NULL, cal->Iex.CCall.cee, cal->Iex.CCall.args );
+      addInstr(env, X86Instr_Alu32R(Xalu_CMP,
+                                    X86RMI_Imm(con->Iex.Const.con->Ico.U32),
+                                    hregX86_EAX()));
+      return Xcc_NZ;
+   }
+
    /* Cmp*32*(x,y) */
    if (e->tag == Iex_Binop 
        && (e->Iex.Binop.op == Iop_CmpEQ32
@@ -2367,12 +2387,16 @@
          case Iop_QAdd16Ux4:
             fn = (HWord)h_generic_calc_QAdd16Ux4; goto binnish;
 
-         case Iop_QNarrow32Sx2:
-            fn = (HWord)h_generic_calc_QNarrow32Sx2; goto binnish;
-         case Iop_QNarrow16Sx4:
-            fn = (HWord)h_generic_calc_QNarrow16Sx4; goto binnish;
-         case Iop_QNarrow16Ux4:
-            fn = (HWord)h_generic_calc_QNarrow16Ux4; goto binnish;
+         case Iop_QNarrowBin32Sto16Sx4:
+            fn = (HWord)h_generic_calc_QNarrowBin32Sto16Sx4; goto binnish;
+         case Iop_QNarrowBin16Sto8Sx8:
+            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
+         case Iop_QNarrowBin16Sto8Ux8:
+            fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
+         case Iop_NarrowBin16to8x8:
+            fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
+         case Iop_NarrowBin32to16x4:
+            fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
 
          case Iop_QSub8Sx8:
             fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
@@ -3116,6 +3140,7 @@
 #  define SSE2_OR_ABOVE                                   \
        (env->hwcaps & VEX_HWCAPS_X86_SSE2)
 
+   HWord     fn = 0; /* address of helper fn, if required */
    MatchInfo mi;
    Bool      arg1isEReg = False;
    X86SseOp  op = Xsse_INVALID;
@@ -3481,11 +3506,11 @@
          return dst;
       }
 
-      case Iop_QNarrow32Sx4: 
+      case Iop_QNarrowBin32Sto16Sx8: 
          op = Xsse_PACKSSD; arg1isEReg = True; goto do_SseReRg;
-      case Iop_QNarrow16Sx8: 
+      case Iop_QNarrowBin16Sto8Sx16: 
          op = Xsse_PACKSSW; arg1isEReg = True; goto do_SseReRg;
-      case Iop_QNarrow16Ux8: 
+      case Iop_QNarrowBin16Sto8Ux16: 
          op = Xsse_PACKUSW; arg1isEReg = True; goto do_SseReRg;
 
       case Iop_InterleaveHI8x16: 
@@ -3582,6 +3607,59 @@
          return dst;
       }
 
+      case Iop_NarrowBin32to16x8:
+         fn = (HWord)h_generic_calc_NarrowBin32to16x8;
+         goto do_SseAssistedBinary;
+      case Iop_NarrowBin16to8x16:
+         fn = (HWord)h_generic_calc_NarrowBin16to8x16;
+         goto do_SseAssistedBinary;
+      do_SseAssistedBinary: {
+         /* As with the amd64 case (where this is copied from) we
+            generate pretty bad code. */
+         vassert(fn != 0);
+         HReg dst = newVRegV(env);
+         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg argp = newVRegI(env);
+         /* subl $112, %esp         -- make a space */
+         sub_from_esp(env, 112);
+         /* leal 48(%esp), %r_argp  -- point into it */
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
+                                      argp));
+         /* andl $-16, %r_argp      -- 16-align the pointer */
+         addInstr(env, X86Instr_Alu32R(Xalu_AND,
+                                       X86RMI_Imm( ~(UInt)15 ), 
+                                       argp));
+         /* Prepare 3 arg regs:
+            leal  0(%r_argp), %eax
+            leal 16(%r_argp), %edx
+            leal 32(%r_argp), %ecx
+         */
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
+                                      hregX86_EAX()));
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
+                                      hregX86_EDX()));
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
+                                      hregX86_ECX()));
+         /* Store the two args, at (%edx) and (%ecx):
+            movupd  %argL, 0(%edx)
+            movupd  %argR, 0(%ecx)
+         */
+         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
+                                        X86AMode_IR(0, hregX86_EDX())));
+         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
+                                        X86AMode_IR(0, hregX86_ECX())));
+         /* call the helper */
+         addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 3 ));
+         /* fetch the result from memory, using %r_argp, which the
+            register allocator will keep alive across the call. */
+         addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
+                                        X86AMode_IR(0, argp)));
+         /* and finally, clear the space */
+         add_to_esp(env, 112);
+         return dst;
+      }
+
       default:
          break;
    } /* switch (e->Iex.Binop.op) */
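SSE has no unsaturating pack instruction, so the do_SseAssistedBinary route
above spills both V128 arguments to a 16-aligned scratch area and calls a C
helper with three pointers (result, argL, argR) in %eax/%edx/%ecx. A
portable model of the 32-to-16 narrow such a helper computes is sketched
below; the authoritative version is h_generic_calc_NarrowBin32to16x8 in
host_generic_simd128.c, and the lane layout here (argL filling the high
half) is an assumption of the sketch:

   /* Sketch of an unsaturating 32->16 binary narrow: truncate each
      32-bit lane of both inputs and concatenate the halves. */
   typedef union { unsigned short w16[8]; unsigned int w32[4]; } V128sk;

   static void narrowbin32to16x8_sketch ( V128sk* res,
                                          V128sk* argL, V128sk* argR )
   {
      int i;
      for (i = 0; i < 4; i++) {
         res->w16[i]     = (unsigned short)argR->w32[i]; /* low half  */
         res->w16[i + 4] = (unsigned short)argL->w32[i]; /* high half */
      }
   }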
diff --git a/main/VEX/priv/ir_defs.c b/main/VEX/priv/ir_defs.c
index 25a87c9..f68f21f 100644
--- a/main/VEX/priv/ir_defs.c
+++ b/main/VEX/priv/ir_defs.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -56,6 +56,7 @@
       case Ity_I128:    vex_printf( "I128"); break;
       case Ity_F32:     vex_printf( "F32");  break;
       case Ity_F64:     vex_printf( "F64");  break;
+      case Ity_F128:    vex_printf( "F128"); break;
       case Ity_V128:    vex_printf( "V128"); break;
       default: vex_printf("ty = 0x%x\n", (Int)ty);
                vpanic("ppIRType");
@@ -64,7 +65,7 @@
 
 void ppIRConst ( IRConst* con )
 {
-   union { ULong i64; Double f64; } u;
+   union { ULong i64; Double f64; UInt i32; Float f32; } u;
    vassert(sizeof(ULong) == sizeof(Double));
    switch (con->tag) {
       case Ico_U1:   vex_printf( "%d:I1",        con->Ico.U1 ? 1 : 0); break;
@@ -72,6 +73,10 @@
       case Ico_U16:  vex_printf( "0x%x:I16",     (UInt)(con->Ico.U16)); break;
       case Ico_U32:  vex_printf( "0x%x:I32",     (UInt)(con->Ico.U32)); break;
       case Ico_U64:  vex_printf( "0x%llx:I64",   (ULong)(con->Ico.U64)); break;
+      case Ico_F32:  u.f32 = con->Ico.F32;
+                     vex_printf( "F32{0x%x}",   u.i32);
+                     break;
+      case Ico_F32i: vex_printf( "F32i{0x%x}",   con->Ico.F32i); break;
       case Ico_F64:  u.f64 = con->Ico.F64;
                      vex_printf( "F64{0x%llx}",  u.i64);
                      break;
@@ -215,6 +220,10 @@
       case Iop_DivS32: vex_printf("DivS32"); return;
       case Iop_DivU64: vex_printf("DivU64"); return;
       case Iop_DivS64: vex_printf("DivS64"); return;
+      case Iop_DivU64E: vex_printf("DivU64E"); return;
+      case Iop_DivS64E: vex_printf("DivS64E"); return;
+      case Iop_DivU32E: vex_printf("DivU32E"); return;
+      case Iop_DivS32E: vex_printf("DivS32E"); return;
 
       case Iop_DivModU64to32: vex_printf("DivModU64to32"); return;
       case Iop_DivModS64to32: vex_printf("DivModS64to32"); return;
@@ -222,6 +231,8 @@
       case Iop_DivModU128to64: vex_printf("DivModU128to64"); return;
       case Iop_DivModS128to64: vex_printf("DivModS128to64"); return;
 
+      case Iop_DivModS64to64: vex_printf("DivModS64to64"); return;
+
       case Iop_16HIto8:  vex_printf("16HIto8"); return;
       case Iop_16to8:    vex_printf("16to8");   return;
       case Iop_8HLto16:  vex_printf("8HLto16"); return;
@@ -238,6 +249,14 @@
       case Iop_128to64:   vex_printf("128to64");   return;
       case Iop_64HLto128: vex_printf("64HLto128"); return;
 
+      case Iop_CmpF32:    vex_printf("CmpF32");    return;
+      case Iop_F32toI16S: vex_printf("F32toI16S");  return;
+      case Iop_F32toI32S: vex_printf("F32toI32S");  return;
+      case Iop_F32toI64S: vex_printf("F32toI64S");  return;
+      case Iop_I16StoF32: vex_printf("I16StoF32");  return;
+      case Iop_I32StoF32: vex_printf("I32StoF32");  return;
+      case Iop_I64StoF32: vex_printf("I64StoF32");  return;
+
       case Iop_AddF64:    vex_printf("AddF64"); return;
       case Iop_SubF64:    vex_printf("SubF64"); return;
       case Iop_MulF64:    vex_printf("MulF64"); return;
@@ -251,6 +270,32 @@
       case Iop_MulF32:    vex_printf("MulF32"); return;
       case Iop_DivF32:    vex_printf("DivF32"); return;
 
+        /* 128 bit floating point */
+      case Iop_AddF128:   vex_printf("AddF128");  return;
+      case Iop_SubF128:   vex_printf("SubF128");  return;
+      case Iop_MulF128:   vex_printf("MulF128");  return;
+      case Iop_DivF128:   vex_printf("DivF128");  return;
+      case Iop_AbsF128:   vex_printf("AbsF128");  return;
+      case Iop_NegF128:   vex_printf("NegF128");  return;
+      case Iop_SqrtF128:  vex_printf("SqrtF128"); return;
+      case Iop_CmpF128:   vex_printf("CmpF128");  return;
+
+      case Iop_F64HLtoF128: vex_printf("F64HLtoF128"); return;
+      case Iop_F128HItoF64: vex_printf("F128HItoF64"); return;
+      case Iop_F128LOtoF64: vex_printf("F128LOtoF64"); return;
+      case Iop_I32StoF128: vex_printf("I32StoF128"); return;
+      case Iop_I64StoF128: vex_printf("I64StoF128"); return;
+      case Iop_F128toI32S: vex_printf("F128toI32S"); return;
+      case Iop_F128toI64S: vex_printf("F128toI64S"); return;
+      case Iop_F32toF128:  vex_printf("F32toF128");  return;
+      case Iop_F64toF128:  vex_printf("F64toF128");  return;
+      case Iop_F128toF64:  vex_printf("F128toF64");  return;
+      case Iop_F128toF32:  vex_printf("F128toF32");  return;
+
+        /* s390 specific */
+      case Iop_MAddF32:    vex_printf("s390_MAddF32"); return;
+      case Iop_MSubF32:    vex_printf("s390_MSubF32"); return;
+
       case Iop_ScaleF64:      vex_printf("ScaleF64"); return;
       case Iop_AtanF64:       vex_printf("AtanF64"); return;
       case Iop_Yl2xF64:       vex_printf("Yl2xF64"); return;
@@ -315,12 +360,15 @@
       case Iop_F64toI16S: vex_printf("F64toI16S"); return;
       case Iop_F64toI32S: vex_printf("F64toI32S"); return;
       case Iop_F64toI64S: vex_printf("F64toI64S"); return;
+      case Iop_F64toI64U: vex_printf("F64toI64U"); return;
 
       case Iop_F64toI32U: vex_printf("F64toI32U"); return;
 
       case Iop_I16StoF64: vex_printf("I16StoF64"); return;
       case Iop_I32StoF64: vex_printf("I32StoF64"); return;
       case Iop_I64StoF64: vex_printf("I64StoF64"); return;
+      case Iop_I64UtoF64: vex_printf("I64UtoF64"); return;
+      case Iop_I64UtoF32: vex_printf("I64UtoF32"); return;
 
       case Iop_I32UtoF64: vex_printf("I32UtoF64"); return;
 
@@ -463,9 +511,11 @@
       case Iop_SarN8x8: vex_printf("SarN8x8"); return;
       case Iop_SarN16x4: vex_printf("SarN16x4"); return;
       case Iop_SarN32x2: vex_printf("SarN32x2"); return;
-      case Iop_QNarrow16Ux4: vex_printf("QNarrow16Ux4"); return;
-      case Iop_QNarrow16Sx4: vex_printf("QNarrow16Sx4"); return;
-      case Iop_QNarrow32Sx2: vex_printf("QNarrow32Sx2"); return;
+      case Iop_QNarrowBin16Sto8Ux8: vex_printf("QNarrowBin16Sto8Ux8"); return;
+      case Iop_QNarrowBin16Sto8Sx8: vex_printf("QNarrowBin16Sto8Sx8"); return;
+      case Iop_QNarrowBin32Sto16Sx4: vex_printf("QNarrowBin32Sto16Sx4"); return;
+      case Iop_NarrowBin16to8x8: vex_printf("NarrowBin16to8x8"); return;
+      case Iop_NarrowBin32to16x4: vex_printf("NarrowBin32to16x4"); return;
       case Iop_InterleaveHI8x8: vex_printf("InterleaveHI8x8"); return;
       case Iop_InterleaveHI16x4: vex_printf("InterleaveHI16x4"); return;
       case Iop_InterleaveHI32x2: vex_printf("InterleaveHI32x2"); return;
@@ -729,6 +779,7 @@
       case Iop_CmpEQ8x16:  vex_printf("CmpEQ8x16"); return;
       case Iop_CmpEQ16x8:  vex_printf("CmpEQ16x8"); return;
       case Iop_CmpEQ32x4:  vex_printf("CmpEQ32x4"); return;
+      case Iop_CmpEQ64x2:  vex_printf("CmpEQ64x2"); return;
       case Iop_CmpGT8Sx16: vex_printf("CmpGT8Sx16"); return;
       case Iop_CmpGT16Sx8: vex_printf("CmpGT16Sx8"); return;
       case Iop_CmpGT32Sx4: vex_printf("CmpGT32Sx4"); return;
@@ -801,30 +852,32 @@
       case Iop_Rol16x8: vex_printf("Rol16x8"); return;
       case Iop_Rol32x4: vex_printf("Rol32x4"); return;
 
-      case Iop_Narrow16x8:   vex_printf("Narrow16x8"); return;
-      case Iop_Narrow32x4:   vex_printf("Narrow32x4"); return;
-      case Iop_QNarrow16Ux8: vex_printf("QNarrow16Ux8"); return;
-      case Iop_QNarrow32Ux4: vex_printf("QNarrow32Ux4"); return;
-      case Iop_QNarrow16Sx8: vex_printf("QNarrow16Sx8"); return;
-      case Iop_QNarrow32Sx4: vex_printf("QNarrow32Sx4"); return;
-      case Iop_Shorten16x8: vex_printf("Shorten16x8"); return;
-      case Iop_Shorten32x4: vex_printf("Shorten32x4"); return;
-      case Iop_Shorten64x2: vex_printf("Shorten64x2"); return;
-      case Iop_QShortenU16Ux8: vex_printf("QShortenU16Ux8"); return;
-      case Iop_QShortenU32Ux4: vex_printf("QShortenU32Ux4"); return;
-      case Iop_QShortenU64Ux2: vex_printf("QShortenU64Ux2"); return;
-      case Iop_QShortenS16Sx8: vex_printf("QShortenS16Sx8"); return;
-      case Iop_QShortenS32Sx4: vex_printf("QShortenS32Sx4"); return;
-      case Iop_QShortenS64Sx2: vex_printf("QShortenS64Sx2"); return;
-      case Iop_QShortenU16Sx8: vex_printf("QShortenU16Sx8"); return;
-      case Iop_QShortenU32Sx4: vex_printf("QShortenU32Sx4"); return;
-      case Iop_QShortenU64Sx2: vex_printf("QShortenU64Sx2"); return;
-      case Iop_Longen8Ux8: vex_printf("Longen8Ux8"); return;
-      case Iop_Longen16Ux4: vex_printf("Longen16Ux4"); return;
-      case Iop_Longen32Ux2: vex_printf("Longen32Ux2"); return;
-      case Iop_Longen8Sx8: vex_printf("Longen8Sx8"); return;
-      case Iop_Longen16Sx4: vex_printf("Longen16Sx4"); return;
-      case Iop_Longen32Sx2: vex_printf("Longen32Sx2"); return;
+      case Iop_NarrowBin16to8x16:    vex_printf("NarrowBin16to8x16"); return;
+      case Iop_NarrowBin32to16x8:    vex_printf("NarrowBin32to16x8"); return;
+      case Iop_QNarrowBin16Uto8Ux16: vex_printf("QNarrowBin16Uto8Ux16"); return;
+      case Iop_QNarrowBin32Sto16Ux8: vex_printf("QNarrowBin32Sto16Ux8"); return;
+      case Iop_QNarrowBin16Sto8Ux16: vex_printf("QNarrowBin16Sto8Ux16"); return;
+      case Iop_QNarrowBin32Uto16Ux8: vex_printf("QNarrowBin32Uto16Ux8"); return;
+      case Iop_QNarrowBin16Sto8Sx16: vex_printf("QNarrowBin16Sto8Sx16"); return;
+      case Iop_QNarrowBin32Sto16Sx8: vex_printf("QNarrowBin32Sto16Sx8"); return;
+      case Iop_NarrowUn16to8x8:     vex_printf("NarrowUn16to8x8");  return;
+      case Iop_NarrowUn32to16x4:    vex_printf("NarrowUn32to16x4"); return;
+      case Iop_NarrowUn64to32x2:    vex_printf("NarrowUn64to32x2"); return;
+      case Iop_QNarrowUn16Uto8Ux8:  vex_printf("QNarrowUn16Uto8Ux8");  return;
+      case Iop_QNarrowUn32Uto16Ux4: vex_printf("QNarrowUn32Uto16Ux4"); return;
+      case Iop_QNarrowUn64Uto32Ux2: vex_printf("QNarrowUn64Uto32Ux2"); return;
+      case Iop_QNarrowUn16Sto8Sx8:  vex_printf("QNarrowUn16Sto8Sx8");  return;
+      case Iop_QNarrowUn32Sto16Sx4: vex_printf("QNarrowUn32Sto16Sx4"); return;
+      case Iop_QNarrowUn64Sto32Sx2: vex_printf("QNarrowUn64Sto32Sx2"); return;
+      case Iop_QNarrowUn16Sto8Ux8:  vex_printf("QNarrowUn16Sto8Ux8");  return;
+      case Iop_QNarrowUn32Sto16Ux4: vex_printf("QNarrowUn32Sto16Ux4"); return;
+      case Iop_QNarrowUn64Sto32Ux2: vex_printf("QNarrowUn64Sto32Ux2"); return;
+      case Iop_Widen8Uto16x8:  vex_printf("Widen8Uto16x8");  return;
+      case Iop_Widen16Uto32x4: vex_printf("Widen16Uto32x4"); return;
+      case Iop_Widen32Uto64x2: vex_printf("Widen32Uto64x2"); return;
+      case Iop_Widen8Sto16x8:  vex_printf("Widen8Sto16x8");  return;
+      case Iop_Widen16Sto32x4: vex_printf("Widen16Sto32x4"); return;
+      case Iop_Widen32Sto64x2: vex_printf("Widen32Sto64x2"); return;
 
       case Iop_InterleaveHI8x16: vex_printf("InterleaveHI8x16"); return;
       case Iop_InterleaveHI16x8: vex_printf("InterleaveHI16x8"); return;
@@ -1090,8 +1143,12 @@
 void ppIRMBusEvent ( IRMBusEvent event )
 {
    switch (event) {
-      case Imbe_Fence: vex_printf("Fence"); break;
-      default:         vpanic("ppIRMBusEvent");
+      case Imbe_Fence:
+         vex_printf("Fence"); break;
+      case Imbe_CancelReservation:
+         vex_printf("CancelReservation"); break;
+      default:
+         vpanic("ppIRMBusEvent");
    }
 }
 
@@ -1106,8 +1163,9 @@
          vex_printf("IR-NoOp");
          break;
       case Ist_IMark:
-         vex_printf( "------ IMark(0x%llx, %d) ------", 
-                     s->Ist.IMark.addr, s->Ist.IMark.len);
+         vex_printf( "------ IMark(0x%llx, %d, %u) ------", 
+                     s->Ist.IMark.addr, s->Ist.IMark.len,
+                     (UInt)s->Ist.IMark.delta);
          break;
       case Ist_AbiHint:
          vex_printf("====== AbiHint(");
@@ -1259,6 +1317,20 @@
    c->Ico.U64 = u64;
    return c;
 }
+IRConst* IRConst_F32 ( Float f32 )
+{
+   IRConst* c = LibVEX_Alloc(sizeof(IRConst));
+   c->tag     = Ico_F32;
+   c->Ico.F32 = f32;
+   return c;
+}
+IRConst* IRConst_F32i ( UInt f32i )
+{
+   IRConst* c  = LibVEX_Alloc(sizeof(IRConst));
+   c->tag      = Ico_F32i;
+   c->Ico.F32i = f32i;
+   return c;
+}
 IRConst* IRConst_F64 ( Double f64 )
 {
    IRConst* c = LibVEX_Alloc(sizeof(IRConst));
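The two constructors added above mirror the existing F64/F64i pair: Ico_F32
carries a host Float value, while Ico_F32i carries the raw 32-bit IEEE bit
pattern, which is usually what a front end has in hand when decoding a
literal. For instance:

   IRConst* a = IRConst_F32 ( 1.5f );         /* by value           */
   IRConst* b = IRConst_F32i( 0x3FC00000u );  /* same 1.5f, by bits */
   /* typeOfIRConst() maps both tags to Ity_F32, per the hunk
      further down in this file. */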
@@ -1548,11 +1620,12 @@
    static_closure.tag = Ist_NoOp;
    return &static_closure;
 }
-IRStmt* IRStmt_IMark ( Addr64 addr, Int len ) {
-   IRStmt* s         = LibVEX_Alloc(sizeof(IRStmt));
-   s->tag            = Ist_IMark;
-   s->Ist.IMark.addr = addr;
-   s->Ist.IMark.len  = len;
+IRStmt* IRStmt_IMark ( Addr64 addr, Int len, UChar delta ) {
+   IRStmt* s          = LibVEX_Alloc(sizeof(IRStmt));
+   s->tag             = Ist_IMark;
+   s->Ist.IMark.addr  = addr;
+   s->Ist.IMark.len   = len;
+   s->Ist.IMark.delta = delta;
    return s;
 }
 IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia ) {
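The new third operand of IRStmt_IMark is a small program-counter
adjustment: before a guest PC value is compared against [addr, addr+len),
delta must be subtracted from it. This matters on targets where the PC
encodes a mode bit, ARM/Thumb being the motivating case (bit 0 set while in
Thumb state), which is why the sanity check added later in this file only
accepts deltas of 0 or 1. As a check in C, under those semantics:

   /* Does guest PC value 'pc' refer to the instruction marked by this
      IMark?  Subtract delta first (0 on most targets, 1 for Thumb). */
   static Bool pc_in_imark ( Addr64 pc, Addr64 addr, Int len, UChar delta )
   {
      Addr64 real = pc - delta;
      return toBool(real >= addr && real < addr + (Addr64)len);
   }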
@@ -1707,6 +1780,8 @@
       case Ico_U16:  return IRConst_U16(c->Ico.U16);
       case Ico_U32:  return IRConst_U32(c->Ico.U32);
       case Ico_U64:  return IRConst_U64(c->Ico.U64);
+      case Ico_F32:  return IRConst_F32(c->Ico.F32);
+      case Ico_F32i: return IRConst_F32i(c->Ico.F32i);
       case Ico_F64:  return IRConst_F64(c->Ico.F64);
       case Ico_F64i: return IRConst_F64i(c->Ico.F64i);
       case Ico_V128: return IRConst_V128(c->Ico.V128);
@@ -1813,7 +1888,9 @@
                                s->Ist.AbiHint.len,
                                deepCopyIRExpr(s->Ist.AbiHint.nia));
       case Ist_IMark:
-         return IRStmt_IMark(s->Ist.IMark.addr, s->Ist.IMark.len);
+         return IRStmt_IMark(s->Ist.IMark.addr,
+                             s->Ist.IMark.len,
+                             s->Ist.IMark.delta);
       case Ist_Put: 
          return IRStmt_Put(s->Ist.Put.offset, 
                            deepCopyIRExpr(s->Ist.Put.data));
@@ -1990,8 +2067,9 @@
       case Iop_QAdd32Ux2: case Iop_QAdd64Ux1:
       case Iop_PwAdd8x8: case Iop_PwAdd16x4: case Iop_PwAdd32x2:
       case Iop_PwAdd32Fx2:
-      case Iop_QNarrow32Sx2:
-      case Iop_QNarrow16Sx4: case Iop_QNarrow16Ux4:
+      case Iop_QNarrowBin32Sto16Sx4:
+      case Iop_QNarrowBin16Sto8Sx8: case Iop_QNarrowBin16Sto8Ux8:
+      case Iop_NarrowBin16to8x8: case Iop_NarrowBin32to16x4:
       case Iop_Sub8x8: case Iop_Sub16x4: case Iop_Sub32x2:
       case Iop_QSub8Sx8: case Iop_QSub16Sx4:
       case Iop_QSub32Sx2: case Iop_QSub64Sx1:
@@ -2097,10 +2175,10 @@
       case Iop_Clz64: case Iop_Ctz64:
          UNARY(Ity_I64, Ity_I64);
 
-      case Iop_DivU32: case Iop_DivS32:
+      case Iop_DivU32: case Iop_DivS32: case Iop_DivU32E: case Iop_DivS32E:
          BINARY(Ity_I32,Ity_I32, Ity_I32);
 
-      case Iop_DivU64: case Iop_DivS64:
+      case Iop_DivU64: case Iop_DivS64: case Iop_DivS64E: case Iop_DivU64E:
          BINARY(Ity_I64,Ity_I64, Ity_I64);
 
       case Iop_DivModU64to32: case Iop_DivModS64to32:
@@ -2109,6 +2187,9 @@
       case Iop_DivModU128to64: case Iop_DivModS128to64:
          BINARY(Ity_I128,Ity_I64, Ity_I128);
 
+      case Iop_DivModS64to64:
+         BINARY(Ity_I64,Ity_I64, Ity_I128);
+
       case Iop_16HIto8: case Iop_16to8:
          UNARY(Ity_I16, Ity_I8);
       case Iop_8HLto16:
@@ -2185,21 +2266,38 @@
       case Iop_RoundF32toInt:
          BINARY(ity_RMode,Ity_F32, Ity_F32);
 
+      case Iop_CmpF32:
+         BINARY(Ity_F32,Ity_F32, Ity_I32);
+
       case Iop_CmpF64:
          BINARY(Ity_F64,Ity_F64, Ity_I32);
 
+      case Iop_CmpF128:
+         BINARY(Ity_F128,Ity_F128, Ity_I32);
+
       case Iop_F64toI16S: BINARY(ity_RMode,Ity_F64, Ity_I16);
       case Iop_F64toI32S: BINARY(ity_RMode,Ity_F64, Ity_I32);
-      case Iop_F64toI64S: BINARY(ity_RMode,Ity_F64, Ity_I64);
+      case Iop_F64toI64S: case Iop_F64toI64U:
+         BINARY(ity_RMode,Ity_F64, Ity_I64);
 
       case Iop_F64toI32U: BINARY(ity_RMode,Ity_F64, Ity_I32);
 
       case Iop_I16StoF64: UNARY(Ity_I16, Ity_F64);
       case Iop_I32StoF64: UNARY(Ity_I32, Ity_F64);
       case Iop_I64StoF64: BINARY(ity_RMode,Ity_I64, Ity_F64);
+      case Iop_I64UtoF64: BINARY(ity_RMode,Ity_I64, Ity_F64);
+      case Iop_I64UtoF32: BINARY(ity_RMode,Ity_I64, Ity_F32);
 
       case Iop_I32UtoF64: UNARY(Ity_I32, Ity_F64);
 
+      case Iop_F32toI16S: BINARY(ity_RMode,Ity_F32, Ity_I16);
+      case Iop_F32toI32S: BINARY(ity_RMode,Ity_F32, Ity_I32);
+      case Iop_F32toI64S: BINARY(ity_RMode,Ity_F32, Ity_I64);
+
+      case Iop_I16StoF32: UNARY(Ity_I16, Ity_F32);
+      case Iop_I32StoF32: BINARY(ity_RMode,Ity_I32, Ity_F32);
+      case Iop_I64StoF32: BINARY(ity_RMode,Ity_I64, Ity_F32);
+
       case Iop_F32toF64: UNARY(Ity_F32, Ity_F64);
       case Iop_F64toF32: BINARY(ity_RMode,Ity_F64, Ity_F32);
 
@@ -2249,17 +2347,31 @@
       case Iop_Rsqrte32x4:
          UNARY(Ity_V128, Ity_V128);
 
-      case Iop_64HLtoV128: BINARY(Ity_I64,Ity_I64, Ity_V128);
+      case Iop_64HLtoV128:
+         BINARY(Ity_I64,Ity_I64, Ity_V128);
+
       case Iop_V128to64: case Iop_V128HIto64:
-      case Iop_Shorten16x8: case Iop_Shorten32x4: case Iop_Shorten64x2:
-      case Iop_QShortenU16Ux8: case Iop_QShortenU32Ux4: case Iop_QShortenU64Ux2:
-      case Iop_QShortenS16Sx8: case Iop_QShortenS32Sx4: case Iop_QShortenS64Sx2:
-      case Iop_QShortenU16Sx8: case Iop_QShortenU32Sx4: case Iop_QShortenU64Sx2:
+      case Iop_NarrowUn16to8x8:
+      case Iop_NarrowUn32to16x4:
+      case Iop_NarrowUn64to32x2:
+      case Iop_QNarrowUn16Uto8Ux8:
+      case Iop_QNarrowUn32Uto16Ux4:
+      case Iop_QNarrowUn64Uto32Ux2:
+      case Iop_QNarrowUn16Sto8Sx8:
+      case Iop_QNarrowUn32Sto16Sx4:
+      case Iop_QNarrowUn64Sto32Sx2:
+      case Iop_QNarrowUn16Sto8Ux8:
+      case Iop_QNarrowUn32Sto16Ux4:
+      case Iop_QNarrowUn64Sto32Ux2:
       case Iop_F32toF16x4:
          UNARY(Ity_V128, Ity_I64);
 
-      case Iop_Longen8Ux8: case Iop_Longen16Ux4: case Iop_Longen32Ux2:
-      case Iop_Longen8Sx8: case Iop_Longen16Sx4: case Iop_Longen32Sx2:
+      case Iop_Widen8Uto16x8:
+      case Iop_Widen16Uto32x4:
+      case Iop_Widen32Uto64x2:
+      case Iop_Widen8Sto16x8:
+      case Iop_Widen16Sto32x4:
+      case Iop_Widen32Sto64x2:
       case Iop_F16toF32x4:
          UNARY(Ity_I64, Ity_V128);
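The two groups above give the renamed narrowing family its type discipline:
*Bin* ops take two vectors and produce one of the same width, *Un* ops
halve a single V128 into an I64, and Widen ops do the reverse. The new
names spell out the entire conversion; QNarrowUn32Sto16Ux4, for example, is
a saturating (Q) unary (Un) narrow from 32-bit signed lanes to 16-bit
unsigned lanes, 4 of them. Per lane that means:

   /* One lane of QNarrowUn32Sto16Ux4: signed 32-bit in, unsigned
      16-bit out, saturating at both ends. */
   static unsigned short qnarrow_32S_to_16U ( int x )
   {
      if (x < 0)      return 0;
      if (x > 0xFFFF) return 0xFFFF;
      return (unsigned short)x;
   }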
 
@@ -2327,19 +2439,23 @@
       case Iop_Min8Sx16: case Iop_Min16Sx8: case Iop_Min32Sx4:
       case Iop_Min8Ux16: case Iop_Min16Ux8: case Iop_Min32Ux4:
       case Iop_CmpEQ8x16:  case Iop_CmpEQ16x8:  case Iop_CmpEQ32x4:
+      case Iop_CmpEQ64x2:
       case Iop_CmpGT8Sx16: case Iop_CmpGT16Sx8: case Iop_CmpGT32Sx4:
       case Iop_CmpGT64Sx2:
       case Iop_CmpGT8Ux16: case Iop_CmpGT16Ux8: case Iop_CmpGT32Ux4:
       case Iop_Shl8x16: case Iop_Shl16x8: case Iop_Shl32x4: case Iop_Shl64x2:
-      case Iop_QShl8x16: case Iop_QShl16x8: case Iop_QShl32x4: case Iop_QShl64x2:
-      case Iop_QSal8x16: case Iop_QSal16x8: case Iop_QSal32x4: case Iop_QSal64x2:
+      case Iop_QShl8x16: case Iop_QShl16x8:
+      case Iop_QShl32x4: case Iop_QShl64x2:
+      case Iop_QSal8x16: case Iop_QSal16x8:
+      case Iop_QSal32x4: case Iop_QSal64x2:
       case Iop_Shr8x16: case Iop_Shr16x8: case Iop_Shr32x4: case Iop_Shr64x2:
       case Iop_Sar8x16: case Iop_Sar16x8: case Iop_Sar32x4: case Iop_Sar64x2:
       case Iop_Sal8x16: case Iop_Sal16x8: case Iop_Sal32x4: case Iop_Sal64x2:
       case Iop_Rol8x16: case Iop_Rol16x8: case Iop_Rol32x4:
-      case Iop_QNarrow16Ux8: case Iop_QNarrow32Ux4:
-      case Iop_QNarrow16Sx8: case Iop_QNarrow32Sx4:
-      case Iop_Narrow16x8:   case Iop_Narrow32x4:
+      case Iop_QNarrowBin16Sto8Ux16: case Iop_QNarrowBin32Sto16Ux8:
+      case Iop_QNarrowBin16Sto8Sx16: case Iop_QNarrowBin32Sto16Sx8:
+      case Iop_QNarrowBin16Uto8Ux16: case Iop_QNarrowBin32Uto16Ux8:
+      case Iop_NarrowBin16to8x16:   case Iop_NarrowBin32to16x8:
       case Iop_InterleaveHI8x16: case Iop_InterleaveHI16x8:
       case Iop_InterleaveHI32x4: case Iop_InterleaveHI64x2:
       case Iop_InterleaveLO8x16: case Iop_InterleaveLO16x8:
@@ -2439,6 +2555,43 @@
       case Iop_QDMulLong16Sx4: case Iop_QDMulLong32Sx2:
          BINARY(Ity_I64, Ity_I64, Ity_V128);
 
+         /* s390 specific */
+      case Iop_MAddF32:
+      case Iop_MSubF32:
+         QUATERNARY(ity_RMode,Ity_F32,Ity_F32,Ity_F32, Ity_F32);
+
+      case Iop_F64HLtoF128:
+        BINARY(Ity_F64,Ity_F64, Ity_F128);
+
+      case Iop_F128HItoF64:
+      case Iop_F128LOtoF64:
+        UNARY(Ity_F128, Ity_F64);
+
+      case Iop_AddF128:
+      case Iop_SubF128:
+      case Iop_MulF128:
+      case Iop_DivF128:
+         TERNARY(ity_RMode,Ity_F128,Ity_F128, Ity_F128);
+
+      case Iop_NegF128:
+      case Iop_AbsF128:
+         UNARY(Ity_F128, Ity_F128);
+
+      case Iop_SqrtF128:
+         BINARY(ity_RMode,Ity_F128, Ity_F128);
+
+      case Iop_I32StoF128: UNARY(Ity_I32, Ity_F128);
+      case Iop_I64StoF128: UNARY(Ity_I64, Ity_F128);
+
+      case Iop_F128toI32S: BINARY(ity_RMode,Ity_F128, Ity_I32);
+      case Iop_F128toI64S: BINARY(ity_RMode,Ity_F128, Ity_I64);
+
+      case Iop_F32toF128: UNARY(Ity_F32, Ity_F128);
+      case Iop_F64toF128: UNARY(Ity_F64, Ity_F128);
+
+      case Iop_F128toF32: BINARY(ity_RMode,Ity_F128, Ity_F32);
+      case Iop_F128toF64: BINARY(ity_RMode,Ity_F128, Ity_F64);
+
       default:
          ppIROp(op);
          vpanic("typeOfPrimop");
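The F128 cases lean on the UNARY/BINARY/TERNARY/QUATERNARY helper macros
defined near the top of typeOfPrimop, each of which writes the argument
types and the result type through the function's out-parameters. The exact
definition is not shown in this hunk; roughly, it has this shape:

   /* Approximate shape of the BINARY macro used above (parameter and
      out-parameter names are assumptions): */
   #define BINARY(T1, T2, TRES) \
      do { *t_arg1 = (T1); *t_arg2 = (T2); *t_dst = (TRES); return; } while (0)

   /* So "case Iop_SqrtF128: BINARY(ity_RMode,Ity_F128, Ity_F128);"
      reads: arg1 is a rounding mode, arg2 an F128, result an F128. */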
@@ -2521,6 +2674,8 @@
       case Ico_U16:   return Ity_I16;
       case Ico_U32:   return Ity_I32;
       case Ico_U64:   return Ity_I64;
+      case Ico_F32:   return Ity_F32;
+      case Ico_F32i:  return Ity_F32;
       case Ico_F64:   return Ity_F64;
       case Ico_F64i:  return Ity_F64;
       case Ico_V128:  return Ity_V128;
@@ -2580,7 +2735,7 @@
       case Ity_INVALID: case Ity_I1:
       case Ity_I8: case Ity_I16: case Ity_I32: 
       case Ity_I64: case Ity_I128:
-      case Ity_F32: case Ity_F64:
+      case Ity_F32: case Ity_F64: case Ity_F128:
       case Ity_V128:
          return True;
       default: 
@@ -3098,9 +3253,11 @@
    switch (stmt->tag) {
       case Ist_IMark:
          /* Somewhat heuristic, but rule out totally implausible
-            instruction sizes. */
+            instruction sizes and deltas. */
          if (stmt->Ist.IMark.len < 0 || stmt->Ist.IMark.len > 20)
             sanityCheckFail(bb,stmt,"IRStmt.IMark.len: implausible");
+         if (stmt->Ist.IMark.delta > 1)
+            sanityCheckFail(bb,stmt,"IRStmt.IMark.delta: implausible");
          break;
       case Ist_AbiHint:
          if (typeOfIRExpr(tyenv, stmt->Ist.AbiHint.base) != gWordTy)
@@ -3203,14 +3360,16 @@
          tyRes = typeOfIRTemp(tyenv, stmt->Ist.LLSC.result);
          if (stmt->Ist.LLSC.storedata == NULL) {
             /* it's a LL */
-            if (tyRes != Ity_I64 && tyRes != Ity_I32 && tyRes != Ity_I8)
+            if (tyRes != Ity_I64 && tyRes != Ity_I32
+                && tyRes != Ity_I16 && tyRes != Ity_I8)
                sanityCheckFail(bb,stmt,"Ist.LLSC(LL).result :: bogus");
          } else {
             /* it's a SC */
             if (tyRes != Ity_I1)
                sanityCheckFail(bb,stmt,"Ist.LLSC(SC).result: not :: Ity_I1");
             tyData = typeOfIRExpr(tyenv, stmt->Ist.LLSC.storedata);
-            if (tyData != Ity_I64 && tyData != Ity_I32 && tyData != Ity_I8)
+            if (tyData != Ity_I64 && tyData != Ity_I32
+                && tyData != Ity_I16 && tyData != Ity_I8)
                sanityCheckFail(bb,stmt,
                                "Ist.LLSC(SC).result :: storedata bogus");
          }
@@ -3259,7 +3418,7 @@
          break;
       case Ist_MBE:
          switch (stmt->Ist.MBE.event) {
-            case Imbe_Fence:
+            case Imbe_Fence: case Imbe_CancelReservation:
                break;
             default: sanityCheckFail(bb,stmt,"IRStmt.MBE.event: unknown");
                break;
@@ -3416,6 +3575,8 @@
       case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
       case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
       case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
+      case Ico_F32: return toBool( c1->Ico.F32 == c2->Ico.F32 );
+      case Ico_F32i: return toBool( c1->Ico.F32i == c2->Ico.F32i );
       case Ico_F64: return toBool( c1->Ico.F64 == c2->Ico.F64 );
       case Ico_F64i: return toBool( c1->Ico.F64i == c2->Ico.F64i );
       case Ico_V128: return toBool( c1->Ico.V128 == c2->Ico.V128 );
@@ -3440,6 +3601,7 @@
       case Ity_I128: return 16;
       case Ity_F32:  return 4;
       case Ity_F64:  return 8;
+      case Ity_F128: return 16;
       case Ity_V128: return 16;
       default: vex_printf("\n"); ppIRType(ty); vex_printf("\n");
                vpanic("sizeofIRType");
diff --git a/main/VEX/priv/ir_match.c b/main/VEX/priv/ir_match.c
index fc32f2e..39e483c 100644
--- a/main/VEX/priv/ir_match.c
+++ b/main/VEX/priv/ir_match.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
diff --git a/main/VEX/priv/ir_match.h b/main/VEX/priv/ir_match.h
index 5755505..0db1be7 100644
--- a/main/VEX/priv/ir_match.h
+++ b/main/VEX/priv/ir_match.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
diff --git a/main/VEX/priv/ir_opt.c b/main/VEX/priv/ir_opt.c
index 4730680..cb7f507 100644
--- a/main/VEX/priv/ir_opt.c
+++ b/main/VEX/priv/ir_opt.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -917,6 +917,22 @@
    }
 }
 
+/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
+static Bool isZeroU32 ( IRExpr* e )
+{
+   return toBool( e->tag == Iex_Const 
+                  && e->Iex.Const.con->tag == Ico_U32
+                  && e->Iex.Const.con->Ico.U32 == 0);
+}
+
+/* Is this literally IRExpr_Const(IRConst_U64(0)) ? */
+static Bool isZeroU64 ( IRExpr* e )
+{
+   return toBool( e->tag == Iex_Const 
+                  && e->Iex.Const.con->tag == Ico_U64
+                  && e->Iex.Const.con->Ico.U64 == 0);
+}
+
 static Bool notBool ( Bool b )
 {
    if (b == True) return False;
@@ -956,6 +972,31 @@
    }
 }
 
+/* Helpers for folding Clz32/64. */
+static UInt fold_Clz64 ( ULong value )
+{
+   UInt i;
+   vassert(value != 0ULL); /* no defined semantics for arg==0 */
+   for (i = 0; i < 64; ++i) {
+      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
+   }
+   vassert(0);
+   /*NOTREACHED*/
+   return 0;
+}
+
+static UInt fold_Clz32 ( UInt value )
+{
+   UInt i;
+   vassert(value != 0); /* no defined semantics for arg==0 */
+   for (i = 0; i < 32; ++i) {
+      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
+   }
+   vassert(0);
+   /*NOTREACHED*/
+   return 0;
+}
+
 
 static IRExpr* fold_Expr ( IRExpr* e )
 {
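The helpers above fold Clz at IR level by scanning from the most
significant bit down; the index i at which the first set bit appears is
exactly the count of leading zeros. The vasserted non-zero precondition
matches the fold sites below, which skip folding when the constant is zero.
Quick examples:

   /* fold_Clz32(0x00010000) first hits bit 16 after 15 clear bits,
      so it returns 15; the sole set bit of 1ULL sits 63 places down. */
   vassert(fold_Clz32(0x00010000u) == 15);
   vassert(fold_Clz64(1ULL)        == 63);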
@@ -1010,6 +1051,13 @@
             e2 = IRExpr_Const(IRConst_U32((UInt)s32));
             break;
          }
+         case Iop_16Sto32: {
+            /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
+            s32 <<= 16;
+            s32 >>= 16;
+            e2 = IRExpr_Const(IRConst_U32( (UInt)s32) );
+            break;
+         }
          case Iop_8Uto64:
             e2 = IRExpr_Const(IRConst_U64(
                     0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
@@ -1022,6 +1070,17 @@
             e2 = IRExpr_Const(IRConst_U32(
                     0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
             break;
+         case Iop_8Sto16: {
+            /* signed */ Short s16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
+            s16 <<= 8;
+            s16 >>= 8;
+            e2 = IRExpr_Const(IRConst_U16( (UShort)s16) );
+            break;
+         }
+         case Iop_8Uto16:
+            e2 = IRExpr_Const(IRConst_U16(
+                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
+            break;
          case Iop_16Uto32:
             e2 = IRExpr_Const(IRConst_U32(
                     0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
@@ -1096,6 +1155,13 @@
                     0xFFFFFFFFULL 
                     & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
             break;
+         case Iop_16Sto64: {
+            /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
+            s64 <<= 48;
+            s64 >>= 48;
+            e2 = IRExpr_Const(IRConst_U64((ULong)s64));
+            break;
+         }
          case Iop_32Sto64: {
             /* signed */ Long s64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
             s64 <<= 32;
@@ -1103,6 +1169,21 @@
             e2 = IRExpr_Const(IRConst_U64((ULong)s64));
             break;
          }
+
+         case Iop_16to8: {
+            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
+            w16 &= 0xFF;
+            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
+            break;
+         }
+         case Iop_16HIto8: {
+            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
+            w16 >>= 8;
+            w16 &= 0xFF;
+            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
+            break;
+         }
+
          case Iop_CmpNEZ8:
             e2 = IRExpr_Const(IRConst_U1(toBool(
                     0 != 
@@ -1154,6 +1235,19 @@
             break;
          }
 
+         case Iop_Clz32: {
+            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
+            if (u32 != 0)
+               e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
+            break;
+         }
+         case Iop_Clz64: {
+            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
+            if (u64 != 0ULL)
+               e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
+            break;
+         }
+
          default: 
             goto unhandled;
       }
@@ -1216,6 +1310,11 @@
                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                         & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
                break;
+            case Iop_And16:
+               e2 = IRExpr_Const(IRConst_U16(toUShort(
+                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
+                        & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
+               break;
             case Iop_And32:
                e2 = IRExpr_Const(IRConst_U32(
                        (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
@@ -1401,6 +1500,11 @@
                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                         <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
                break;
+            case Iop_CmpLE64U:
+               e2 = IRExpr_Const(IRConst_U1(toBool(
+                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
+                        <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
+               break;
 
             /* -- CmpLES -- */
             case Iop_CmpLE32S:
@@ -1408,6 +1512,11 @@
                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                         <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
                break;
+            case Iop_CmpLE64S:
+               e2 = IRExpr_Const(IRConst_U1(toBool(
+                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
+                        <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
+               break;
 
             /* -- CmpLTS -- */
             case Iop_CmpLT32S:
@@ -1415,6 +1524,11 @@
                        ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                         < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
                break;
+            case Iop_CmpLT64S:
+               e2 = IRExpr_Const(IRConst_U1(toBool(
+                       ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
+                        < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
+               break;
 
             /* -- CmpLTU -- */
             case Iop_CmpLT32U:
@@ -1422,6 +1536,11 @@
                        ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                         < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
                break;
+            case Iop_CmpLT64U:
+               e2 = IRExpr_Const(IRConst_U1(toBool(
+                       ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
+                        < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
+               break;
 
             /* -- CmpORD -- */
             case Iop_CmpORD32S: {
@@ -1491,11 +1610,18 @@
             e2 = e->Iex.Binop.arg1;
          } else
 
-         /* Or32/Add32/Max32U(x,0) ==> x */
-         if ((e->Iex.Binop.op == Iop_Add32 
-              || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
-             && e->Iex.Binop.arg2->tag == Iex_Const
-             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
+         /* Or32/Add32/Max32U(x,0) ==> x
+            Or32/Add32/Max32U(0,x) ==> x */
+         if (e->Iex.Binop.op == Iop_Add32
+             || e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U) {
+            if (isZeroU32(e->Iex.Binop.arg2))
+               e2 = e->Iex.Binop.arg1;
+            else if (isZeroU32(e->Iex.Binop.arg1))
+               e2 = e->Iex.Binop.arg2;
+         } else
+
+         /* Sub64(x,0) ==> x */
+         if (e->Iex.Binop.op == Iop_Sub64 && isZeroU64(e->Iex.Binop.arg2)) {
             e2 = e->Iex.Binop.arg1;
          } else
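This rewrite generalises the old arg2-only rule: with the
isZeroU32/isZeroU64 predicates introduced earlier, the zero identity now
fires on either side of the operator, and Sub64 picks up a right-zero
identity of its own. In effect:

   /* Identities the reworked rules cover (IR sketched as pseudo-C):
      Add32(x, 0)  ==> x      (old rule)
      Add32(0, x)  ==> x      (new: zero on the left)
      Max32U(0, x) ==> x
      Sub64(x, 0)  ==> x      (new Sub64 identity) */

This also lets the separate Or32/Max32U(0,x) and Or64(0,x) rules further
down be deleted, as a later hunk in this file does.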
 
@@ -1536,11 +1662,13 @@
          } else
          /* NB no Add16(t,t) case yet as no known test case exists */
 
-         /* Or64/Add64(x,0) ==> x */
-         if ((e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64)
-             && e->Iex.Binop.arg2->tag == Iex_Const
-             && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 == 0) {
-            e2 = e->Iex.Binop.arg1;
+         /* Or64/Add64(x,0) ==> x
+            Or64/Add64(0,x) ==> x */
+         if (e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64) {
+            if (isZeroU64(e->Iex.Binop.arg2))
+               e2 = e->Iex.Binop.arg1;
+            else if (isZeroU64(e->Iex.Binop.arg1))
+               e2 = e->Iex.Binop.arg2;
          } else
 
          /* And32(x,0xFFFFFFFF) ==> x */
@@ -1571,20 +1699,6 @@
             e2 = e->Iex.Binop.arg2;
          } else
 
-         /* Or32/Max32U(0,x) ==> x */
-         if ((e->Iex.Binop.op == Iop_Or32 || e->Iex.Binop.op == Iop_Max32U)
-             && e->Iex.Binop.arg1->tag == Iex_Const
-             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
-            e2 = e->Iex.Binop.arg2;
-         } else
-
-         /* Or64(0,x) ==> x */
-         if (e->Iex.Binop.op == Iop_Or64
-             && e->Iex.Binop.arg1->tag == Iex_Const
-             && e->Iex.Binop.arg1->Iex.Const.con->Ico.U64 == 0) {
-            e2 = e->Iex.Binop.arg2;
-         } else
-
          /* Or8/16/32/64/V128(t,t) ==> t, for some IRTemp t */
          /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
          /* Max32U(t,t) ==> t, for some IRTemp t */
@@ -1894,7 +2008,9 @@
       }
 
       case Ist_IMark:
-         return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
+         return IRStmt_IMark(st->Ist.IMark.addr,
+                             st->Ist.IMark.len,
+                             st->Ist.IMark.delta);
 
       case Ist_NoOp:
          return IRStmt_NoOp();
@@ -3945,6 +4061,14 @@
                              IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg, 
                                                      a2->Iex.Unop.arg ) );
       break;
+
+   case Iop_CmpNE32:
+      /* Since X has type Ity_I1 we can simplify:
+         CmpNE32(1Uto32(X),0)) ==> X */
+      if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
+         return a1->Iex.Unop.arg;
+      break;
+
    default:
       break;
    }
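The new CmpNE32 rule exploits the Ity_I1 type of X: zero-extending a
one-bit value and testing it against zero is the identity on that bit, so
the whole tree collapses to X. The CmpNEZ32/CmpNEZ8 rules added just below
make the same collapse for the unary forms. Schematically:

   /* X :: Ity_I1, so 1Uto32(X) is 0 or 1:
        X == 0  ->  CmpNE32(1Uto32(X), 0) == 0
        X == 1  ->  CmpNE32(1Uto32(X), 0) == 1
      hence CmpNE32(1Uto32(X), 0) ==> X. */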
@@ -3987,6 +4111,14 @@
       /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
       if (is_Unop(aa, Iop_Left32)) 
          return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
+      /* CmpNEZ32( 1Uto32(X) ) --> X */
+      if (is_Unop(aa, Iop_1Uto32))
+         return aa->Iex.Unop.arg;
+      break;
+   case Iop_CmpNEZ8:
+      /* CmpNEZ8( 1Uto8(X) ) --> X */
+      if (is_Unop(aa, Iop_1Uto8))
+         return aa->Iex.Unop.arg;
       break;
    case Iop_Left32:
       /* Left32( Left32(x) ) --> Left32(x) */
@@ -4166,7 +4298,9 @@
                    st->Ist.Exit.dst
                 );
       case Ist_IMark:
-         return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
+         return IRStmt_IMark(st->Ist.IMark.addr,
+                             st->Ist.IMark.len,
+                             st->Ist.IMark.delta);
       case Ist_NoOp:
          return IRStmt_NoOp();
       case Ist_MBE:
@@ -4506,7 +4640,7 @@
                case Ity_I1: case Ity_I8: case Ity_I16: 
                case Ity_I32: case Ity_I64: case Ity_I128: 
                   break;
-               case Ity_F32: case Ity_F64: case Ity_V128: 
+               case Ity_F32: case Ity_F64: case Ity_F128: case Ity_V128:
                   *hasVorFtemps = True;
                   break;
                default: 
@@ -4611,6 +4745,7 @@
       bb = cprop_BB(bb);
       bb = spec_helpers_BB ( bb, specHelper );
       redundant_put_removal_BB ( bb, preciseMemExnsFn );
+      do_cse_BB( bb );
       do_deadcode_BB( bb );
    }
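The added do_cse_BB call runs common-subexpression elimination on this
second optimisation round, where helper specialisation has just had a
chance to clone identical subtrees; the do_deadcode_BB pass immediately
after then sweeps the temps CSE made redundant. The round's pass order is
thus:

   bb = cprop_BB(bb);                              /* constant propagation  */
   bb = spec_helpers_BB(bb, specHelper);           /* specialise helpers    */
   redundant_put_removal_BB(bb, preciseMemExnsFn); /* drop dead PUTs        */
   do_cse_BB(bb);                                  /* new: merge duplicates */
   do_deadcode_BB(bb);                             /* sweep dead temps      */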
 
diff --git a/main/VEX/priv/ir_opt.h b/main/VEX/priv/ir_opt.h
index ecdb146..9390a1c 100644
--- a/main/VEX/priv/ir_opt.h
+++ b/main/VEX/priv/ir_opt.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
diff --git a/main/VEX/priv/main_globals.c b/main/VEX/priv/main_globals.c
index 716fa75..8da96be 100644
--- a/main/VEX/priv/main_globals.c
+++ b/main/VEX/priv/main_globals.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
diff --git a/main/VEX/priv/main_globals.h b/main/VEX/priv/main_globals.h
index 5b561a3..6404526 100644
--- a/main/VEX/priv/main_globals.h
+++ b/main/VEX/priv/main_globals.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
diff --git a/main/VEX/priv/main_main.c b/main/VEX/priv/main_main.c
index 1e80972..5b818ae 100644
--- a/main/VEX/priv/main_main.c
+++ b/main/VEX/priv/main_main.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -40,6 +40,7 @@
 #include "libvex_guest_arm.h"
 #include "libvex_guest_ppc32.h"
 #include "libvex_guest_ppc64.h"
+#include "libvex_guest_s390x.h"
 
 #include "main_globals.h"
 #include "main_util.h"
@@ -50,12 +51,14 @@
 #include "host_amd64_defs.h"
 #include "host_ppc_defs.h"
 #include "host_arm_defs.h"
+#include "host_s390_defs.h"
 
 #include "guest_generic_bb_to_IR.h"
 #include "guest_x86_defs.h"
 #include "guest_amd64_defs.h"
 #include "guest_arm_defs.h"
 #include "guest_ppc_defs.h"
+#include "guest_s390_defs.h"
 
 #include "host_generic_simd128.h"
 
@@ -182,7 +185,7 @@
    void         (*ppReg)        ( HReg );
    HInstrArray* (*iselSB)       ( IRSB*, VexArch, VexArchInfo*, 
                                                   VexAbiInfo* );
-   Int          (*emit)         ( UChar*, Int, HInstr*, Bool, void* );
+   Int          (*emit)         ( UChar*, Int, HInstr*, Bool, void*, void* );
    IRExpr*      (*specHelper)   ( HChar*, IRExpr**, IRStmt**, Int );
    Bool         (*preciseMemExnsFn) ( Int, Int );
 
@@ -195,7 +198,7 @@
    HInstrArray*    rcode;
    Int             i, j, k, out_used, guest_sizeB;
    Int             offB_TISTART, offB_TILEN;
-   UChar           insn_bytes[32];
+   UChar           insn_bytes[48];
    IRType          guest_word_type;
    IRType          host_word_type;
    Bool            mode64;
@@ -225,6 +228,8 @@
    vex_traceflags = vta->traceflags;
 
    vassert(vex_initdone);
+   vassert(vta->needs_self_check != NULL);
+
    vexSetAllocModeTEMP_and_clear();
    vexAllocSanityCheck();
 
@@ -249,11 +254,14 @@
          ppInstr      = (void(*)(HInstr*, Bool)) ppX86Instr;
          ppReg        = (void(*)(HReg)) ppHRegX86;
          iselSB       = iselSB_X86;
-         emit         = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_X86Instr;
+         emit         = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+                        emit_X86Instr;
          host_is_bigendian = False;
          host_word_type    = Ity_I32;
          vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_host.hwcaps));
-         vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */
+         /* jump-to-dispatcher scheme */
+         vassert(vta->dispatch_unassisted != NULL);
+         vassert(vta->dispatch_assisted != NULL);
          break;
 
       case VexArchAMD64:
@@ -271,11 +279,14 @@
          ppInstr     = (void(*)(HInstr*, Bool)) ppAMD64Instr;
          ppReg       = (void(*)(HReg)) ppHRegAMD64;
          iselSB      = iselSB_AMD64;
-         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_AMD64Instr;
+         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+                       emit_AMD64Instr;
          host_is_bigendian = False;
          host_word_type    = Ity_I64;
          vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_host.hwcaps));
-         vassert(vta->dispatch != NULL); /* jump-to-dispatcher scheme */
+         /* jump-to-dispatcher scheme */
+         vassert(vta->dispatch_unassisted != NULL);
+         vassert(vta->dispatch_assisted != NULL);
          break;
 
       case VexArchPPC32:
@@ -290,11 +301,14 @@
          ppInstr     = (void(*)(HInstr*,Bool)) ppPPCInstr;
          ppReg       = (void(*)(HReg)) ppHRegPPC;
          iselSB      = iselSB_PPC;
-         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_PPCInstr;
+         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+                       emit_PPCInstr;
          host_is_bigendian = True;
          host_word_type    = Ity_I32;
          vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_host.hwcaps));
-         vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
+         /* return-to-dispatcher scheme */
+         vassert(vta->dispatch_unassisted == NULL);
+         vassert(vta->dispatch_assisted == NULL);
          break;
 
       case VexArchPPC64:
@@ -309,11 +323,36 @@
          ppInstr     = (void(*)(HInstr*, Bool)) ppPPCInstr;
          ppReg       = (void(*)(HReg)) ppHRegPPC;
          iselSB      = iselSB_PPC;
-         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_PPCInstr;
+         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+                       emit_PPCInstr;
          host_is_bigendian = True;
          host_word_type    = Ity_I64;
          vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_host.hwcaps));
-         vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
+         /* return-to-dispatcher scheme */
+         vassert(vta->dispatch_unassisted == NULL);
+         vassert(vta->dispatch_assisted == NULL);
+         break;
+
+      case VexArchS390X:
+         mode64      = True;
+         getAllocableRegs_S390 ( &n_available_real_regs,
+                                 &available_real_regs, mode64 );
+         isMove      = (Bool(*)(HInstr*,HReg*,HReg*)) isMove_S390Instr;
+         getRegUsage = (void(*)(HRegUsage*,HInstr*, Bool)) getRegUsage_S390Instr;
+         mapRegs     = (void(*)(HRegRemap*,HInstr*, Bool)) mapRegs_S390Instr;
+         genSpill    = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genSpill_S390;
+         genReload   = (void(*)(HInstr**,HInstr**,HReg,Int,Bool)) genReload_S390;
+         ppInstr     = (void(*)(HInstr*, Bool)) ppS390Instr;
+         ppReg       = (void(*)(HReg)) ppHRegS390;
+         iselSB      = iselSB_S390;
+         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+                       emit_S390Instr;
+         host_is_bigendian = True;
+         host_word_type    = Ity_I64;
+         vassert(are_valid_hwcaps(VexArchS390X, vta->archinfo_host.hwcaps));
+         /* return-to-dispatcher scheme */
+         vassert(vta->dispatch_unassisted == NULL);
+         vassert(vta->dispatch_assisted == NULL);
          break;
 
       case VexArchARM:
@@ -328,11 +367,14 @@
          ppInstr     = (void(*)(HInstr*, Bool)) ppARMInstr;
          ppReg       = (void(*)(HReg)) ppHRegARM;
          iselSB      = iselSB_ARM;
-         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*)) emit_ARMInstr;
+         emit        = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+                       emit_ARMInstr;
          host_is_bigendian = False;
          host_word_type    = Ity_I32;
          vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_host.hwcaps));
-         vassert(vta->dispatch == NULL); /* return-to-dispatcher scheme */
+         /* return-to-dispatcher scheme */
+         vassert(vta->dispatch_unassisted == NULL);
+         vassert(vta->dispatch_assisted == NULL);
          break;
 
       default:
@@ -407,6 +449,22 @@
          vassert(sizeof( ((VexGuestPPC64State*)0)->guest_NRADDR_GPR2) == 8);
          break;
 
+      case VexArchS390X:
+         preciseMemExnsFn = guest_s390x_state_requires_precise_mem_exns;
+         disInstrFn       = disInstr_S390;
+         specHelper       = guest_s390x_spechelper;
+         guest_sizeB      = sizeof(VexGuestS390XState);
+         guest_word_type  = Ity_I64;
+         guest_layout     = &s390xGuest_layout;
+         offB_TISTART     = offsetof(VexGuestS390XState,guest_TISTART);
+         offB_TILEN       = offsetof(VexGuestS390XState,guest_TILEN);
+         vassert(are_valid_hwcaps(VexArchS390X, vta->archinfo_guest.hwcaps));
+         vassert(0 == sizeof(VexGuestS390XState) % 16);
+         vassert(sizeof( ((VexGuestS390XState*)0)->guest_TISTART    ) == 8);
+         vassert(sizeof( ((VexGuestS390XState*)0)->guest_TILEN      ) == 8);
+         vassert(sizeof( ((VexGuestS390XState*)0)->guest_NRADDR     ) == 8);
+         break;
+
       case VexArchARM:
          preciseMemExnsFn = guest_arm_state_requires_precise_mem_exns;
          disInstrFn       = disInstr_ARM;
@@ -427,6 +485,11 @@
          vpanic("LibVEX_Translate: unsupported guest insn set");
    }
 
+   /* Set up result struct. */
+   VexTranslateResult res;
+   res.status       = VexTransOK;
+   res.n_sc_extents = 0;
+
    /* yet more sanity checks ... */
    if (vta->arch_guest == vta->arch_host) {
       /* doesn't necessarily have to be true, but if it isn't it means
@@ -443,6 +506,7 @@
                    "------------------------\n\n");
 
    irsb = bb_to_IR ( vta->guest_extents,
+                     &res.n_sc_extents,
                      vta->callback_opaque,
                      disInstrFn,
                      vta->guest_bytes, 
@@ -453,7 +517,7 @@
                      &vta->archinfo_guest,
                      &vta->abiinfo_both,
                      guest_word_type,
-                     vta->do_self_check,
+                     vta->needs_self_check,
                      vta->preamble_function,
                      offB_TISTART,
                      offB_TILEN );
@@ -464,7 +528,7 @@
       /* Access failure. */
       vexSetAllocModeTEMP_and_clear();
       vex_traceflags = 0;
-      return VexTransAccessFail;
+      res.status = VexTransAccessFail; return res;
    }
 
    vassert(vta->guest_extents->n_used >= 1 && vta->guest_extents->n_used <= 3);
@@ -580,7 +644,10 @@
    }
 
    /* HACK */
-   if (0) { *(vta->host_bytes_used) = 0; return VexTransOK; }
+   if (0) {
+      *(vta->host_bytes_used) = 0;
+      res.status = VexTransOK; return res;
+   }
    /* end HACK */
 
    if (vex_traceflags & VEX_TRACE_VCODE)
@@ -628,7 +695,10 @@
    }
 
    /* HACK */
-   if (0) { *(vta->host_bytes_used) = 0; return VexTransOK; }
+   if (0) { 
+      *(vta->host_bytes_used) = 0;
+      res.status = VexTransOK; return res;
+   }
    /* end HACK */
 
    /* Assemble */
@@ -644,7 +714,8 @@
          ppInstr(rcode->arr[i], mode64);
          vex_printf("\n");
       }
-      j = (*emit)( insn_bytes, 32, rcode->arr[i], mode64, vta->dispatch );
+      j = (*emit)( insn_bytes, sizeof insn_bytes, rcode->arr[i], mode64,
+                   vta->dispatch_unassisted, vta->dispatch_assisted );
       if (vex_traceflags & VEX_TRACE_ASM) {
          for (k = 0; k < j; k++)
             if (insn_bytes[k] < 16)
@@ -656,7 +727,8 @@
       if (out_used + j > vta->host_bytes_size) {
          vexSetAllocModeTEMP_and_clear();
          vex_traceflags = 0;
-         return VexTransOutputFull;
+         res.status = VexTransOutputFull;
+         return res;
       }
       for (k = 0; k < j; k++) {
          vta->host_bytes[out_used] = insn_bytes[k];
@@ -671,7 +743,8 @@
    vexSetAllocModeTEMP_and_clear();
 
    vex_traceflags = 0;
-   return VexTransOK;
+   res.status = VexTransOK;
+   return res;
 }
 
 
@@ -716,6 +789,7 @@
       case VexArchARM:      return "ARM";
       case VexArchPPC32:    return "PPC32";
       case VexArchPPC64:    return "PPC64";
+      case VexArchS390X:    return "S390X";
       default:              return "VexArch???";
    }
 }
@@ -815,6 +889,7 @@
    const UInt V  = VEX_HWCAPS_PPC32_V;
    const UInt FX = VEX_HWCAPS_PPC32_FX;
    const UInt GX = VEX_HWCAPS_PPC32_GX;
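+   /* VX denotes the vector-scalar (VSX) facility of Power ISA
+      2.06. */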
+   const UInt VX = VEX_HWCAPS_PPC32_VX;
          UInt c  = hwcaps;
    if (c == 0)           return "ppc32-int";
    if (c == F)           return "ppc32-int-flt";
@@ -825,6 +900,7 @@
    if (c == (F|V|FX))    return "ppc32-int-flt-vmx-FX";
    if (c == (F|V|GX))    return "ppc32-int-flt-vmx-GX";
    if (c == (F|V|FX|GX)) return "ppc32-int-flt-vmx-FX-GX";
+   if (c == (F|V|FX|GX|VX)) return "ppc32-int-flt-vmx-FX-GX-VX";
    return NULL;
 }
 
@@ -835,6 +911,7 @@
    const UInt V  = VEX_HWCAPS_PPC64_V;
    const UInt FX = VEX_HWCAPS_PPC64_FX;
    const UInt GX = VEX_HWCAPS_PPC64_GX;
+   const UInt VX = VEX_HWCAPS_PPC64_VX;
          UInt c  = hwcaps;
    if (c == 0)         return "ppc64-int-flt";
    if (c == FX)        return "ppc64-int-flt-FX";
@@ -844,6 +921,7 @@
    if (c == (V|FX))    return "ppc64-int-flt-vmx-FX";
    if (c == (V|GX))    return "ppc64-int-flt-vmx-GX";
    if (c == (V|FX|GX)) return "ppc64-int-flt-vmx-FX-GX";
+   if (c == (V|FX|GX|VX)) return "ppc64-int-flt-vmx-FX-GX-VX";
    return NULL;
 }
 
@@ -887,6 +965,42 @@
    return NULL;
 }
 
+static HChar* show_hwcaps_s390x ( UInt hwcaps )
+{
+   static const HChar prefix[] = "s390x";
+   static const HChar facilities[][6] = {
+      "ldisp",
+      "eimm",
+      "gie",
+      "dfp",
+      "fgx",
+   };
+   static HChar buf[sizeof facilities + sizeof prefix + 1];
+   HChar *p;
+
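+   /* The result is cached in buf, so hwcaps is assumed not to vary
+      between calls. */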
+   if (buf[0] != '\0') return buf;  /* already constructed */
+
+   hwcaps = VEX_HWCAPS_S390X(hwcaps);
+
+   p = buf + vex_sprintf(buf, "%s", prefix);
+   if (hwcaps & VEX_HWCAPS_S390X_LDISP)
+      p += vex_sprintf(p, "-%s", facilities[0]);
+   if (hwcaps & VEX_HWCAPS_S390X_EIMM)
+      p += vex_sprintf(p, "-%s", facilities[1]);
+   if (hwcaps & VEX_HWCAPS_S390X_GIE)
+      p += vex_sprintf(p, "-%s", facilities[2]);
+   if (hwcaps & VEX_HWCAPS_S390X_DFP)
+      p += vex_sprintf(p, "-%s", facilities[3]);
+   if (hwcaps & VEX_HWCAPS_S390X_FGX)
+      p += vex_sprintf(p, "-%s", facilities[4]);
+
+   /* If there are no facilities, add "zarch". */
+   if (hwcaps == 0)
+      vex_sprintf(p, "-%s", "zarch");
+
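+   /* Examples: "s390x-ldisp-eimm" if only the long-displacement and
+      extended-immediate facilities are present; "s390x-zarch" if
+      none are. */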
+   return buf;
+}
+
 /* ---- */
 static HChar* show_hwcaps ( VexArch arch, UInt hwcaps )
 {
@@ -896,6 +1010,7 @@
       case VexArchPPC32: return show_hwcaps_ppc32(hwcaps);
       case VexArchPPC64: return show_hwcaps_ppc64(hwcaps);
       case VexArchARM:   return show_hwcaps_arm(hwcaps);
+      case VexArchS390X: return show_hwcaps_s390x(hwcaps);
       default: return NULL;
    }
 }
diff --git a/main/VEX/priv/main_util.c b/main/VEX/priv/main_util.c
index d12380e..618254b 100644
--- a/main/VEX/priv/main_util.c
+++ b/main/VEX/priv/main_util.c
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -235,7 +235,7 @@
    New code for vex_util.c should go above this point. */
 #include <stdarg.h>
 
-static Int vex_strlen ( const HChar* str )
+Int vex_strlen ( const HChar* str )
 {
    Int i = 0;
    while (str[i] != 0) i++;
diff --git a/main/VEX/priv/main_util.h b/main/VEX/priv/main_util.h
index 1392b4b..914dc64 100644
--- a/main/VEX/priv/main_util.h
+++ b/main/VEX/priv/main_util.h
@@ -7,7 +7,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright (C) 2004-2010 OpenWorks LLP
+   Copyright (C) 2004-2011 OpenWorks LLP
       info@open-works.net
 
    This program is free software; you can redistribute it and/or
@@ -43,13 +43,15 @@
 
 #define NULL ((void*)0)
 
+#define LIKELY(x)       __builtin_expect(!!(x), 1)
+#define UNLIKELY(x)     __builtin_expect(!!(x), 0)
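+/* LIKELY/UNLIKELY are branch-prediction hints: they tell gcc which
+   way a condition is expected to go, and !!(x) normalises the value
+   to 0 or 1 for __builtin_expect. */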
 
 /* Stuff for panicking and assertion. */
 
 #define VG__STRING(__str)  #__str
 
 #define vassert(expr)                                           \
-  ((void) ((expr) ? 0 :                                         \
+  ((void) (LIKELY(expr) ? 0 :                                   \
            (vex_assert_fail (VG__STRING(expr),                  \
                              __FILE__, __LINE__,                \
                              __PRETTY_FUNCTION__), 0)))
@@ -73,6 +75,7 @@
 /* String ops */
 
 extern Bool vex_streq ( const HChar* s1, const HChar* s2 );
+extern Int vex_strlen ( const HChar* str );
 
 
 /* Storage management: clear the area, and allocate from it. */